From 93aed7595bff23279bbb7e59723c20c20d4571eb Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 13 Feb 2024 12:20:33 +0200 Subject: [PATCH] common : don't crash if newline token is not found --- llama.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 61c695187..4e3a7f3d4 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3314,7 +3314,12 @@ static void llm_load_vocab( // determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n' if (vocab.type == LLAMA_VOCAB_TYPE_SPM) { - vocab.linefeed_id = llama_byte_to_token(vocab, '\n'); + try { + vocab.linefeed_id = llama_byte_to_token(vocab, '\n'); + } catch (const std::exception & e) { + LLAMA_LOG_WARN("%s: SPM vocabulary, but newline token not found: %s! Using special_pad_id instead.", __func__, e.what()); + vocab.linefeed_id = vocab.special_pad_id; + } } else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) { vocab.linefeed_id = vocab.special_pad_id; } else {