common : don't crash if newline token is not found

This commit is contained in:
Aarni Koskela 2024-02-13 12:20:33 +02:00
parent 037259be68
commit 93aed7595b

View file

@@ -3314,7 +3314,12 @@ static void llm_load_vocab(
// determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n' // determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
if (vocab.type == LLAMA_VOCAB_TYPE_SPM) { if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
try {
vocab.linefeed_id = llama_byte_to_token(vocab, '\n'); vocab.linefeed_id = llama_byte_to_token(vocab, '\n');
} catch (const std::exception & e) {
LLAMA_LOG_WARN("%s: SPM vocabulary, but newline token not found: %s! Using special_pad_id instead.", __func__, e.what());
vocab.linefeed_id = vocab.special_pad_id;
}
} else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) { } else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
vocab.linefeed_id = vocab.special_pad_id; vocab.linefeed_id = vocab.special_pad_id;
} else { } else {