From 05750239236f8e1ebc5f1cce5b6c55f69551791d Mon Sep 17 00:00:00 2001
From: jaime-m-p <>
Date: Fri, 14 Jun 2024 20:12:39 +0200
Subject: [PATCH] Skip missing byte tokens (falcon)

---
 llama.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 6422587c9..1113086f6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13172,10 +13172,9 @@ struct llm_tokenizer_bpe {
                     for (auto j = str.begin(); j != str.end(); ++j) {
                         std::string byte_str(1, *j);
                         auto token_multibyte = vocab.token_to_id.find(byte_str);
-                        if (token_multibyte == vocab.token_to_id.end()) {
-                            throw std::runtime_error("ERROR: byte not found in vocab");
+                        if (token_multibyte != vocab.token_to_id.end()) {
+                            output.push_back(token_multibyte->second);
                         }
-                        output.push_back((*token_multibyte).second);
                     }
                 } else {
                     output.push_back((*token).second);