Skip missing byte tokens (falcon)

This commit is contained in:
jaime-m-p 2024-06-14 20:12:39 +02:00
parent 4ff15d4fda
commit 0575023923

View file

@@ -13172,10 +13172,9 @@ struct llm_tokenizer_bpe {
for (auto j = str.begin(); j != str.end(); ++j) {
std::string byte_str(1, *j);
auto token_multibyte = vocab.token_to_id.find(byte_str);
if (token_multibyte == vocab.token_to_id.end()) {
throw std::runtime_error("ERROR: byte not found in vocab");
if (token_multibyte != vocab.token_to_id.end()) {
output.push_back(token_multibyte->second);
}
output.push_back((*token_multibyte).second);
}
} else {
output.push_back((*token).second);