Skip missing byte tokens (falcon)
This commit is contained in:
parent
4ff15d4fda
commit
0575023923
1 changed file with 2 additions and 3 deletions
|
@@ -13172,10 +13172,9 @@ struct llm_tokenizer_bpe {
|
||||||
for (auto j = str.begin(); j != str.end(); ++j) {
|
for (auto j = str.begin(); j != str.end(); ++j) {
|
||||||
std::string byte_str(1, *j);
|
std::string byte_str(1, *j);
|
||||||
auto token_multibyte = vocab.token_to_id.find(byte_str);
|
auto token_multibyte = vocab.token_to_id.find(byte_str);
|
||||||
if (token_multibyte == vocab.token_to_id.end()) {
|
if (token_multibyte != vocab.token_to_id.end()) {
|
||||||
throw std::runtime_error("ERROR: byte not found in vocab");
|
output.push_back(token_multibyte->second);
|
||||||
}
|
}
|
||||||
output.push_back((*token_multibyte).second);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
output.push_back((*token).second);
|
output.push_back((*token).second);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue