llama: fix crash when tokenizing an unknown SPM vocab token.
commit 6b2921423e
parent 55a2a900ff
1 changed file with 4 additions and 1 deletion
@@ -8770,7 +8770,10 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
             }
             // Try to fall back to just the byte as a string
             const char buf2[2] = { (char)ch, 0 };
-            return vocab.token_to_id.at(buf2);
+            token = vocab.token_to_id.find(buf2);
+            if (token != vocab.token_to_id.end()) {
+                return (*token).second;
+            }
         }
         case LLAMA_VOCAB_TYPE_WPM:
         case LLAMA_VOCAB_TYPE_BPE: {
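
For context, the crash came from std::unordered_map::at, which throws std::out_of_range when the key is missing; nothing up the call stack catches it, so an unknown byte aborts the process. Using find instead reports the miss without throwing, and the patched SPM case then falls through to the WPM/BPE handling. The following is a minimal, self-contained sketch of that difference, not the llama.cpp source; the token_to_id map and the 0xFF byte are hypothetical stand-ins for vocab.token_to_id and an unseen SPM byte.

// sketch: at() vs find() on a byte that is not in the vocab
#include <cstdio>
#include <string>
#include <unordered_map>

int main() {
    std::unordered_map<std::string, int> token_to_id = { { "a", 1 } };
    const char buf2[2] = { (char)0xFF, 0 };   // byte not present in the map

    // Old behavior: at() throws std::out_of_range, terminating the process
    // if the exception is never caught.
    // int id = token_to_id.at(buf2);         // would abort here

    // New behavior: find() signals the miss via end(), so the caller can
    // handle it (in the patch, by falling through to the next vocab case).
    auto it = token_to_id.find(buf2);
    if (it != token_to_id.end()) {
        printf("found id %d\n", it->second);
    } else {
        printf("unknown byte, no token id available\n");
    }
    return 0;
}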