commit fd4b59be14 (parent e04e04f8fa)
Author: Seungwon
Date:   2024-03-07 16:28:39 +09:00


@@ -13308,13 +13308,20 @@ int32_t llama_tokenize(
                      bool special) {
     auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_bos, special);
 
-    if (n_max_tokens < (int) res.size()) {
-        // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
-        return -((int) res.size());
-    }
-
-    for (size_t i = 0; i < res.size(); i++) {
-        tokens[i] = res[i];
+    // add last eos
+    //TODO: control from the arguments
+    if (n_max_tokens <= (int) res.size()) {
+        for (size_t i = 0; i < static_cast<size_t>(n_max_tokens) - 1; i++) {
+            tokens[i] = res[i];
+        }
+        tokens[n_max_tokens - 1] = model->vocab.special_eos_id;
+    } else {
+        for (size_t i = 0; i < res.size(); i++) {
+            tokens[i] = res[i];
+        }
+        res.resize(res.size() + 1);
+        tokens[res.size()-1] = model->vocab.special_eos_id;
     }
 
     return res.size();
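
For context, a minimal caller sketch of the patched behaviour follows. The llama_tokenize signature is taken from the hunk above; everything else (the model path, the loading boilerplate, the buffer size, and the add_bos/special values) is an illustrative assumption and not part of this commit.

// Hypothetical caller sketch, not part of the commit. It illustrates the
// patched contract: llama_tokenize now always terminates the output with
// model->vocab.special_eos_id. When the buffer is too small it truncates
// to n_max_tokens - 1 tokens plus EOS (instead of returning a negative
// required size, as before); otherwise it copies all tokens and appends EOS.
#include <cstdio>
#include <cstring>
#include <vector>
#include "llama.h"

int main() {
    // Placeholder model path; backend init and error handling omitted.
    llama_model * model = llama_load_model_from_file("model.gguf", llama_model_default_params());
    if (model == nullptr) {
        return 1;
    }

    const char * text = "Hello, world";
    std::vector<llama_token> buf(8); // deliberately small to exercise the truncating branch

    const int32_t n = llama_tokenize(model, text, (int32_t) strlen(text),
                                     buf.data(), (int32_t) buf.size(),
                                     /*add_bos*/ true, /*special*/ false);

    // Note: on the truncating branch the patch still returns the full
    // untruncated token count, so n can exceed buf.size(); in that case
    // only the first buf.size() entries (the last one being EOS) are valid.
    printf("tokenizer reported %d tokens\n", n);

    llama_free_model(model);
    return 0;
}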