diff --git a/llama.cpp b/llama.cpp
index e9192b4fa..94bba1ea8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13308,14 +13308,26 @@ int32_t llama_tokenize(
                      bool special) {
     auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_bos, special);
 
-    if (n_max_tokens < (int) res.size()) {
-        // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
-        return -((int) res.size());
-    }
-
-    for (size_t i = 0; i < res.size(); i++) {
-        tokens[i] = res[i];
-    }
-
+    // always append a trailing eos token to the output buffer
+    //TODO: control from the arguments
+    if (n_max_tokens <= (int) res.size()) {
+        if (n_max_tokens < 1) {
+            // no room for anything: keep the old convention of returning
+            // minus the required size (now including the appended eos)
+            return -((int) res.size() + 1);
+        }
+        // truncate so the eos token fits into the last slot of the buffer;
+        // res.size() now equals the number of tokens actually written
+        res.resize(n_max_tokens);
+        for (size_t i = 0; i + 1 < res.size(); i++) {
+            tokens[i] = res[i];
+        }
+        tokens[n_max_tokens - 1] = model->vocab.special_eos_id;
+    } else {
+        for (size_t i = 0; i < res.size(); i++) {
+            tokens[i] = res[i];
+        }
+        res.resize(res.size() + 1);
+        tokens[res.size() - 1] = model->vocab.special_eos_id;
+    }
+
     return res.size();
 }