From fd4b59be14fce95450e756570da11d2b999147b2 Mon Sep 17 00:00:00 2001
From: Seungwon
Date: Thu, 7 Mar 2024 16:28:39 +0900
Subject: [PATCH] add eos

---
 llama.cpp | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index e9192b4fa..94bba1ea8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13308,15 +13308,22 @@ int32_t llama_tokenize(
                          bool   special) {
     auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_bos, special);

-    if (n_max_tokens < (int) res.size()) {
-        // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
-        return -((int) res.size());
+    // add last eos
+    //TODO: control from the arguments
+    if (n_max_tokens <= (int) res.size()) {
+        for (size_t i = 0; i < static_cast<size_t>(n_max_tokens) - 1; i++) {
+            tokens[i] = res[i];
+
+        }
+        tokens[n_max_tokens - 1] = model->vocab.special_eos_id;
+    } else {
+        for (size_t i = 0; i < res.size(); i++) {
+            tokens[i] = res[i];
+        }
+        res.resize(res.size() + 1);
+        tokens[res.size()-1] = model->vocab.special_eos_id;
     }
-
-    for (size_t i = 0; i < res.size(); i++) {
-        tokens[i] = res[i];
-    }
-
+
     return res.size();
 }

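
For reference, a minimal caller-side sketch of how the appended EOS token could be observed through the public API. It assumes the llama.h C entry points as of roughly this revision (llama_backend_init, llama_load_model_from_file, llama_tokenize, llama_token_eos, llama_free_model); the model path "model.gguf", the prompt, and the 64-token buffer are placeholders chosen for illustration, not anything prescribed by the patch.

// Caller-side sketch (assumed API names; "model.gguf" is a placeholder path).
#include "llama.h"

#include <cstdio>
#include <cstring>
#include <vector>

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == nullptr) {
        return 1;
    }

    const char * text = "Hello world";
    std::vector<llama_token> buf(64);

    // With the patch applied, the output is expected to end with the EOS token:
    // appended after the text tokens when the buffer has room, or written over
    // the last slot when the buffer is already full.
    const int32_t n = llama_tokenize(model, text, (int32_t) strlen(text),
                                     buf.data(), (int32_t) buf.size(),
                                     /*add_bos*/ true, /*special*/ false);

    // Clamp before reading: in the truncating branch the patch still returns
    // the full token count, which can exceed the buffer size.
    const int32_t n_written = n < (int32_t) buf.size() ? n : (int32_t) buf.size();
    for (int32_t i = 0; i < n_written; i++) {
        printf("%d%c", buf[i], buf[i] == llama_token_eos(model) ? '\n' : ' ');
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}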