commit fd4b59be14 (parent e04e04f8fa)
Author: Seungwon
Date:   2024-03-07 16:28:39 +09:00


@@ -13308,13 +13308,20 @@ int32_t llama_tokenize(
                      bool special) {
     auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_bos, special);
 
-    if (n_max_tokens < (int) res.size()) {
-        // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
-        return -((int) res.size());
-    }
-
-    for (size_t i = 0; i < res.size(); i++) {
-        tokens[i] = res[i];
+    // add last eos
+    //TODO: control from the arguments
+    if (n_max_tokens <= (int) res.size()) {
+        for (size_t i = 0; i < static_cast<size_t>(n_max_tokens) - 1; i++) {
+            tokens[i] = res[i];
+        }
+        tokens[n_max_tokens - 1] = model->vocab.special_eos_id;
+    } else {
+        for (size_t i = 0; i < res.size(); i++) {
+            tokens[i] = res[i];
+        }
+        res.resize(res.size() + 1);
+        tokens[res.size()-1] = model->vocab.special_eos_id;
     }
 
     return res.size();
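
For context, a minimal caller sketch of the patched behaviour follows. The llama_tokenize signature is taken from the hunk above; everything else (the model path, the loading boilerplate, the buffer size, and the add_bos/special values) is an illustrative assumption and not part of this commit.

// Hypothetical caller sketch, not part of the commit. It illustrates the
// patched contract: llama_tokenize now always terminates the output with
// model->vocab.special_eos_id. When the buffer is too small it truncates
// to n_max_tokens - 1 tokens plus EOS (instead of returning a negative
// required size, as before); otherwise it copies all tokens and appends EOS.
#include <cstdio>
#include <cstring>
#include <vector>
#include "llama.h"

int main() {
    // Placeholder model path; backend init and error handling omitted.
    llama_model * model = llama_load_model_from_file("model.gguf", llama_model_default_params());
    if (model == nullptr) {
        return 1;
    }

    const char * text = "Hello, world";
    std::vector<llama_token> buf(8); // deliberately small to exercise the truncating branch

    const int32_t n = llama_tokenize(model, text, (int32_t) strlen(text),
                                     buf.data(), (int32_t) buf.size(),
                                     /*add_bos*/ true, /*special*/ false);

    // Note: on the truncating branch the patch still returns the full
    // untruncated token count, so n can exceed buf.size(); in that case
    // only the first buf.size() entries (the last one being EOS) are valid.
    printf("tokenizer reported %d tokens\n", n);

    llama_free_model(model);
    return 0;
}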