From d0a4cc8ec8c65fb664d3dd773813170fc175c4af Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Sun, 21 Apr 2024 11:14:19 +0300
Subject: [PATCH] llama : auto-detect more EOT tokens when missing in KV data

---
 examples/server/utils.hpp |  4 ----
 llama.cpp                 | 14 ++++++++++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index a8d43ac63..1a2212502 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -381,10 +381,6 @@ static json oaicompat_completion_params_parse(
     } else {
         llama_params["stop"] = json_value(body, "stop", json::array());
     }
-    // Some chat templates don't use EOS token to stop generation
-    // We must add their end sequences to list of stop words
-    llama_params["stop"].push_back("<|im_end|>"); // chatml
-    llama_params["stop"].push_back("<end_of_turn>"); // gemma
 
     // Handle "response_format" field
     if (body.contains("response_format")) {
diff --git a/llama.cpp b/llama.cpp
index 2068d555c..92b222392 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4309,11 +4309,21 @@ static void llm_load_vocab(
             }
         }
 
-        // find EOT token "<|eot_id|>"
+        // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
+        //
         // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
+        //       for now, we apply this workaround to find the EOT token based on its text
         if (vocab.special_eot_id == -1) {
             for (const auto & t : vocab.token_to_id) {
-                if (t.first == "<|eot_id|>" && vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL) {
+                if (
+                        // TODO: gemma "<end_of_turn>" is exported as a normal token, so the following check does not work
+                        //       need to fix convert script
+                        //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
+                        (t.first == "<|eot_id|>" ||
+                         t.first == "<|im_end|>" ||
+                         t.first == "<end_of_turn>"
+                        )
+                   ) {
                     vocab.special_eot_id = t.second;
                     break;
                 }