llama : auto-detect more EOT tokens when missing in KV data

commit d0a4cc8ec8 (parent 4b47c24bf2)
Georgi Gerganov, 2024-04-21 11:14:19 +03:00
2 changed files with 12 additions and 6 deletions

examples/server/utils.hpp

@@ -381,10 +381,6 @@ static json oaicompat_completion_params_parse(
     } else {
         llama_params["stop"] = json_value(body, "stop", json::array());
     }
-    // Some chat templates don't use EOS token to stop generation
-    // We must add their end sequences to list of stop words
-    llama_params["stop"].push_back("<|im_end|>"); // chatml
-    llama_params["stop"].push_back("<end_of_turn>"); // gemma
 
     // Handle "response_format" field
     if (body.contains("response_format")) {

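With this change the server no longer injects template-specific stop words; generation is expected to end on the model's own EOT token, which llm_load_vocab now detects (second hunk below). Clients that still want an explicit stop sequence can send one per request. A minimal sketch, assuming the nlohmann::json library the server code above already uses; the body/llama_params names mirror the snippet, everything else is illustrative:

    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    int main() {
        // hypothetical request body carrying an explicit, per-request stop word
        json body = {
            {"prompt", "Hello"},
            {"stop",   json::array({"<|im_end|>"})},
        };

        json llama_params;
        // same shape as the server logic above: honor the client's "stop" list,
        // defaulting to an empty array instead of hard-coded template markers
        llama_params["stop"] = body.contains("stop") ? body["stop"] : json::array();
        return 0;
    }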
llama.cpp

@@ -4309,11 +4309,21 @@ static void llm_load_vocab(
             }
         }
 
-        // find EOT token "<|eot_id|>"
+        // find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc.
         //
         // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
         //       for now, we apply this workaround to find the EOT token based on its text
         if (vocab.special_eot_id == -1) {
             for (const auto & t : vocab.token_to_id) {
-                if (t.first == "<|eot_id|>" && vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL) {
+                if (
+                        // TODO: gemma "<end_of_turn>" is exported as a normal token, so the following check does not work
+                        //       need to fix convert script
+                        //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
+                        (t.first == "<|eot_id|>" ||
+                         t.first == "<|im_emd|>" ||
+                         t.first == "<end_of_turn>"
+                        )
+                   ) {
                     vocab.special_eot_id = t.second;
                     break;
                 }
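The fallback above hard-codes three spellings inside the condition. For reference, the same text-based lookup can be phrased as a loop over a candidate list, so a new EOT spelling costs one entry. A minimal sketch under that assumption (find_eot_fallback is a hypothetical helper, not llama.cpp API; the candidate strings, including the "<|im_emd|>" spelling as committed, and the -1 "unset" sentinel come from the diff above):

    #include <cstdint>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // hypothetical helper, not part of llama.cpp
    static int32_t find_eot_fallback(const std::unordered_map<std::string, int32_t> & token_to_id) {
        // candidate EOT texts, copied from the condition in the diff above
        static const std::vector<std::string> eot_texts = {
            "<|eot_id|>",
            "<|im_emd|>",
            "<end_of_turn>",
        };
        for (const auto & text : eot_texts) {
            const auto it = token_to_id.find(text);
            if (it != token_to_id.end()) {
                return it->second; // first candidate present in the vocab wins
            }
        }
        return -1; // same "unset" sentinel the loader checks for
    }

Unlike the committed loop, which scans the whole vocabulary and accepts whichever spelling map iteration reaches first, this variant prefers candidates in list order, which makes the choice deterministic.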