From d0a4cc8ec8c65fb664d3dd773813170fc175c4af Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Sun, 21 Apr 2024 11:14:19 +0300
Subject: [PATCH] llama : auto-detect more EOT tokens when missing in KV data

---
 examples/server/utils.hpp |  4 ----
 llama.cpp                 | 14 ++++++++++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index a8d43ac63..1a2212502 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -381,10 +381,6 @@ static json oaicompat_completion_params_parse(
     } else {
         llama_params["stop"] = json_value(body, "stop", json::array());
     }
-    // Some chat templates don't use EOS token to stop generation
-    // We must add their end sequences to list of stop words
-    llama_params["stop"].push_back("<|im_end|>"); // chatml
-    llama_params["stop"].push_back("<end_of_turn>"); // gemma
 
     // Handle "response_format" field
     if (body.contains("response_format")) {
diff --git a/llama.cpp b/llama.cpp
index 2068d555c..92b222392 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4309,11 +4309,21 @@ static void llm_load_vocab(
             }
         }
 
-        // find EOT token "<|eot_id|>"
+        // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
+        //
         // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
+        //       for now, we apply this workaround to find the EOT token based on its text
         if (vocab.special_eot_id == -1) {
             for (const auto & t : vocab.token_to_id) {
-                if (t.first == "<|eot_id|>" && vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL) {
+                if (
+                        // TODO: gemma "<end_of_turn>" is exported as a normal token, so the following check does not work
+                        //       need to fix convert script
+                        //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
+                        (t.first == "<|eot_id|>" ||
+                         t.first == "<|im_end|>" ||
+                         t.first == "<end_of_turn>"
+                        )
+                   ) {
                     vocab.special_eot_id = t.second;
                     break;
                 }