From 58a5c3bb0f427e7e59a9458ae9a521b73815556c Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Thu, 12 Dec 2024 21:22:33 +0200
Subject: [PATCH] common : apply ignore_eos as logit bias

ggml-ci
---
 common/common.cpp          | 5 +++++
 examples/server/server.cpp | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/common/common.cpp b/common/common.cpp
index 3adfb0329..4f09bee3a 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -940,6 +940,11 @@ struct common_init_result common_init_from_params(common_params & params) {
         params.sampling.ignore_eos = false;
     }
 
+    if (params.sampling.ignore_eos) {
+        LOG_INF("%s: added EOS logit bias = %f\n", __func__, -INFINITY);
+        params.sampling.logit_bias.push_back({llama_token_eos(model), -INFINITY});
+    }
+
     if (params.warmup) {
         LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 879b041b0..d8aeb094e 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1467,7 +1467,7 @@ struct server_context {
         n_ctx = llama_n_ctx(ctx);
 
         add_bos_token = llama_add_bos_token(model);
-        has_eos_token = !llama_add_eos_token(model);
+        has_eos_token = llama_token_eos(model) != LLAMA_TOKEN_NULL;
 
         if (!params_base.speculative.model.empty()) {
             SRV_INF("loading draft model '%s'\n", params_base.speculative.model.c_str());