From 58a5c3bb0f427e7e59a9458ae9a521b73815556c Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 12 Dec 2024 21:22:33 +0200 Subject: [PATCH] common : apply ignore_eos as logit bias ggml-ci --- common/common.cpp | 5 +++++ examples/server/server.cpp | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index 3adfb0329..4f09bee3a 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -940,6 +940,11 @@ struct common_init_result common_init_from_params(common_params & params) { params.sampling.ignore_eos = false; } + if (params.sampling.ignore_eos) { + LOG_INF("%s: added EOS logit bias = %f\n", __func__, -INFINITY); + params.sampling.logit_bias.push_back({llama_token_eos(model), -INFINITY}); + } + if (params.warmup) { LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 879b041b0..d8aeb094e 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1467,7 +1467,7 @@ struct server_context { n_ctx = llama_n_ctx(ctx); add_bos_token = llama_add_bos_token(model); - has_eos_token = !llama_add_eos_token(model); + has_eos_token = llama_token_eos(model) != LLAMA_TOKEN_NULL; if (!params_base.speculative.model.empty()) { SRV_INF("loading draft model '%s'\n", params_base.speculative.model.c_str());