From 6fd5ad597f16ba246dc41a072112a469eff5b345 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Fri, 12 Apr 2024 13:38:02 +0200
Subject: [PATCH] server: cap n_predict if not set to n_ctx_train

---
 examples/server/server.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2e791190b..ae2086143 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -887,6 +887,18 @@ struct server_context {
             });
             slot.params.n_predict = slot.n_predict;
         }
+        if (slot.params.n_predict < 1 && slot.ga_n == 1) {
+            auto n_ctx_train = llama_n_ctx_train(model);
+            LOG_WARNING("n_predict is not set and self-context extend is disabled. Limiting generated tokens to n_ctx_train to avoid EOS-less generation infinite loop", {
+                {"params.n_predict", slot.params.n_predict},
+                {"slot.n_predict", slot.n_predict},
+                {"n_slots", params.n_parallel},
+                {"n_ctx", n_ctx},
+                {"n_ctx_train", n_ctx_train},
+                {"ga_n", slot.ga_n},
+            });
+            slot.params.n_predict = n_ctx_train;
+        }
 
         // infill
         slot.params.input_prefix = json_value(data, "input_prefix", default_params.input_prefix);