From 5d64ffd8374a47be83aa93fb733a005f494d38f1 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Fri, 19 Apr 2024 13:33:16 +0200 Subject: [PATCH] server: fix infinite loop --- examples/server/server.cpp | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 7da12b6c5..9af5ace9e 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -887,18 +887,6 @@ struct server_context { }); slot.params.n_predict = slot.n_predict; } - if (slot.params.n_predict < 1 && slot.ga_n == 1) { - auto n_ctx_train = llama_n_ctx_train(model); - LOG_WARNING("n_predict is not set and self-context extend is disabled. Limiting generated tokens to n_ctx_train to avoid EOS-less generation infinite loop", { - {"params.n_predict", slot.params.n_predict}, - {"slot.n_predict", slot.n_predict}, - {"n_slots", params.n_parallel}, - {"n_ctx", n_ctx}, - {"n_ctx_train", n_ctx_train}, - {"ga_n", slot.ga_n}, - }); - slot.params.n_predict = n_ctx_train; - } // infill slot.params.input_prefix = json_value(data, "input_prefix", default_params.input_prefix); @@ -2270,7 +2258,23 @@ struct server_context { }); } - if (!process_token(result, slot)) { + auto n_ctx_train = llama_n_ctx_train(model); + bool stop_prediction = false; + if (slot.params.n_predict < 1 && slot.ga_n == 1 && slot.n_decoded >= n_ctx_train) { + LOG_WARNING("n_predict is not set and self-context extend is disabled. Limiting generated tokens to n_ctx_train to avoid EOS-less generation infinite loop", { + {"params.n_predict", slot.params.n_predict}, + {"slot.n_predict", slot.n_predict}, + {"slot.n_decoded", slot.n_decoded}, + {"n_slots", params.n_parallel}, + {"n_ctx", n_ctx}, + {"n_ctx_train", n_ctx_train}, + {"ga_n", slot.ga_n}, + }); + slot.truncated = true; + stop_prediction = true; + } + + if (!process_token(result, slot) || stop_prediction) { slot.release(); slot.print_timings(); send_final_response(slot);