diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 2e791190b..ae2086143 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -887,6 +887,18 @@ struct server_context { }); slot.params.n_predict = slot.n_predict; } + if (slot.params.n_predict < 1 && slot.ga_n == 1) { + auto n_ctx_train = llama_n_ctx_train(model); + LOG_WARNING("n_predict is not set and self-context extend is disabled. Limiting generated tokens to n_ctx_train to avoid EOS-less generation infinite loop", { + {"params.n_predict", slot.params.n_predict}, + {"slot.n_predict", slot.n_predict}, + {"n_slots", params.n_parallel}, + {"n_ctx", n_ctx}, + {"n_ctx_train", n_ctx_train}, + {"ga_n", slot.ga_n}, + }); + slot.params.n_predict = n_ctx_train; + } // infill slot.params.input_prefix = json_value(data, "input_prefix", default_params.input_prefix);