server: cap n_predict if not set to n_ctx_train
commit 6fd5ad597f
parent 91c736015b
1 changed file with 12 additions and 0 deletions
@@ -887,6 +887,18 @@ struct server_context {
             });
             slot.params.n_predict = slot.n_predict;
         }
+        if (slot.params.n_predict < 1 && slot.ga_n == 1) {
+            auto n_ctx_train = llama_n_ctx_train(model);
+            LOG_WARNING("n_predict is not set and self-context extend is disabled. Limiting generated tokens to n_ctx_train to avoid EOS-less generation infinite loop", {
+                {"params.n_predict", slot.params.n_predict},
+                {"slot.n_predict",   slot.n_predict},
+                {"n_slots",          params.n_parallel},
+                {"n_ctx",            n_ctx},
+                {"n_ctx_train",      n_ctx_train},
+                {"ga_n",             slot.ga_n},
+            });
+            slot.params.n_predict = n_ctx_train;
+        }
 
         // infill
         slot.params.input_prefix = json_value(data, "input_prefix", default_params.input_prefix);
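For context, the rule this hunk enforces can be reduced to the standalone sketch below. The helper name effective_n_predict and the literal values are hypothetical stand-ins for the server's slot and model state; this is an illustration of the capping logic, not the server implementation itself.

// Sketch of the capping rule: when the request leaves n_predict unset (< 1)
// and self-context extend is disabled (ga_n == 1), fall back to the model's
// training context size so an EOS-less generation cannot loop forever.
#include <cstdio>

static int effective_n_predict(int requested_n_predict, int ga_n, int n_ctx_train) {
    if (requested_n_predict < 1 && ga_n == 1) {
        return n_ctx_train;
    }
    return requested_n_predict;
}

int main() {
    const int n_ctx_train = 4096; // assumed training context of the loaded model

    // unset n_predict, no self-extend -> capped to n_ctx_train (4096)
    printf("unset, ga_n=1 -> %d\n", effective_n_predict(-1, 1, n_ctx_train));
    // explicit n_predict is left untouched (128)
    printf("128,   ga_n=1 -> %d\n", effective_n_predict(128, 1, n_ctx_train));
    // unset but self-extend enabled -> left unset (-1)
    printf("unset, ga_n=4 -> %d\n", effective_n_predict(-1, 4, n_ctx_train));
    return 0;
}

Note that an explicitly requested n_predict, or a request served with group attention self-extend enabled, is left unchanged; only the "unset and no self-extend" case is capped.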