server : fix crash when prompt exceeds context size

This commit is contained in:
ZXED 2023-11-08 21:28:39 +03:00
parent 875fb42871
commit cba61802c2
No known key found for this signature in database
GPG key ID: 637FB44813DCFD66

View file

@@ -1555,15 +1555,6 @@ struct llama_server_context
slot.num_prompt_tokens = prompt_tokens.size();
if (!slot.params.cache_prompt)
{
llama_sampling_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
}
else
{
if (slot.params.n_keep < 0)
{
slot.params.n_keep = slot.num_prompt_tokens;
@@ -1593,6 +1584,15 @@ struct llama_server_context
GGML_ASSERT(slot.num_prompt_tokens < slot.n_ctx);
}
if (!slot.params.cache_prompt)
{
llama_sampling_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
}
else
{
// push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens)
{