server: fix core dump when input prompt larger than prompt context (n_ctx)

ydlme 2023-11-10 22:53:25 +08:00
parent df9d1293de
commit f034effa22


@@ -1560,6 +1560,13 @@ struct llama_server_context
     if (!slot.params.cache_prompt)
     {
         llama_sampling_reset(slot.ctx_sampling);
+        // if input prompt is too big, truncate it
+        if (slot.num_prompt_tokens >= slot.n_ctx)
+        {
+            slot.num_prompt_tokens = slot.n_ctx - 1;
+            prompt_tokens = std::vector<llama_token>(prompt_tokens.end() - slot.num_prompt_tokens, prompt_tokens.end());
+            slot.truncated = true;
+        }
         slot.n_past = 0;
         slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
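
For reference, a minimal standalone sketch of the tail-truncation idea this change applies: when the tokenized prompt does not fit in the context window, keep only the most recent n_ctx - 1 tokens and mark the request as truncated. The helper name truncate_prompt_to_ctx and the llama_token typedef below are illustrative assumptions for the sketch, not part of the server code.

#include <cstdint>
#include <cstdio>
#include <vector>

using llama_token = int32_t; // assumption: token ids are 32-bit integers

// Truncate prompt_tokens in place to at most n_ctx - 1 trailing tokens.
// Returns true if truncation happened (mirrors slot.truncated in the diff).
static bool truncate_prompt_to_ctx(std::vector<llama_token> & prompt_tokens, int n_ctx) {
    if ((int) prompt_tokens.size() < n_ctx) {
        return false; // prompt already fits, nothing to do
    }
    const int n_keep = n_ctx - 1;
    prompt_tokens = std::vector<llama_token>(prompt_tokens.end() - n_keep, prompt_tokens.end());
    return true;
}

int main() {
    std::vector<llama_token> prompt(600, 7); // pretend prompt of 600 tokens
    const int n_ctx = 512;                   // pretend context window

    const bool truncated = truncate_prompt_to_ctx(prompt, n_ctx);
    std::printf("truncated: %s, tokens kept: %zu\n", truncated ? "yes" : "no", prompt.size());
    return 0;
}

Keeping the tail rather than the head preserves the most recent part of the prompt, and capping the count at n_ctx - 1 leaves room for at least one generated token, which is what prevents the out-of-context access that caused the core dump.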