server : fix crash when prompt exceeds context size

This commit is contained in:
ZXED 2023-11-08 21:28:39 +03:00
parent 875fb42871
commit cba61802c2
No known key found for this signature in database
GPG key ID: 637FB44813DCFD66

View file

@@ -1555,15 +1555,6 @@ struct llama_server_context
slot.num_prompt_tokens = prompt_tokens.size();
if (!slot.params.cache_prompt)
{
llama_sampling_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
}
else
{
if (slot.params.n_keep < 0)
{
slot.params.n_keep = slot.num_prompt_tokens;
@@ -1593,6 +1584,15 @@ struct llama_server_context
GGML_ASSERT(slot.num_prompt_tokens < slot.n_ctx);
}
if (!slot.params.cache_prompt)
{
llama_sampling_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
}
else
{
// push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens)
{