server: fix core dump when input prompt larger than prompt context (n_ctx)
This commit is contained in:
parent df9d1293de
commit f034effa22
1 changed file with 7 additions and 0 deletions
@@ -1560,6 +1560,13 @@ struct llama_server_context
 if (!slot.params.cache_prompt)
 {
     llama_sampling_reset(slot.ctx_sampling);
+    // if input prompt is too big, truncate it
+    if (slot.num_prompt_tokens >= slot.n_ctx)
+    {
+        slot.num_prompt_tokens = slot.n_ctx - 1;
+        prompt_tokens = std::vector<llama_token>(prompt_tokens.end() - slot.num_prompt_tokens, prompt_tokens.end());
+        slot.truncated = true;
+    }
 
     slot.n_past = 0;
     slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
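The added block clamps an oversized prompt to the last n_ctx - 1 tokens and marks the slot as truncated, so decoding never tries to process more tokens than the context can hold. Below is a minimal standalone sketch of that tail-keeping pattern, not the server code itself; the llama_token alias, the concrete n_ctx value, and main() are illustrative assumptions.

// Standalone sketch of the truncation idea above (assumptions: token ids are
// plain 32-bit integers, n_ctx = 8 is a made-up context size).
#include <cstdint>
#include <cstdio>
#include <vector>

using llama_token = std::int32_t;

int main() {
    const int n_ctx = 8;                         // hypothetical context size
    std::vector<llama_token> prompt_tokens(20);  // pretend prompt of 20 tokens
    for (int i = 0; i < (int) prompt_tokens.size(); ++i) {
        prompt_tokens[i] = i;
    }

    int  num_prompt_tokens = (int) prompt_tokens.size();
    bool truncated         = false;

    // Same idea as the fix: if the prompt does not fit in the context,
    // keep only the last n_ctx - 1 tokens.
    if (num_prompt_tokens >= n_ctx) {
        num_prompt_tokens = n_ctx - 1;
        prompt_tokens = std::vector<llama_token>(prompt_tokens.end() - num_prompt_tokens,
                                                 prompt_tokens.end());
        truncated = true;
    }

    std::printf("kept %d tokens (truncated=%d), first id=%d\n",
                num_prompt_tokens, (int) truncated, (int) prompt_tokens.front());
    return 0;
}

Keeping the tail rather than the head preserves the most recent part of the conversation, which is what the next generation step actually conditions on.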