server : clear the KV cache beyond n_past before llama_decode
This commit is contained in:
parent
2b8830af71
commit
ce2d995af2
1 changed files with 4 additions and 0 deletions
|
@@ -434,6 +434,10 @@ struct llama_server_context
             {
                 n_eval = params.n_batch;
             }
+
+            // since #3228 we now have to manually manage the KV cache
+            llama_kv_cache_tokens_rm(ctx, n_past, -1);
+
             if (llama_decode(ctx, llama_batch_get_one(&embd[n_past], n_eval, n_past, 0), params.n_threads))
             {
                 LOG_ERROR("failed to eval", {
Loading…
Add table
Add a link
Reference in a new issue