fix(server): infinite loop to inference
commit 4447e95ec5
parent f5ca054855
1 changed file with 2 additions and 0 deletions
@@ -1737,6 +1737,8 @@ struct llama_server_context
             {
                 // if you get here, it means the KV cache is full - try increasing it via the context size
                 LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
+                LOG_ERROR("KV cache is full - try increasing it via the context size", {{"ctx-size", params.n_ctx}});
+                kv_cache_clear();
                 return false;
             }
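The two added lines are what break the loop. Before this change, when a decode failed because the KV cache was full, the server logged the failure and returned false, but the cache stayed full, so the next decode attempt failed the same way and the server retried forever. Emitting a structured LOG_ERROR and then clearing the cache with kv_cache_clear() lets the next iteration start from a recoverable state. Below is a minimal standalone sketch of that failure mode and the fix; the names KvCache and try_decode_batch are illustrative stand-ins, not the actual server code.

#include <cstdio>

// Illustrative stand-in for the server's KV cache (not llama.cpp's API).
struct KvCache {
    int used     = 0;
    int capacity = 4096; // analogous to the --ctx-size parameter
    void clear() { used = 0; }
};

// Mirrors a decode call that fails when the cache has no room left.
static bool try_decode_batch(KvCache & cache, int n_batch) {
    if (cache.used + n_batch > cache.capacity) {
        return false; // KV cache is full
    }
    cache.used += n_batch;
    return true;
}

int main() {
    KvCache cache;
    cache.used = cache.capacity; // simulate an already-full cache
    const int n_batch = 32;

    for (int attempt = 0; attempt < 3; attempt++) {
        if (!try_decode_batch(cache, n_batch)) {
            std::fprintf(stderr, "failed to decode the batch, n_batch = %d\n", n_batch);
            // Without this clear, every retry would hit the same full cache:
            // the infinite loop this commit fixes.
            cache.clear();
            continue;
        }
        std::printf("decoded batch on attempt %d\n", attempt);
        break;
    }
    return 0;
}

With the clear in place, the retry after the first failure succeeds; removing the cache.clear() line reproduces the endless retry against a full cache.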