fix(server): infinite loop to inference
commit 4447e95ec5
parent f5ca054855
1 changed file with 2 additions and 0 deletions
@@ -1737,6 +1737,8 @@ struct llama_server_context
             {
                 // if you get here, it means the KV cache is full - try increasing it via the context size
                 LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
+                LOG_ERROR("KV cache is full - try increasing it via the context size", {{"ctx-size", params.n_ctx}});
+                kv_cache_clear();
                 return false;
             }
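The two added lines are what break the loop. Before this change, when a decode failed because the KV cache was full, the server logged the failure and returned false, but the cache stayed full, so the next decode attempt failed the same way and the server retried forever. Emitting a structured LOG_ERROR and then clearing the cache with kv_cache_clear() lets the next iteration start from a recoverable state. Below is a minimal standalone sketch of that failure mode and the fix; the names KvCache and try_decode_batch are illustrative stand-ins, not the actual server code.

#include <cstdio>

// Illustrative stand-in for the server's KV cache (not llama.cpp's API).
struct KvCache {
    int used     = 0;
    int capacity = 4096; // analogous to the --ctx-size parameter
    void clear() { used = 0; }
};

// Mirrors a decode call that fails when the cache has no room left.
static bool try_decode_batch(KvCache & cache, int n_batch) {
    if (cache.used + n_batch > cache.capacity) {
        return false; // KV cache is full
    }
    cache.used += n_batch;
    return true;
}

int main() {
    KvCache cache;
    cache.used = cache.capacity; // simulate an already-full cache
    const int n_batch = 32;

    for (int attempt = 0; attempt < 3; attempt++) {
        if (!try_decode_batch(cache, n_batch)) {
            std::fprintf(stderr, "failed to decode the batch, n_batch = %d\n", n_batch);
            // Without this clear, every retry would hit the same full cache:
            // the infinite loop this commit fixes.
            cache.clear();
            continue;
        }
        std::printf("decoded batch on attempt %d\n", attempt);
        break;
    }
    return 0;
}

With the clear in place, the retry after the first failure succeeds; removing the cache.clear() line reproduces the endless retry against a full cache.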