diff --git a/llama.cpp b/llama.cpp index 75916e54d..ce4d68f38 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1316,7 +1316,7 @@ static bool llama_kv_cache_find_slot( while (true) { if (cache.head + n_tokens > n_ctx) { - n_tested += cache.size - cache.head; + n_tested += n_ctx - cache.head; cache.head = 0; continue; }