From b88cd9f6cac1085866cb70d8914a749b79e2b1ae Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 14 Mar 2024 12:44:22 +0200
Subject: [PATCH] Update llama.cpp

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index a7d53d890..ff467c575 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9036,7 +9036,7 @@ static int llama_decode_internal(
     //llama_synchronize(&lctx);
 
     // decide if we need to defrag the kv cache
-    if (cparams.defrag_thold >= 0.0f) {
+    if (cparams.causal_attn && cparams.defrag_thold >= 0.0f) {
         const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used)/float(kv_self.n) : 0.0f;
 
         // queue defragmentation for next llama_kv_cache_update