Update llama.cpp

This commit is contained in:
Georgi Gerganov 2024-03-14 12:44:22 +02:00 committed by GitHub
parent 38328bb599
commit b88cd9f6ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -9036,7 +9036,7 @@ static int llama_decode_internal(
//llama_synchronize(&lctx);
// decide if we need to defrag the kv cache
-if (cparams.defrag_thold >= 0.0f) {
+if (cparams.causal_attn && cparams.defrag_thold >= 0.0f) {
const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used)/float(kv_self.n) : 0.0f;
// queue defragmentation for next llama_kv_cache_update