From f341bd6c8690eb9fcbb53476562b14d9296606e1 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Fri, 5 Jul 2024 14:06:03 +0200 Subject: [PATCH] llama : add early return for empty range This commit adds an early return to the llama_kv_cache_seq_add and llama_kv_cache_seq_div functions. The motivation for adding this is to avoid looping over the cache when the range is empty. I ran into this when using the self-extend feature in main.cpp. Signed-off-by: Daniel Bevenius --- src/llama.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/llama.cpp b/src/llama.cpp index 18956d441..efd7429d5 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -3258,6 +3258,11 @@ static void llama_kv_cache_seq_add( if (p0 < 0) p0 = 0; if (p1 < 0) p1 = std::numeric_limits<llama_pos>::max(); + // If there is no range then return early to avoid looping over the cache. + if (p0 == p1) { + cache.head = 0; + return; + } if (cache.recurrent) { // for Mamba-like models, only the pos needs to be shifted @@ -3302,6 +3307,8 @@ static void llama_kv_cache_seq_div( int d) { if (p0 < 0) p0 = 0; if (p1 < 0) p1 = std::numeric_limits<llama_pos>::max(); + // If there is no range then return early to avoid looping over the cache. + if (p0 == p1) return; if (cache.recurrent) { // for Mamba-like models, only the pos needs to be changed