From f341bd6c8690eb9fcbb53476562b14d9296606e1 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Fri, 5 Jul 2024 14:06:03 +0200
Subject: [PATCH] llama : add early return for empty range

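This commit adds an early return to the llama_kv_cache_seq_add and
llama_kv_cache_seq_div functions for the case where the requested
range is empty (p0 == p1). In llama_kv_cache_seq_add the early return
also resets cache.head to 0; llama_kv_cache_seq_div simply returns.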

The motivation for adding this is to avoid looping over the cache
when the range is empty. I ran into this when using the self-extend
feature in main.cpp.

Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>
---
 src/llama.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/llama.cpp b/src/llama.cpp
index 18956d441..efd7429d5 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3258,6 +3258,11 @@ static void llama_kv_cache_seq_add(
 
     if (p0 < 0) p0 = 0;
     if (p1 < 0) p1 = std::numeric_limits<llama_pos>::max();
+    // If there is no range then return early to avoid looping over the cache.
+    if (p0 == p1) {
+        cache.head = 0;
+        return;
+    }
 
     if (cache.recurrent) {
         // for Mamba-like models, only the pos needs to be shifted
@@ -3302,6 +3307,8 @@ static void llama_kv_cache_seq_div(
                           int   d) {
     if (p0 < 0) p0 = 0;
     if (p1 < 0) p1 = std::numeric_limits<llama_pos>::max();
+    // If there is no range then return early to avoid looping over the cache.
+    if (p0 == p1) return;
 
     if (cache.recurrent) {
         // for Mamba-like models, only the pos needs to be changed