llama : add early return for empty range
This commit adds an early return to the llama_kv_cache_seq_add and llama_kv_cache_seq_div functions. The motivation for adding this is to avoid looping over the cache when the range is empty. I ran into this when using the self-extend feature in main.cpp. Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>
This commit is contained in:
parent
be20e7f49d
commit
f341bd6c86
1 changed files with 7 additions and 0 deletions
|
@ -3258,6 +3258,11 @@ static void llama_kv_cache_seq_add(
|
|||
|
||||
if (p0 < 0) p0 = 0;
|
||||
if (p1 < 0) p1 = std::numeric_limits<llama_pos>::max();
|
||||
// If there is no range then return early to avoid looping over the cache.
|
||||
if (p0 == p1) {
|
||||
cache.head = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (cache.recurrent) {
|
||||
// for Mamba-like models, only the pos needs to be shifted
|
||||
|
@ -3302,6 +3307,8 @@ static void llama_kv_cache_seq_div(
|
|||
int d) {
|
||||
if (p0 < 0) p0 = 0;
|
||||
if (p1 < 0) p1 = std::numeric_limits<llama_pos>::max();
|
||||
// If there is no range then return early to avoid looping over the cache.
|
||||
if (p0 == p1) return;
|
||||
|
||||
if (cache.recurrent) {
|
||||
// for Mamba-like models, only the pos needs to be changed
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue