diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index 99b13dd26..d8a02fee4 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -376,7 +376,7 @@ int main(int argc, char ** argv) {
                 n_past, n_left, n_ctx, params.n_keep, n_discard);

             llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1            , params.n_keep + n_discard + 1);
-            llama_kv_cache_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
+            llama_kv_cache_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past + 1, -n_discard);

             n_past -= n_discard;
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 9aba34874..bc7e839a0 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -582,7 +582,7 @@ int main(int argc, char ** argv) {
                     n_past, n_left, n_ctx, params.n_keep, n_discard);

                 llama_kv_cache_seq_rm (ctx, 0, params.n_keep            , params.n_keep + n_discard);
-                llama_kv_cache_seq_add(ctx, 0, params.n_keep + n_discard, n_past, -n_discard);
+                llama_kv_cache_seq_add(ctx, 0, params.n_keep + n_discard, n_past + 1 , -n_discard);

                 n_past -= n_discard;
diff --git a/src/llama.cpp b/src/llama.cpp
index c25ae1e1e..5d41e9b19 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -21134,7 +21134,7 @@ int32_t llama_encode(
         struct llama_batch batch) {
     llama_batch_allocr batch_allocr(ctx, batch);
     const int ret = llama_encode_internal(*ctx, batch_allocr.batch);
-    if (ret < 0) {
+    if (ret != 0) {
         LLAMA_LOG_ERROR("%s: failed to encode, ret = %d\n", __func__, ret);
     }
@@ -21146,7 +21146,7 @@ int32_t llama_decode(
         struct llama_batch batch) {
     llama_batch_allocr batch_allocr(ctx, batch);
     const int ret = llama_decode_internal(*ctx, batch_allocr.batch);
-    if (ret < 0) {
+    if (ret != 0) {
         LLAMA_LOG_ERROR("%s: failed to decode, ret = %d\n", __func__, ret);
     }
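
For context, here is a minimal sketch of the KV-cache context-shift pattern that the first two hunks adjust, assuming only the llama.cpp C API calls already used in the diff (`llama_kv_cache_seq_rm`, `llama_kv_cache_seq_add`). The `shift_context` helper name is hypothetical; the real logic lives inline in main.cpp and infill.cpp.

```cpp
#include "llama.h"

// Hypothetical helper illustrating the context-shift step from main.cpp:
// keep the first n_keep tokens, drop the next n_discard tokens, and slide
// the remaining KV-cache entries back by n_discard positions.
static void shift_context(llama_context * ctx, int & n_past, int n_keep, int n_discard) {
    // remove the cells in [n_keep, n_keep + n_discard)
    llama_kv_cache_seq_rm (ctx, 0, n_keep, n_keep + n_discard);

    // shift the cells after the removed range back by n_discard; the patch
    // widens the end bound of this range from n_past to n_past + 1
    llama_kv_cache_seq_add(ctx, 0, n_keep + n_discard, n_past + 1, -n_discard);

    n_past -= n_discard;
}
```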