diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 54fe9fc98..f18362c91 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -376,7 +376,7 @@ int main(int argc, char ** argv) { n_past, n_left, n_ctx, params.n_keep, n_discard); llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1 , params.n_keep + n_discard + 1); - llama_kv_cache_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past + 1, -n_discard); + llama_kv_cache_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard); n_past -= n_discard; diff --git a/src/llama.cpp b/src/llama.cpp index 1fa7a1a17..1813dd29b 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -21141,7 +21141,7 @@ struct llama_batch_allocr { batch = in_batch; if (!batch.pos) { // determine the last position in KV cache - llama_pos last_pos = 0; + llama_pos last_pos = -1; for (const auto & cell : ctx->kv_self.cells) { if (cell.has_seq_id(batch_default_seq_id)) { last_pos = std::max(last_pos, cell.pos);