kv_cache management

This commit is contained in:
Leon Ericsson 2023-12-16 12:12:33 +01:00
parent 1665ad8bf1
commit 21431197a1

View file

@@ -130,6 +130,10 @@ int main(int argc, char ** argv){
break;
}
// KV cache management
// remove sequence 0's cache entries from position n_past onward, i.e. the draft tokens that weren't accepted
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
llama_batch_clear(batch_tgt);
llama_batch_add(batch_tgt, draft[0], n_past, { 0 }, true);