llama : add llama_kv_cache_compress (EXPERIMENTAL)

2024-02-25 22:16:13 +02:00 · 2024-02-25 22:16:13 +02:00 · 14d757066b
commit 14d757066b
parent c24a2a6e60
3 changed files with 262 additions and 0 deletions
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@@ -148,6 +148,7 @@ int main(int argc, char ** argv) {

            llama_kv_cache_seq_add (ctx, 0, n_past - n_batch,         n_past,         ib*bd);
            llama_kv_cache_seq_div (ctx, 0, n_past - n_batch + ib*bd, n_past + ib*bd, n_grp);
+            llama_kv_cache_compress(ctx, 0);
            llama_kv_cache_update  (ctx);

            n_past = llama_kv_cache_seq_pos_max(ctx, 0) + 1;