llama : add llama_kv_cache_compress (EXPERIMENTAL)

This commit is contained in:
Georgi Gerganov 2024-02-25 22:16:13 +02:00
parent c24a2a6e60
commit 14d757066b
No known key found for this signature in database
GPG key ID: BF970631944C16B7
3 changed files with 262 additions and 0 deletions

View file

@ -557,6 +557,14 @@ extern "C" {
struct llama_context * ctx,
llama_seq_id seq_id);
// [EXPERIMENTAL] Request compression of the data stored in the KV cache.
// The compression is applied in one of two ways:
//   - lazily, on the next call to llama_decode()
//   - explicitly, by calling llama_kv_cache_update()
// NOTE(review): the meaning of `delta` is not described in this header - confirm against the implementation
LLAMA_API void llama_kv_cache_compress(struct llama_context * ctx, llama_pos delta);
// Defragment the KV cache
// This will be applied:
// - lazily on next llama_decode()