cuBLAS: also pin kv cache

This commit is contained in:
Slaren 2023-04-28 00:48:01 +02:00
parent d5d6a8083a
commit 3cf2247d37

View file

@@ -136,7 +136,7 @@ struct llama_kv_cache {
struct ggml_context * ctx = NULL;
-llama_buffer buf;
+llama_ctx_buffer buf;
int n; // number of tokens currently in the cache