From ed6587491c9c390b1d74005baa7a5b775f6cb921 Mon Sep 17 00:00:00 2001 From: JohannesGaessler Date: Tue, 13 Jun 2023 11:15:30 +0200 Subject: [PATCH] Free KV cache CUDA buffers upon deletion --- llama.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llama.cpp b/llama.cpp index 0048eab24..36a5facc4 100644 --- a/llama.cpp +++ b/llama.cpp @@ -165,6 +165,11 @@ struct llama_kv_cache { if (ctx) { ggml_free(ctx); } + +#ifdef GGML_USE_CUBLAS + ggml_cuda_free_data(k); + ggml_cuda_free_data(v); +#endif // GGML_USE_CUBLAS } };