From ed6587491c9c390b1d74005baa7a5b775f6cb921 Mon Sep 17 00:00:00 2001
From: JohannesGaessler <johannesg@5d6.de>
Date: Tue, 13 Jun 2023 11:15:30 +0200
Subject: [PATCH] Free KV cache CUDA buffers upon deletion

---
 llama.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index 0048eab24..36a5facc4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -165,6 +165,11 @@ struct llama_kv_cache {
         if (ctx) {
             ggml_free(ctx);
         }
+
+#ifdef GGML_USE_CUBLAS
+        ggml_cuda_free_data(k);
+        ggml_cuda_free_data(v);
+#endif // GGML_USE_CUBLAS
     }
 };