Free KV cache CUDA buffers upon deletion

2023-06-13 11:15:30 +02:00 · 2023-06-13 11:15:30 +02:00 · ed6587491c
commit ed6587491c
parent 8e3057b24b
1 changed files with 5 additions and 0 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -165,6 +165,11 @@ struct llama_kv_cache {
        if (ctx) {
            ggml_free(ctx);
        }
+
+#ifdef GGML_USE_CUBLAS
+        ggml_cuda_free_data(k);
+        ggml_cuda_free_data(v);
+#endif // GGML_USE_CUBLAS
    }
 };