diff --git a/llama.cpp b/llama.cpp
index 049f73e44..2820802fb 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -165,8 +165,8 @@ struct llama_layer {
 };
 
 struct llama_kv_cache {
-    struct ggml_tensor * k;
-    struct ggml_tensor * v;
+    struct ggml_tensor * k = NULL;
+    struct ggml_tensor * v = NULL;
 
     struct ggml_context * ctx = NULL;
 
@@ -180,8 +180,12 @@ struct llama_kv_cache {
         }
 
 #ifdef GGML_USE_CUBLAS
+        if (k) {
         ggml_cuda_free_data(k);
+        }
+        if (v) {
         ggml_cuda_free_data(v);
+        }
 #endif // GGML_USE_CUBLAS
     }
 };
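
For context: this change NULL-initializes the `k` and `v` tensors of `llama_kv_cache` and guards the CUDA frees in the destructor, so destroying a cache whose tensors were never allocated no longer passes uninitialized pointers to `ggml_cuda_free_data`. The sketch below shows roughly what the patched destructor ends up looking like; it is illustrative rather than a literal excerpt, and the `~llama_kv_cache()` signature plus the `if (ctx) { ggml_free(ctx); }` guard are assumed from the context lines around the hunks, not asserted by this diff.

// Sketch of llama_kv_cache after the patch (illustrative; members outside
// the hunks are omitted, destructor shape assumed from surrounding context).
struct llama_kv_cache {
    // NULL-initialized so the destructor can safely test them even when the
    // cache was never populated.
    struct ggml_tensor * k = NULL;
    struct ggml_tensor * v = NULL;

    struct ggml_context * ctx = NULL;

    ~llama_kv_cache() {
        if (ctx) {
            ggml_free(ctx);
        }

#ifdef GGML_USE_CUBLAS
        // Free device-side data only for tensors that were actually created.
        if (k) {
            ggml_cuda_free_data(k);
        }
        if (v) {
            ggml_cuda_free_data(v);
        }
#endif // GGML_USE_CUBLAS
    }
};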