diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 4e0d3dbde..8c4ef84e6 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -2809,7 +2809,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
 }
 
 void ggml_cuda_free_data(struct ggml_tensor * tensor) {
-    if (tensor->backend != GGML_BACKEND_GPU && tensor->backend != GGML_BACKEND_GPU_SPLIT) {
+    if (!tensor || (tensor->backend != GGML_BACKEND_GPU && tensor->backend != GGML_BACKEND_GPU_SPLIT) ) {
         return;
     }
 
diff --git a/llama.cpp b/llama.cpp
index 2820802fb..93439624f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -180,12 +180,8 @@ struct llama_kv_cache {
     }
 
 #ifdef GGML_USE_CUBLAS
-        if (k) {
             ggml_cuda_free_data(k);
-        }
-        if (v) {
             ggml_cuda_free_data(v);
-        }
 #endif // GGML_USE_CUBLAS
     }
 };