Free KV cache CUDA buffers upon deletion

This commit is contained in:
JohannesGaessler 2023-06-13 11:15:30 +02:00
parent 8e3057b24b
commit ed6587491c

View file

@@ -165,6 +165,11 @@ struct llama_kv_cache {
if (ctx) {
ggml_free(ctx);
}
#ifdef GGML_USE_CUBLAS
ggml_cuda_free_data(k);
ggml_cuda_free_data(v);
#endif // GGML_USE_CUBLAS
}
};