Free KV cache CUDA buffers upon deletion
This commit is contained in:
parent
8e3057b24b
commit
ed6587491c
1 changed file with 5 additions and 0 deletions
|
@@ -165,6 +165,11 @@ struct llama_kv_cache {
|
|||
if (ctx) {
|
||||
ggml_free(ctx);
|
||||
}
|
||||
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
ggml_cuda_free_data(k);
|
||||
ggml_cuda_free_data(v);
|
||||
#endif // GGML_USE_CUBLAS
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue