Free KV cache CUDA buffers upon deletion
This commit is contained in:
parent
8e3057b24b
commit
ed6587491c
1 changed file with 5 additions and 0 deletions
|
@@ -165,6 +165,11 @@ struct llama_kv_cache {
|
||||||
if (ctx) {
|
if (ctx) {
|
||||||
ggml_free(ctx);
|
ggml_free(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef GGML_USE_CUBLAS
|
||||||
|
ggml_cuda_free_data(k);
|
||||||
|
ggml_cuda_free_data(v);
|
||||||
|
#endif // GGML_USE_CUBLAS
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue