From 7befc54e82b0018b70ec9253c7a52519ffe0df41 Mon Sep 17 00:00:00 2001 From: slaren Date: Wed, 20 Mar 2024 03:20:22 +0100 Subject: [PATCH] fix leaks --- ggml-cuda.cu | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index ee2da3407..26426d90b 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -621,11 +621,6 @@ struct ggml_backend_cuda_context { cudaStream_t streams[GGML_CUDA_MAX_DEVICES][GGML_CUDA_MAX_STREAMS] = { { nullptr } }; cublasHandle_t cublas_handles[GGML_CUDA_MAX_DEVICES] = {nullptr}; - struct cuda_device { - cublasHandle_t cublas_handle = nullptr; - cudaStream_t cudaStreams[GGML_CUDA_MAX_STREAMS] = {}; - }; - explicit ggml_backend_cuda_context(int device) : device(device), name(GGML_CUDA_NAME + std::to_string(device)) { @@ -635,6 +630,16 @@ struct ggml_backend_cuda_context { if (copy_event != nullptr) { CUDA_CHECK(cudaEventDestroy(copy_event)); } + for (int i = 0; i < GGML_CUDA_MAX_DEVICES; ++i) { + for (int j = 0; j < GGML_CUDA_MAX_STREAMS; ++j) { + if (streams[i][j] != nullptr) { + CUDA_CHECK(cudaStreamDestroy(streams[i][j])); + } + } + if (cublas_handles[i] != nullptr) { + CUBLAS_CHECK(cublasDestroy(cublas_handles[i])); + } + } } cudaStream_t stream(int device, int stream) { @@ -699,7 +704,7 @@ struct ggml_backend_cuda_buffer_context { } ~ggml_backend_cuda_buffer_context() { - // TODO: free here + CUDA_CHECK(cudaFree(dev_ptr)); } }; @@ -714,7 +719,6 @@ GGML_CALL static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) GGML_CALL static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) { ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context; - CUDA_CHECK(cudaFree(ctx->dev_ptr)); delete ctx; }