fix leaks
This commit is contained in:
parent
5b04360420
commit
7befc54e82
1 changed files with 11 additions and 7 deletions
18
ggml-cuda.cu
18
ggml-cuda.cu
|
@ -621,11 +621,6 @@ struct ggml_backend_cuda_context {
|
||||||
cudaStream_t streams[GGML_CUDA_MAX_DEVICES][GGML_CUDA_MAX_STREAMS] = { { nullptr } };
|
cudaStream_t streams[GGML_CUDA_MAX_DEVICES][GGML_CUDA_MAX_STREAMS] = { { nullptr } };
|
||||||
cublasHandle_t cublas_handles[GGML_CUDA_MAX_DEVICES] = {nullptr};
|
cublasHandle_t cublas_handles[GGML_CUDA_MAX_DEVICES] = {nullptr};
|
||||||
|
|
||||||
struct cuda_device {
|
|
||||||
cublasHandle_t cublas_handle = nullptr;
|
|
||||||
cudaStream_t cudaStreams[GGML_CUDA_MAX_STREAMS] = {};
|
|
||||||
};
|
|
||||||
|
|
||||||
explicit ggml_backend_cuda_context(int device) :
|
explicit ggml_backend_cuda_context(int device) :
|
||||||
device(device),
|
device(device),
|
||||||
name(GGML_CUDA_NAME + std::to_string(device)) {
|
name(GGML_CUDA_NAME + std::to_string(device)) {
|
||||||
|
@ -635,6 +630,16 @@ struct ggml_backend_cuda_context {
|
||||||
if (copy_event != nullptr) {
|
if (copy_event != nullptr) {
|
||||||
CUDA_CHECK(cudaEventDestroy(copy_event));
|
CUDA_CHECK(cudaEventDestroy(copy_event));
|
||||||
}
|
}
|
||||||
|
for (int i = 0; i < GGML_CUDA_MAX_DEVICES; ++i) {
|
||||||
|
for (int j = 0; j < GGML_CUDA_MAX_STREAMS; ++j) {
|
||||||
|
if (streams[i][j] != nullptr) {
|
||||||
|
CUDA_CHECK(cudaStreamDestroy(streams[i][j]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cublas_handles[i] != nullptr) {
|
||||||
|
CUBLAS_CHECK(cublasDestroy(cublas_handles[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaStream_t stream(int device, int stream) {
|
cudaStream_t stream(int device, int stream) {
|
||||||
|
@ -699,7 +704,7 @@ struct ggml_backend_cuda_buffer_context {
|
||||||
}
|
}
|
||||||
|
|
||||||
~ggml_backend_cuda_buffer_context() {
|
~ggml_backend_cuda_buffer_context() {
|
||||||
// TODO: free here
|
CUDA_CHECK(cudaFree(dev_ptr));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -714,7 +719,6 @@ GGML_CALL static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer)
|
||||||
|
|
||||||
GGML_CALL static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
GGML_CALL static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
||||||
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
||||||
CUDA_CHECK(cudaFree(ctx->dev_ptr));
|
|
||||||
delete ctx;
|
delete ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue