Fix cuda memory leaks
This fixes 2 memory leaks in ggml-cuda.cu - cuMemUnmap called now for the pool allocation - cublasDestroy called to release cublas handles
This commit is contained in:
parent
a0c2dad9d4
commit
9ee3ba6b0f
3 changed files with 34 additions and 5 deletions
|
@ -309,6 +309,10 @@ struct llama_client_slot
|
|||
}
|
||||
};
|
||||
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
extern "C" GGML_CALL void ggml_free_cublas(void);
|
||||
#endif
|
||||
|
||||
struct llama_server_context
|
||||
{
|
||||
llama_model *model = nullptr;
|
||||
|
@ -355,6 +359,10 @@ struct llama_server_context
|
|||
llama_free_model(model);
|
||||
model = nullptr;
|
||||
}
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
ggml_free_cublas();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
bool load_model(const gpt_params ¶ms_)
|
||||
|
@ -3217,6 +3225,7 @@ int main(int argc, char **argv)
|
|||
sigemptyset (&sigint_action.sa_mask);
|
||||
sigint_action.sa_flags = 0;
|
||||
sigaction(SIGINT, &sigint_action, NULL);
|
||||
sigaction(SIGUSR1, &sigint_action, NULL);
|
||||
#elif defined (_WIN32)
|
||||
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
|
||||
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
||||
|
@ -3230,3 +3239,4 @@ int main(int argc, char **argv)
|
|||
llama_backend_free();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
26
ggml-cuda.cu
26
ggml-cuda.cu
|
@ -39,6 +39,7 @@
|
|||
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
|
||||
#define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
|
||||
#define cublasCreate hipblasCreate
|
||||
#define cublasDestroy hipblasDestroy
|
||||
#define cublasGemmEx hipblasGemmEx
|
||||
#define cublasGemmBatchedEx hipblasGemmBatchedEx
|
||||
#define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx
|
||||
|
@ -7991,10 +7992,11 @@ GGML_CALL bool ggml_cublas_loaded(void) {
|
|||
return g_cublas_loaded;
|
||||
}
|
||||
|
||||
GGML_CALL void ggml_init_cublas() {
|
||||
static bool initialized = false;
|
||||
static bool g_cublas_initialized = false;
|
||||
|
||||
if (!initialized) {
|
||||
GGML_CALL void ggml_init_cublas() {
|
||||
|
||||
if (!g_cublas_initialized) {
|
||||
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
||||
|
@ -8004,7 +8006,7 @@ GGML_CALL void ggml_init_cublas() {
|
|||
#endif
|
||||
|
||||
if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
|
||||
initialized = true;
|
||||
g_cublas_initialized = true;
|
||||
g_cublas_loaded = false;
|
||||
fprintf(stderr, "%s: no " GGML_CUDA_NAME " devices found, " GGML_CUDA_NAME " will be disabled\n", __func__);
|
||||
return;
|
||||
|
@ -8075,7 +8077,7 @@ GGML_CALL void ggml_init_cublas() {
|
|||
// configure logging to stdout
|
||||
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
|
||||
|
||||
initialized = true;
|
||||
g_cublas_initialized = true;
|
||||
g_cublas_loaded = true;
|
||||
}
|
||||
}
|
||||
|
@ -11604,3 +11606,17 @@ GGML_CALL int ggml_backend_cuda_reg_devices() {
|
|||
}
|
||||
return device_count;
|
||||
}
|
||||
|
||||
extern "C" GGML_CALL void ggml_free_cublas(void);
|
||||
GGML_CALL void ggml_free_cublas(void) {
|
||||
for (int id = 0; id < g_device_count; ++id) {
|
||||
#if !defined(GGML_USE_HIPBLAS)
|
||||
CU_CHECK(cuMemUnmap(g_cuda_pool_addr[id], g_cuda_pool_size[id]));
|
||||
g_cuda_pool_size[id] = 0;
|
||||
g_cuda_pool_addr[id] = 0;
|
||||
#endif
|
||||
CUBLAS_CHECK(cublasDestroy(g_cublas_handles[id]));
|
||||
g_cublas_handles[id] = nullptr;
|
||||
}
|
||||
g_cublas_initialized = false;
|
||||
}
|
|
@ -20,6 +20,9 @@ extern "C" {
|
|||
// Always success. To check if CUDA is actually loaded, use `ggml_cublas_loaded`.
|
||||
GGML_API GGML_CALL void ggml_init_cublas(void);
|
||||
|
||||
// Release CUDA resources
|
||||
GGML_API GGML_CALL void ggml_free_cublas(void);
|
||||
|
||||
// Returns `true` if there are available CUDA devices and cublas loads successfully; otherwise, it returns `false`.
|
||||
GGML_API GGML_CALL bool ggml_cublas_loaded(void);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue