From fc25544867f591e0831dea493675ff0d8775dfc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Thu, 14 Nov 2024 08:32:55 +0800 Subject: [PATCH 1/4] [memory leakage] fixed a leakage by projector free --- examples/qwen2-audio/qwen2.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/qwen2-audio/qwen2.cpp b/examples/qwen2-audio/qwen2.cpp index 71b2a4a12..a42b85bdc 100644 --- a/examples/qwen2-audio/qwen2.cpp +++ b/examples/qwen2-audio/qwen2.cpp @@ -18,6 +18,7 @@ #include #include #include +#include // // Constants @@ -708,6 +709,7 @@ void omni_free(struct omni_context *ctx_omni) if(internal_chars != nullptr) { free(internal_chars); + internal_chars = nullptr; } if (ctx_omni->ctx_whisper) { @@ -716,7 +718,7 @@ void omni_free(struct omni_context *ctx_omni) } if (ctx_omni->projector) { - ctx_omni->projector->free(); + delete ctx_omni->projector; } llama_free(ctx_omni->ctx_llama); From aad0167bc3accc17ec80db5225576e4130383cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Thu, 14 Nov 2024 14:50:49 +0800 Subject: [PATCH 2/4] audio embedding free() (but still memory leakage detected) --- examples/qwen2-audio/qwen2.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/qwen2-audio/qwen2.cpp b/examples/qwen2-audio/qwen2.cpp index a42b85bdc..ad6a199c7 100644 --- a/examples/qwen2-audio/qwen2.cpp +++ b/examples/qwen2-audio/qwen2.cpp @@ -763,6 +763,7 @@ static bool omni_eval_audio_embed(llama_context *ctx_llama, ggml_tensor *audio_e } *n_past += n_eval; } + free(audio_embed_data); return true; } From 8e2e6304057af44e66c0c3a123ca798dc4d25a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Thu, 14 Nov 2024 22:04:01 +0800 Subject: [PATCH 3/4] fix mem leakage based on leaks tool (still WIP) --- common/common-nexa.cpp | 3 ++- examples/qwen2-audio/whisper.cpp | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/common/common-nexa.cpp b/common/common-nexa.cpp index e8a54ba04..c41f91384 100644 --- a/common/common-nexa.cpp +++ b/common/common-nexa.cpp @@ -150,6 +150,7 @@ bool load_hparams_and_tensors_from_gguf(const std::string &fname, NexaBaseModel } ggml_free(meta); + gguf_free(ctx_gguf); return true; } @@ -314,4 +315,4 @@ struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) { GGML_ASSERT(i < cgraph->n_nodes); return cgraph->nodes[i]; -} \ No newline at end of file +} diff --git a/examples/qwen2-audio/whisper.cpp b/examples/qwen2-audio/whisper.cpp index 6da9d268d..b2ce58475 100644 --- a/examples/qwen2-audio/whisper.cpp +++ b/examples/qwen2-audio/whisper.cpp @@ -9467,6 +9467,8 @@ static bool whisper_encoder_load(struct whisper_model_loader *loader, whisper_co wctx.t_load_us = ggml_time_us() - t_start_us; + gguf_free(gguf_ctx); + return true; } From e4ca946c48ee6e1a848cf88e5f81680179b0fbf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Fri, 15 Nov 2024 08:31:01 +0800 Subject: [PATCH 4/4] free omni_ctx heap malloc space in omni_free() api Currently mem leaks in qwen2audio are almost fixed. --- examples/qwen2-audio/qwen2.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/qwen2-audio/qwen2.cpp b/examples/qwen2-audio/qwen2.cpp index ad6a199c7..8a08a7ac6 100644 --- a/examples/qwen2-audio/qwen2.cpp +++ b/examples/qwen2-audio/qwen2.cpp @@ -724,6 +724,7 @@ void omni_free(struct omni_context *ctx_omni) llama_free(ctx_omni->ctx_llama); llama_free_model(ctx_omni->model); llama_backend_free(); + free(ctx_omni); } static bool omni_eval_audio_embed(llama_context *ctx_llama, ggml_tensor *audio_embed, int n_batch, int *n_past)