diff --git a/common/common-nexa.cpp b/common/common-nexa.cpp
index e8a54ba04..c41f91384 100644
--- a/common/common-nexa.cpp
+++ b/common/common-nexa.cpp
@@ -150,6 +150,7 @@ bool load_hparams_and_tensors_from_gguf(const std::string &fname, NexaBaseModel
     }
 
     ggml_free(meta);
+    gguf_free(ctx_gguf);
 
     return true;
 }
@@ -314,4 +315,4 @@ struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
     GGML_ASSERT(i < cgraph->n_nodes);
 
     return cgraph->nodes[i];
-}
\ No newline at end of file
+}
diff --git a/examples/qwen2-audio/qwen2.cpp b/examples/qwen2-audio/qwen2.cpp
index a42b85bdc..8a08a7ac6 100644
--- a/examples/qwen2-audio/qwen2.cpp
+++ b/examples/qwen2-audio/qwen2.cpp
@@ -724,6 +724,7 @@ void omni_free(struct omni_context *ctx_omni)
     llama_free(ctx_omni->ctx_llama);
     llama_free_model(ctx_omni->model);
     llama_backend_free();
+    free(ctx_omni);
 }
 
 static bool omni_eval_audio_embed(llama_context *ctx_llama, ggml_tensor *audio_embed, int n_batch, int *n_past)
@@ -763,6 +764,7 @@ static bool omni_eval_audio_embed(llama_context *ctx_llama, ggml_tensor *audio_e
         }
         *n_past += n_eval;
     }
 
+    free(audio_embed_data);
     return true;
 }
diff --git a/examples/qwen2-audio/whisper.cpp b/examples/qwen2-audio/whisper.cpp
index 6da9d268d..b2ce58475 100644
--- a/examples/qwen2-audio/whisper.cpp
+++ b/examples/qwen2-audio/whisper.cpp
@@ -9467,6 +9467,8 @@ static bool whisper_encoder_load(struct whisper_model_loader *loader, whisper_co
 
     wctx.t_load_us = ggml_time_us() - t_start_us;
 
+    gguf_free(gguf_ctx);
+
     return true;
 }
 
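
Note on the hunks above: they all enforce the same ownership rule, namely that every allocation made during loading is released once the caller is done with it. The qwen2.cpp changes free the heap-allocated omni context and the temporary embedding buffer; the common-nexa.cpp and whisper.cpp changes pair the GGUF loader with its matching gguf_free(). The snippet below is only a minimal sketch of that GGUF pattern, assuming the public ggml/GGUF C API (gguf_init_from_file, gguf_free, ggml_free); read_metadata is a hypothetical helper invented for illustration and is not part of this patch.

// Sketch only: pair gguf_init_from_file() with gguf_free(), and ggml_free()
// the metadata context it fills. This is the pattern the patch restores in
// load_hparams_and_tensors_from_gguf() and whisper_encoder_load().
#include <cstdio>
#include "ggml.h"   // the tree this patch targets declares the gguf_* API here

static bool read_metadata(const char * fname) {   // hypothetical helper name
    struct ggml_context * meta = nullptr;

    struct gguf_init_params params = {
        /*.no_alloc =*/ true,   // read tensor metadata only, no tensor data
        /*.ctx      =*/ &meta,
    };

    struct gguf_context * ctx_gguf = gguf_init_from_file(fname, params);
    if (ctx_gguf == nullptr) {
        return false;           // failed to open or parse the file
    }

    printf("%s: %lld tensors\n", fname, (long long) gguf_get_n_tensors(ctx_gguf));

    ggml_free(meta);            // releases the metadata ggml context
    gguf_free(ctx_gguf);        // without this, every load leaks the GGUF context
    return true;
}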