diff --git a/common/common-nexa.cpp b/common/common-nexa.cpp
index e8a54ba04..c41f91384 100644
--- a/common/common-nexa.cpp
+++ b/common/common-nexa.cpp
@@ -150,6 +150,7 @@ bool load_hparams_and_tensors_from_gguf(const std::string &fname, NexaBaseModel
     }
 
     ggml_free(meta);
+    gguf_free(ctx_gguf);
 
     return true;
 }
@@ -314,4 +315,4 @@ struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
     GGML_ASSERT(i < cgraph->n_nodes);
 
     return cgraph->nodes[i];
-}
\ No newline at end of file
+}
diff --git a/examples/qwen2-audio/qwen2.cpp b/examples/qwen2-audio/qwen2.cpp
index a42b85bdc..8a08a7ac6 100644
--- a/examples/qwen2-audio/qwen2.cpp
+++ b/examples/qwen2-audio/qwen2.cpp
@@ -724,6 +724,7 @@ void omni_free(struct omni_context *ctx_omni)
     llama_free(ctx_omni->ctx_llama);
     llama_free_model(ctx_omni->model);
     llama_backend_free();
+    free(ctx_omni);
 }
 
 static bool omni_eval_audio_embed(llama_context *ctx_llama, ggml_tensor *audio_embed, int n_batch, int *n_past)
@@ -763,6 +764,7 @@ static bool omni_eval_audio_embed(llama_context *ctx_llama, ggml_tensor *audio_e
         }
         *n_past += n_eval;
     }
 
+    free(audio_embed_data);
     return true;
 }
diff --git a/examples/qwen2-audio/whisper.cpp b/examples/qwen2-audio/whisper.cpp
index 6da9d268d..b2ce58475 100644
--- a/examples/qwen2-audio/whisper.cpp
+++ b/examples/qwen2-audio/whisper.cpp
@@ -9467,6 +9467,8 @@ static bool whisper_encoder_load(struct whisper_model_loader *loader, whisper_co
 
     wctx.t_load_us = ggml_time_us() - t_start_us;
 
+    gguf_free(gguf_ctx);
+
     return true;
 }
 
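
Note on the hunks above: they all enforce the same ownership rule, namely that every allocation made during loading is released once the caller is done with it. The qwen2.cpp changes free the heap-allocated omni context and the temporary embedding buffer; the common-nexa.cpp and whisper.cpp changes pair the GGUF loader with its matching gguf_free(). The snippet below is only a minimal sketch of that GGUF pattern, assuming the public ggml/GGUF C API (gguf_init_from_file, gguf_free, ggml_free); read_metadata is a hypothetical helper invented for illustration and is not part of this patch.

// Sketch only: pair gguf_init_from_file() with gguf_free(), and ggml_free()
// the metadata context it fills. This is the pattern the patch restores in
// load_hparams_and_tensors_from_gguf() and whisper_encoder_load().
#include <cstdio>
#include "ggml.h"   // the tree this patch targets declares the gguf_* API here

static bool read_metadata(const char * fname) {   // hypothetical helper name
    struct ggml_context * meta = nullptr;

    struct gguf_init_params params = {
        /*.no_alloc =*/ true,   // read tensor metadata only, no tensor data
        /*.ctx      =*/ &meta,
    };

    struct gguf_context * ctx_gguf = gguf_init_from_file(fname, params);
    if (ctx_gguf == nullptr) {
        return false;           // failed to open or parse the file
    }

    printf("%s: %lld tensors\n", fname, (long long) gguf_get_n_tensors(ctx_gguf));

    ggml_free(meta);            // releases the metadata ggml context
    gguf_free(ctx_gguf);        // without this, every load leaks the GGUF context
    return true;
}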