From aa0e28f8fcc540e4c01b71ff65761551518edc27 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Sat, 20 Apr 2024 10:17:03 +0200
Subject: [PATCH] common: add llama_model_kv_override_free

common: free kv override if used after model loading
---
 common/common.cpp | 12 ++++++++++++
 common/common.h   |  3 +++
 llama.cpp         |  8 --------
 llama.h           |  3 ---
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 9838a538f..c30b9e59f 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2232,6 +2232,10 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         return std::make_tuple(nullptr, nullptr);
     }
 
+    if (!params.kv_overrides.empty()) {
+        llama_model_kv_override_free(params.kv_overrides.data());
+    }
+
     auto cparams = llama_context_params_from_gpt_params(params);
 
     llama_context * lctx = llama_new_context_with_model(model, cparams);
@@ -2952,3 +2956,11 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
 
     return result;
 }
+
+void llama_model_kv_override_free(struct llama_model_kv_override * kv_overrides) {
+    for (const struct llama_model_kv_override *p = kv_overrides; p->key[0] != 0; p++) {
+        if (p->tag == LLAMA_KV_OVERRIDE_TYPE_STR) {
+            delete p->str_value;
+        }
+    }
+}
diff --git a/common/common.h b/common/common.h
index 562d3a119..916023e25 100644
--- a/common/common.h
+++ b/common/common.h
@@ -172,6 +172,9 @@ struct gpt_params {
 
 bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 
+// Frees all allocated memory
+void llama_model_kv_override_free(struct llama_model_kv_override * ctx);
+
 bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params);
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
diff --git a/llama.cpp b/llama.cpp
index 2907283f4..9d9f7b4e1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -14987,14 +14987,6 @@ void llama_free_model(struct llama_model * model) {
     delete model;
 }
 
-void llama_model_kv_override_free(struct llama_model_kv_override * kv_overrides) {
-    for (const struct llama_model_kv_override *p = kv_overrides; p->key[0] != 0; p++) {
-        if (p->tag == LLAMA_KV_OVERRIDE_TYPE_STR) {
-            delete p->str_value;
-        }
-    }
-}
-
 struct llama_context * llama_new_context_with_model(
         struct llama_model * model,
         struct llama_context_params params) {
diff --git a/llama.h b/llama.h
index 0f215f336..0cc0d3f29 100644
--- a/llama.h
+++ b/llama.h
@@ -209,9 +209,6 @@ extern "C" {
         };
     };
 
-    // Frees all allocated memory
-    LLAMA_API void llama_model_kv_override_free(struct llama_model_kv_override * ctx);
-
     struct llama_model_params {
         int32_t n_gpu_layers; // number of layers to store in VRAM
         enum llama_split_mode split_mode; // how to split the model across multiple GPUs
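
Note: the sketch below is a minimal, self-contained illustration of the ownership pattern this patch
encodes: a sentinel-terminated override array whose STR entries own heap strings, released by one
helper after model loading. The struct, enum, and function names are simplified stand-ins for the
llama.cpp types (not the real definitions), and the GGUF key used is only illustrative. It pairs
new[] with delete[] for internal consistency; the patch itself uses plain delete.

// Standalone sketch of the ownership pattern behind this patch; stand-in
// types mirror llama_model_kv_override in shape only.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

enum kv_override_type {
    KV_OVERRIDE_TYPE_INT,
    KV_OVERRIDE_TYPE_STR, // stands in for LLAMA_KV_OVERRIDE_TYPE_STR
};

struct kv_override {
    char key[128];
    enum kv_override_type tag;
    union {
        int64_t      int_value;
        const char * str_value; // heap-allocated for STR entries
    };
};

// Mirrors the loop the patch adds to common.cpp: walk until the empty-key
// sentinel and release only the string payloads, the array's sole heap
// allocations. Strings here come from new char[], so delete[] matches.
static void kv_override_free(struct kv_override * kv_overrides) {
    for (const struct kv_override * p = kv_overrides; p->key[0] != 0; p++) {
        if (p->tag == KV_OVERRIDE_TYPE_STR) {
            delete [] p->str_value;
        }
    }
}

int main() {
    std::vector<kv_override> overrides;

    kv_override kvo = {};
    std::snprintf(kvo.key, sizeof(kvo.key), "tokenizer.chat_template");
    kvo.tag = KV_OVERRIDE_TYPE_STR;
    char * val = new char[sizeof("chatml")];
    std::strcpy(val, "chatml");
    kvo.str_value = val;
    overrides.push_back(kvo);

    overrides.push_back({}); // empty key terminates the array, as in the patch

    // ... hand overrides.data() to the model loader here ...

    // After loading, the overrides are no longer needed; free the strings.
    if (!overrides.empty()) {
        kv_override_free(overrides.data());
    }
    return 0;
}

One caveat on the release form: plain delete on str_value is only correct if the string was
allocated with scalar new; strings produced by new[] or strdup would need delete[] or free
respectively, so the helper has to match however parse_kv_override actually allocates them.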