From 82e4187f95f969b90b9e27c660cb5168c3e33ec7 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Fri, 19 Apr 2024 13:16:42 +0200
Subject: [PATCH] llama: add llama_model_kv_override_free

---
 common/common.cpp |  1 -
 llama.cpp         | 18 ++++++++++++++++++
 llama.h           |  3 +++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/common/common.cpp b/common/common.cpp
index 29fa5cdbc..9838a538f 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -263,7 +263,6 @@ bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> &
             fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
             return false;
         }
-
     } else if (strncmp(sep, "str:", 4) == 0) {
         sep += 4;
         kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
diff --git a/llama.cpp b/llama.cpp
index 8bf1fdbc6..2907283f4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2929,6 +2929,16 @@ namespace GGUFMeta {
             return false;
         }
 
+        template<typename T>
+        static typename std::enable_if<std::is_same<T, std::string>::value, bool>::type
+        try_override(T & target, const struct llama_model_kv_override * ovrd) {
+            if (validate_override(LLAMA_KV_OVERRIDE_TYPE_STR, ovrd)) {
+                target = ovrd->str_value;
+                return true;
+            }
+            return false;
+        }
+
         template<typename T>
         static typename std::enable_if<std::is_same<T, bool>::value, bool>::type
         try_override(T & target, const struct llama_model_kv_override * ovrd) {
@@ -14977,6 +14987,14 @@ void llama_free_model(struct llama_model * model) {
     delete model;
 }
 
+void llama_model_kv_override_free(struct llama_model_kv_override * kv_overrides) {
+    for (const struct llama_model_kv_override * p = kv_overrides; p->key[0] != 0; p++) {
+        if (p->tag == LLAMA_KV_OVERRIDE_TYPE_STR) {
+            delete[] p->str_value; // str_value is assumed to be allocated with new char[]
+        }
+    }
+}
+
 struct llama_context * llama_new_context_with_model(
                  struct llama_model * model,
                  struct llama_context_params params) {
diff --git a/llama.h b/llama.h
index 0cc0d3f29..0f215f336 100644
--- a/llama.h
+++ b/llama.h
@@ -209,6 +209,9 @@ extern "C" {
         };
     };
 
+    // Frees the heap-allocated string values of a KV override array; the array must be terminated by an empty key
+    LLAMA_API void llama_model_kv_override_free(struct llama_model_kv_override * kv_overrides);
+
     struct llama_model_params {
         int32_t n_gpu_layers; // number of layers to store in VRAM
         enum llama_split_mode split_mode; // how to split the model across multiple GPUs
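
For reviewers, a minimal sketch of the intended call pattern, assuming the caller
allocates str_value with new char[] (matching the delete[] in
llama_model_kv_override_free) and the union member str_value introduced alongside
this series; the model path and the override key/value are placeholders:

#include <cstring>
#include <vector>

#include "llama.h"

int main() {
    // Build an override array terminated by an empty key; string values are
    // heap-allocated, which is what llama_model_kv_override_free() releases.
    std::vector<llama_model_kv_override> overrides(2);

    std::strcpy(overrides[0].key, "tokenizer.chat_template"); // placeholder key
    overrides[0].tag = LLAMA_KV_OVERRIDE_TYPE_STR;
    char * val = new char[7];
    std::strcpy(val, "chatml"); // placeholder value, 6 chars + NUL
    overrides[0].str_value = val;

    overrides[1].key[0] = 0; // empty key terminates the array

    llama_model_params mparams = llama_model_default_params();
    mparams.kv_overrides = overrides.data();

    // "model.gguf" is a placeholder path.
    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model) {
        llama_free_model(model);
    }

    // Release the string values owned by the override array, exactly once.
    llama_model_kv_override_free(overrides.data());
    return 0;
}

The sketch reflects the ownership convention this patch establishes: loading the
model does not take ownership of the strings, so the caller frees them through the
new API after the overrides are no longer needed.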