llama: add llama_model_kv_override_free

2024-04-19 13:16:42 +02:00 · 2024-04-19 13:16:42 +02:00 · 82e4187f95
commit 82e4187f95
parent ea0ad80a4f
3 changed files with 21 additions and 1 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@ -263,7 +263,6 @@ bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> &
            fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
            return false;
        }
    } else if (strncmp(sep, "str:", 4) == 0) {
        sep += 4;
        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
--- a/llama.cpp
+++ b/llama.cpp
@ -2929,6 +2929,16 @@ namespace GGUFMeta {
            return false;
        }
        // String-pointer variant of try_override, selected when OT is `char *`.
        //
        // If `ovrd` passes validate_override() for LLAMA_KV_OVERRIDE_TYPE_STR,
        // assigns ovrd->str_value to `target`.
        //
        // Returns true when the override was applied, false otherwise.
        //
        // The return type is `bool`, matching the other try_override overloads;
        // returning true/false through a `char *` return type does not compile
        // once this overload is instantiated.
        //
        // NOTE(review): if T is a raw pointer type, `target` ends up referring to
        // the override's own string storage (no copy is made here), so it
        // presumably must not outlive the override entry - confirm with callers.
        template<typename OT>
        static typename std::enable_if<std::is_same<OT, char *>::value, bool>::type
        try_override(T & target, const struct llama_model_kv_override * ovrd) {
            if (validate_override(LLAMA_KV_OVERRIDE_TYPE_STR, ovrd)) {
                target = ovrd->str_value;
                return true;
            }
            return false;
        }
        template<typename OT>
        static typename std::enable_if<std::is_same<OT, std::string>::value, bool>::type
        try_override(T & target, const struct llama_model_kv_override * ovrd) {
@ -14977,6 +14987,14 @@ void llama_free_model(struct llama_model * model) {
    delete model;
 }
 // Releases the string values held by a list of KV overrides.
 //
 // `kv_overrides` is walked entry by entry until an entry whose key is empty
 // (key[0] == 0) is reached; for every entry tagged LLAMA_KV_OVERRIDE_TYPE_STR
 // the `str_value` pointer is deleted. Entries with other tags are left alone,
 // and the array itself is not freed.
 //
 // A null `kv_overrides` is accepted and treated as an empty list.
 //
 // The freed pointers are not reset, so calling this twice on the same list
 // deletes the same pointers twice.
 //
 // NOTE(review): scalar `delete` is only correct if str_value was allocated
 // with scalar `new`; a buffer obtained from new[] / malloc / strdup needs
 // delete[] / free instead - confirm against the code that fills str_value.
 void llama_model_kv_override_free(struct llama_model_kv_override * kv_overrides) {
    if (kv_overrides == nullptr) {
        // No override list was supplied: nothing to walk, nothing to free.
        return;
    }
    for (const struct llama_model_kv_override *p = kv_overrides; p->key[0] != 0; p++) {
        if (p->tag == LLAMA_KV_OVERRIDE_TYPE_STR) {
            delete p->str_value;
        }
    }
 }
 struct llama_context * llama_new_context_with_model(
                 struct llama_model * model,
        struct llama_context_params   params) {
--- a/llama.h
+++ b/llama.h
@ -209,6 +209,9 @@ extern "C" {
        };
    };
    // Frees the string values owned by the entries of a KV override list.
    // kv_overrides: array of overrides terminated by an entry with an empty key
    //               (key[0] == 0). Only entries tagged LLAMA_KV_OVERRIDE_TYPE_STR
    //               have their str_value released; the array itself is not freed.
    LLAMA_API void llama_model_kv_override_free(struct llama_model_kv_override * kv_overrides);
    struct llama_model_params {
        int32_t n_gpu_layers; // number of layers to store in VRAM
        enum llama_split_mode split_mode; // how to split the model across multiple GPUs