diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp
index d1731bba6..73cfbadbe 100644
--- a/examples/cvector-generator/cvector-generator.cpp
+++ b/examples/cvector-generator/cvector-generator.cpp
@@ -422,8 +422,7 @@ int main(int argc, char ** argv) {
     int n_layers = llama_n_layer(model);
     int n_embd = llama_n_embd(model);
     // get model hint param (a.k.a model arch name)
-    char model_hint[128];
-    llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
+    char* model_hint = llama_model_meta_val_str(model, "general.architecture");
 
     // init train_context
     train_context ctx_train(n_embd, n_layers);
@@ -496,6 +495,7 @@ int main(int argc, char ** argv) {
 
     // write output vectors to gguf
     export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
+    free(model_hint);
 
     llama_backend_free();
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index b8e003be9..a4740a719 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -661,13 +661,12 @@ struct server_context {
     }
 
     bool validate_model_chat_template() const {
-        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
         std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res >= 0) {
+        char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
+        if (tmpl) {
             llama_chat_message chat[] = {{"user", "test"}};
-            std::string tmpl = std::string(model_template.data(), model_template.size());
-            int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
+            int32_t chat_res = llama_chat_apply_template(model, tmpl, chat, 1, true, nullptr, 0);
+            free(tmpl);
             return chat_res > 0;
         }
         return false;
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index c47ed3e47..9d9bf5509 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -336,15 +336,13 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
 
 static std::string llama_get_chat_template(const struct llama_model * model) {
     std::string template_key = "tokenizer.chat_template";
-    // call with NULL buffer to get the total size of the string
-    int32_t res = llama_model_meta_val_str(model, template_key.c_str(), NULL, 0);
-    if (res < 0) {
+    char* model_template = llama_model_meta_val_str(model, template_key.c_str());
+    if (model_template == NULL) {
         return "";
-    } else {
-        std::vector<char> model_template(res, 0);
-        llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        return std::string(model_template.data(), model_template.size());
     }
+    std::string rv = model_template;
+    free(model_template);
+    return rv;
 }
 
 //
diff --git a/include/llama.h b/include/llama.h
index 90791d5f5..90d473b2e 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -449,21 +449,21 @@ extern "C" {
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
 
     // Functions to access the model's GGUF metadata scalar values
-    // - The functions return the length of the string on success, or -1 on failure
-    // - The output string is always null-terminated and cleared on failure
+    // - The functions return a copy of the string on success, or NULL on failure
+    // - The returned string is heap-allocated and must be freed by the caller using free()
    // - GGUF array values are not supported by these functions
 
     // Get metadata value as a string by key name
-    LLAMA_API int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
+    LLAMA_API char* llama_model_meta_val_str(const struct llama_model * model, const char * key);
 
     // Get the number of metadata key/value pairs
     LLAMA_API int32_t llama_model_meta_count(const struct llama_model * model);
 
     // Get metadata key name by index
-    LLAMA_API int32_t llama_model_meta_key_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);
+    LLAMA_API char* llama_model_meta_key_by_index(const struct llama_model * model, int32_t i);
 
     // Get metadata value as a string by index
-    LLAMA_API int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);
+    LLAMA_API char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i);
 
     // Get a string describing the model type
     LLAMA_API int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
diff --git a/src/llama.cpp b/src/llama.cpp
index c51b36e66..c02d4afd9 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -20097,43 +20097,34 @@ float llama_rope_freq_scale_train(const struct llama_model * model) {
     return model->hparams.rope_freq_scale_train;
 }
 
-int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+char* llama_model_meta_val_str(const struct llama_model * model, const char * key) {
     const auto & it = model->gguf_kv.find(key);
     if (it == model->gguf_kv.end()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
-    return snprintf(buf, buf_size, "%s", it->second.c_str());
+    return strdup(it->second.c_str());
 }
 
 int32_t llama_model_meta_count(const struct llama_model * model) {
     return (int)model->gguf_kv.size();
 }
 
-int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+char* llama_model_meta_key_by_index(const struct llama_model * model, int i) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
     auto it = model->gguf_kv.begin();
     std::advance(it, i);
-    return snprintf(buf, buf_size, "%s", it->first.c_str());
+    return strdup(it->first.c_str());
 }
 
-int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {
+char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
     auto it = model->gguf_kv.begin();
     std::advance(it, i);
-    return snprintf(buf, buf_size, "%s", it->second.c_str());
+    return strdup(it->second.c_str());
 }
 
 int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
@@ -22118,12 +22109,12 @@ int32_t llama_chat_apply_template(
         // load template from model
-        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
         std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res < 0) {
+        char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
+        if (tmpl == NULL) {
             // worst case: there is no information about template, we will use chatml by default
             curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
         } else {
-            curr_tmpl = std::string(model_template.data(), model_template.size());
+            curr_tmpl = tmpl;
+            free(tmpl);
         }
     }
 
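
Not part of the patch, but a minimal caller-side sketch of the new contract for reviewers: each of the three metadata accessors now returns a heap-allocated copy (via strdup) that the caller must free(), and NULL replaces the old -1/cleared-buffer failure path. Model loading uses the existing llama_backend_init() / llama_model_default_params() / llama_load_model_from_file() entry points; the model path is a placeholder.

#include <cstdio>
#include <cstdlib>

#include "llama.h"

int main(int argc, char ** argv) {
    const char * path = argc > 1 ? argv[1] : "model.gguf"; // placeholder path

    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file(path, mparams);
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    // lookup by key: NULL means the key is absent
    char * arch = llama_model_meta_val_str(model, "general.architecture");
    if (arch != NULL) {
        printf("architecture: %s\n", arch);
        free(arch); // copies come from strdup(), so plain free()
    }

    // enumeration by index: NULL means the index is out of range
    int32_t n_kv = llama_model_meta_count(model);
    for (int32_t i = 0; i < n_kv; i++) {
        char * key = llama_model_meta_key_by_index(model, i);
        char * val = llama_model_meta_val_str_by_index(model, i);
        if (key != NULL && val != NULL) {
            printf("%s = %s\n", key, val);
        }
        free(key); // free(NULL) is a no-op
        free(val);
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}

The trade-off: one allocation per lookup instead of a caller-sized buffer (and no more 2048-byte guess for chat templates), at the cost of every call site owning a free().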