diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp
index d1731bba6..73cfbadbe 100644
--- a/examples/cvector-generator/cvector-generator.cpp
+++ b/examples/cvector-generator/cvector-generator.cpp
@@ -422,8 +422,7 @@ int main(int argc, char ** argv) {
     int n_layers = llama_n_layer(model);
     int n_embd = llama_n_embd(model);
     // get model hint param (a.k.a model arch name)
-    char model_hint[128];
-    llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
+    char* model_hint = llama_model_meta_val_str(model, "general.architecture");
 
     // init train_context
     train_context ctx_train(n_embd, n_layers);
@@ -496,6 +495,7 @@ int main(int argc, char ** argv) {
 
     // write output vectors to gguf
     export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
+    free(model_hint);
 
     llama_backend_free();
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index b8e003be9..a4740a719 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -661,13 +661,12 @@ struct server_context {
     }
 
     bool validate_model_chat_template() const {
-        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
         std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res >= 0) {
+        char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
+        if (tmpl) {
             llama_chat_message chat[] = {{"user", "test"}};
-            std::string tmpl = std::string(model_template.data(), model_template.size());
-            int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
+            int32_t chat_res = llama_chat_apply_template(model, tmpl, chat, 1, true, nullptr, 0);
+            free(tmpl);
             return chat_res > 0;
         }
         return false;
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index c47ed3e47..9d9bf5509 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -336,15 +336,13 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
 
 static std::string llama_get_chat_template(const struct llama_model * model) {
     std::string template_key = "tokenizer.chat_template";
-    // call with NULL buffer to get the total size of the string
-    int32_t res = llama_model_meta_val_str(model, template_key.c_str(), NULL, 0);
-    if (res < 0) {
+    char* model_template = llama_model_meta_val_str(model, template_key.c_str());
+    if (model_template == NULL) {
         return "";
-    } else {
-        std::vector<char> model_template(res, 0);
-        llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        return std::string(model_template.data(), model_template.size());
     }
+    std::string rv = model_template;
+    free(model_template);
+    return rv;
 }
 
 //
diff --git a/include/llama.h b/include/llama.h
index 90791d5f5..90d473b2e 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -449,21 +449,21 @@ extern "C" {
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
 
     // Functions to access the model's GGUF metadata scalar values
-    // - The functions return the length of the string on success, or -1 on failure
-    // - The output string is always null-terminated and cleared on failure
+    // - The functions return a copy of the string on success, or NULL on failure
+    // - The returned string is heap-allocated and must be freed by the caller using free()
    // - GGUF array values are not supported by these functions
 
     // Get metadata value as a string by key name
-    LLAMA_API int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
+    LLAMA_API char* llama_model_meta_val_str(const struct llama_model * model, const char * key);
 
     // Get the number of metadata key/value pairs
     LLAMA_API int32_t llama_model_meta_count(const struct llama_model * model);
 
     // Get metadata key name by index
-    LLAMA_API int32_t llama_model_meta_key_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);
+    LLAMA_API char* llama_model_meta_key_by_index(const struct llama_model * model, int32_t i);
 
     // Get metadata value as a string by index
-    LLAMA_API int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);
+    LLAMA_API char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i);
 
     // Get a string describing the model type
     LLAMA_API int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
diff --git a/src/llama.cpp b/src/llama.cpp
index c51b36e66..c02d4afd9 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -20097,43 +20097,34 @@ float llama_rope_freq_scale_train(const struct llama_model * model) {
     return model->hparams.rope_freq_scale_train;
 }
 
-int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+char* llama_model_meta_val_str(const struct llama_model * model, const char * key) {
     const auto & it = model->gguf_kv.find(key);
     if (it == model->gguf_kv.end()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
-    return snprintf(buf, buf_size, "%s", it->second.c_str());
+    return strdup(it->second.c_str());
 }
 
 int32_t llama_model_meta_count(const struct llama_model * model) {
     return (int)model->gguf_kv.size();
 }
 
-int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+char* llama_model_meta_key_by_index(const struct llama_model * model, int i) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
     auto it = model->gguf_kv.begin();
     std::advance(it, i);
-    return snprintf(buf, buf_size, "%s", it->first.c_str());
+    return strdup(it->first.c_str());
 }
 
-int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {
+char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
     auto it = model->gguf_kv.begin();
     std::advance(it, i);
-    return snprintf(buf, buf_size, "%s", it->second.c_str());
+    return strdup(it->second.c_str());
 }
 
 int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
@@ -22118,12 +22109,12 @@ int32_t llama_chat_apply_template(
         // load template from model
-        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
         std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res < 0) {
+        char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
+        if (tmpl == NULL) {
             // worst case: there is no information about template, we will use chatml by default
             curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
         } else {
-            curr_tmpl = std::string(model_template.data(), model_template.size());
+            curr_tmpl = tmpl;
+            free(tmpl);
         }
     }
 
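
Not part of the patch, but a minimal caller-side sketch of the new contract for reviewers: each of the three metadata accessors now returns a heap-allocated copy (via strdup) that the caller must free(), and NULL replaces the old -1/cleared-buffer failure path. Model loading uses the existing llama_backend_init() / llama_model_default_params() / llama_load_model_from_file() entry points; the model path is a placeholder.

#include <cstdio>
#include <cstdlib>

#include "llama.h"

int main(int argc, char ** argv) {
    const char * path = argc > 1 ? argv[1] : "model.gguf"; // placeholder path

    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file(path, mparams);
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    // lookup by key: NULL means the key is absent
    char * arch = llama_model_meta_val_str(model, "general.architecture");
    if (arch != NULL) {
        printf("architecture: %s\n", arch);
        free(arch); // copies come from strdup(), so plain free()
    }

    // enumeration by index: NULL means the index is out of range
    int32_t n_kv = llama_model_meta_count(model);
    for (int32_t i = 0; i < n_kv; i++) {
        char * key = llama_model_meta_key_by_index(model, i);
        char * val = llama_model_meta_val_str_by_index(model, i);
        if (key != NULL && val != NULL) {
            printf("%s = %s\n", key, val);
        }
        free(key); // free(NULL) is a no-op
        free(val);
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}

The trade-off: one allocation per lookup instead of a caller-sized buffer (and no more 2048-byte guess for chat templates), at the cost of every call site owning a free().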