metadata: use char* under the hood to avoid conversion round trips

Karl-Johan Alm 2024-11-20 22:26:45 +09:00
parent 3952a221af
commit 17a800be68
5 changed files with 23 additions and 40 deletions
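
For orientation before the per-file hunks, a minimal caller-side sketch of the change (illustrative only, not part of the commit; the helper name print_arch is made up): the old API copied the value into a caller-owned buffer and returned its length or -1, while the new API hands back the stored string and uses NULL to signal a missing key.

    #include <cstdio>
    #include "llama.h"

    // Assumes the post-commit signature:
    //   const char* llama_model_meta_val_str(const struct llama_model *, const char * key);
    // Previously the caller wrote something like:
    //   char buf[128];
    //   llama_model_meta_val_str(model, "general.architecture", buf, sizeof(buf));
    static void print_arch(const struct llama_model * model) {
        const char * arch = llama_model_meta_val_str(model, "general.architecture");
        if (arch == NULL) {
            printf("general.architecture: <not present>\n");
            return;
        }
        printf("general.architecture: %s\n", arch);
    }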

@@ -422,8 +422,7 @@ int main(int argc, char ** argv) {
     int n_layers = llama_n_layer(model);
     int n_embd = llama_n_embd(model);
     // get model hint param (a.k.a model arch name)
-    char model_hint[128];
-    llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
+    const char* model_hint = llama_model_meta_val_str(model, "general.architecture");
     // init train_context
     train_context ctx_train(n_embd, n_layers);

@@ -663,15 +663,14 @@ struct server_context {
     bool validate_model_chat_template() const {
         std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
         std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res >= 0) {
-            llama_chat_message chat[] = {{"user", "test"}};
-            std::string tmpl = std::string(model_template.data(), model_template.size());
-            int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
-            return chat_res > 0;
-        }
-        return false;
+        const char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
+        if (tmpl == NULL) {
+            return false;
+        }
+        llama_chat_message chat[] = {{"user", "test"}};
+        int32_t chat_res = llama_chat_apply_template(model, tmpl, chat, 1, true, nullptr, 0);
+        return chat_res > 0;
     }

     void init() {
         const int32_t n_ctx_slot = n_ctx / params.n_parallel;

@@ -336,15 +336,8 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
 static std::string llama_get_chat_template(const struct llama_model * model) {
     std::string template_key = "tokenizer.chat_template";
-    // call with NULL buffer to get the total size of the string
-    int32_t res = llama_model_meta_val_str(model, template_key.c_str(), NULL, 0);
-    if (res < 0) {
-        return "";
-    } else {
-        std::vector<char> model_template(res, 0);
-        llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        return std::string(model_template.data(), model_template.size());
-    }
+    const char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
+    return tmpl ? tmpl : "";
 }

 //

@@ -449,12 +449,11 @@ extern "C" {
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);

     // Functions to access the model's GGUF metadata scalar values
-    // - The functions return the length of the string on success, or -1 on failure
-    // - The output string is always null-terminated and cleared on failure
+    // - The functions return a const char* of the value on success, and NULL on failure.
     // - GGUF array values are not supported by these functions

     // Get metadata value as a string by key name
-    LLAMA_API int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
+    LLAMA_API const char* llama_model_meta_val_str(const struct llama_model * model, const char * key);

     // Get the number of metadata key/value pairs
     LLAMA_API int32_t llama_model_meta_count(const struct llama_model * model);
@@ -463,7 +462,7 @@ extern "C" {
     LLAMA_API int32_t llama_model_meta_key_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);

     // Get metadata value as a string by index
-    LLAMA_API int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);
+    LLAMA_API const char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i);

     // Get a string describing the model type
     LLAMA_API int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
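
A small usage sketch for the by-index accessors as they stand after this hunk (illustrative, not part of the commit; dump_metadata is a made-up helper): keys are still copied into a caller buffer via llama_model_meta_key_by_index, while values now come back as const char*.

    #include <cstdio>
    #include "llama.h"

    // Iterate all GGUF metadata pairs with the declarations above.
    static void dump_metadata(const struct llama_model * model) {
        const int32_t n = llama_model_meta_count(model);
        for (int32_t i = 0; i < n; i++) {
            char key[256];
            if (llama_model_meta_key_by_index(model, i, key, sizeof(key)) < 0) {
                continue; // index out of range
            }
            const char * val = llama_model_meta_val_str_by_index(model, i);
            printf("%s = %s\n", key, val ? val : "<unavailable>");
        }
    }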

@@ -2887,7 +2887,7 @@ struct llama_model {
     std::vector<llama_layer> layers;

     // gguf metadata
-    std::unordered_map<std::string, std::string> gguf_kv;
+    std::unordered_map<std::string, char*> gguf_kv;

     llama_split_mode split_mode;
     int main_gpu;
@@ -5389,7 +5389,7 @@ static void llm_load_hparams(
         }

         const char * name = gguf_get_key(ctx, i);
         const std::string value = gguf_kv_to_str(ctx, i);
-        model.gguf_kv.emplace(name, value);
+        model.gguf_kv.emplace(name, strdup(value.c_str()));
     }

     // get general kv
@@ -20097,15 +20097,12 @@ float llama_rope_freq_scale_train(const struct llama_model * model) {
     return model->hparams.rope_freq_scale_train;
 }

-int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+const char* llama_model_meta_val_str(const struct llama_model * model, const char * key) {
     const auto & it = model->gguf_kv.find(key);
     if (it == model->gguf_kv.end()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
-    return snprintf(buf, buf_size, "%s", it->second.c_str());
+    return it->second;
 }

 int32_t llama_model_meta_count(const struct llama_model * model) {
@@ -20124,16 +20121,13 @@ int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, c
     return snprintf(buf, buf_size, "%s", it->first.c_str());
 }

-int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {
+const char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
     auto it = model->gguf_kv.begin();
     std::advance(it, i);
-    return snprintf(buf, buf_size, "%s", it->second.c_str());
+    return it->second;
 }

 int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
@@ -22116,14 +22110,13 @@ int32_t llama_chat_apply_template(
     if (tmpl == nullptr) {
         GGML_ASSERT(model != nullptr);
         // load template from model
-        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
         std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res < 0) {
+        const char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
+        if (tmpl == NULL) {
             // worst case: there is no information about template, we will use chatml by default
             curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
         } else {
-            curr_tmpl = std::string(model_template.data(), model_template.size());
+            curr_tmpl = tmpl;
         }
     }
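
A closing note on ownership, as implied by the storage change above rather than by any documented guarantee: the returned const char* points at a string strdup'd into model->gguf_kv at load time, so it stays valid for the lifetime of the model and must not be freed by the caller; take a copy if the value has to outlive the model. A tiny sketch (copy_chat_template is a made-up helper):

    #include <string>
    #include "llama.h"

    // Take an owned copy when the value must survive the model being freed.
    static std::string copy_chat_template(const struct llama_model * model) {
        const char * tmpl = llama_model_meta_val_str(model, "tokenizer.chat_template");
        return tmpl ? std::string(tmpl) : std::string();
    }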