Allocate and return C strings in metadata functions
This commit is contained in:
parent
3952a221af
commit
f4bd7cdd2b
5 changed files with 29 additions and 40 deletions
|
@ -422,8 +422,7 @@ int main(int argc, char ** argv) {
|
||||||
int n_layers = llama_n_layer(model);
|
int n_layers = llama_n_layer(model);
|
||||||
int n_embd = llama_n_embd(model);
|
int n_embd = llama_n_embd(model);
|
||||||
// get model hint param (a.k.a model arch name)
|
// get model hint param (a.k.a model arch name)
|
||||||
char model_hint[128];
|
char* model_hint = llama_model_meta_val_str(model, "general.architecture");
|
||||||
llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
|
|
||||||
|
|
||||||
// init train_context
|
// init train_context
|
||||||
train_context ctx_train(n_embd, n_layers);
|
train_context ctx_train(n_embd, n_layers);
|
||||||
|
@ -496,6 +495,7 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
// write output vectors to gguf
|
// write output vectors to gguf
|
||||||
export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
|
export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
|
||||||
|
free(model_hint);
|
||||||
|
|
||||||
llama_backend_free();
|
llama_backend_free();
|
||||||
|
|
||||||
|
|
|
@ -661,13 +661,12 @@ struct server_context {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool validate_model_chat_template() const {
|
bool validate_model_chat_template() const {
|
||||||
std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
|
|
||||||
std::string template_key = "tokenizer.chat_template";
|
std::string template_key = "tokenizer.chat_template";
|
||||||
int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
|
char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
|
||||||
if (res >= 0) {
|
if (tmpl) {
|
||||||
llama_chat_message chat[] = {{"user", "test"}};
|
llama_chat_message chat[] = {{"user", "test"}};
|
||||||
std::string tmpl = std::string(model_template.data(), model_template.size());
|
int32_t chat_res = llama_chat_apply_template(model, tmpl, chat, 1, true, nullptr, 0);
|
||||||
int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
|
free(tmpl);
|
||||||
return chat_res > 0;
|
return chat_res > 0;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -336,15 +336,13 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
|
||||||
|
|
||||||
static std::string llama_get_chat_template(const struct llama_model * model) {
|
static std::string llama_get_chat_template(const struct llama_model * model) {
|
||||||
std::string template_key = "tokenizer.chat_template";
|
std::string template_key = "tokenizer.chat_template";
|
||||||
// call with NULL buffer to get the total size of the string
|
char* model_template = llama_model_meta_val_str(model, template_key.c_str());
|
||||||
int32_t res = llama_model_meta_val_str(model, template_key.c_str(), NULL, 0);
|
if (model_template == NULL) {
|
||||||
if (res < 0) {
|
|
||||||
return "";
|
return "";
|
||||||
} else {
|
|
||||||
std::vector<char> model_template(res, 0);
|
|
||||||
llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
|
|
||||||
return std::string(model_template.data(), model_template.size());
|
|
||||||
}
|
}
|
||||||
|
std::string rv = model_template;
|
||||||
|
free(model_template);
|
||||||
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
@ -449,21 +449,21 @@ extern "C" {
|
||||||
LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
|
LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
|
||||||
|
|
||||||
// Functions to access the model's GGUF metadata scalar values
|
// Functions to access the model's GGUF metadata scalar values
|
||||||
// - The functions return the length of the string on success, or -1 on failure
|
// - The functions return a newly allocated, null-terminated copy of the string on success, or NULL on failure
|
||||||
// - The output string is always null-terminated and cleared on failure
|
// - The returned string is owned by the caller and must be deallocated with free()
|
||||||
// - GGUF array values are not supported by these functions
|
// - GGUF array values are not supported by these functions
|
||||||
|
|
||||||
// Get metadata value as a string by key name
|
// Get metadata value as a string by key name
|
||||||
LLAMA_API int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
|
LLAMA_API char* llama_model_meta_val_str(const struct llama_model * model, const char * key);
|
||||||
|
|
||||||
// Get the number of metadata key/value pairs
|
// Get the number of metadata key/value pairs
|
||||||
LLAMA_API int32_t llama_model_meta_count(const struct llama_model * model);
|
LLAMA_API int32_t llama_model_meta_count(const struct llama_model * model);
|
||||||
|
|
||||||
// Get metadata key name by index
|
// Get metadata key name by index
|
||||||
LLAMA_API int32_t llama_model_meta_key_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);
|
LLAMA_API char* llama_model_meta_key_by_index(const struct llama_model * model, int32_t i);
|
||||||
|
|
||||||
// Get metadata value as a string by index
|
// Get metadata value as a string by index
|
||||||
LLAMA_API int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);
|
LLAMA_API char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i);
|
||||||
|
|
||||||
// Get a string describing the model type
|
// Get a string describing the model type
|
||||||
LLAMA_API int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
|
LLAMA_API int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
|
||||||
|
|
|
@ -20097,43 +20097,34 @@ float llama_rope_freq_scale_train(const struct llama_model * model) {
|
||||||
return model->hparams.rope_freq_scale_train;
|
return model->hparams.rope_freq_scale_train;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
|
char* llama_model_meta_val_str(const struct llama_model * model, const char * key) {
|
||||||
const auto & it = model->gguf_kv.find(key);
|
const auto & it = model->gguf_kv.find(key);
|
||||||
if (it == model->gguf_kv.end()) {
|
if (it == model->gguf_kv.end()) {
|
||||||
if (buf_size > 0) {
|
return NULL;
|
||||||
buf[0] = '\0';
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
return snprintf(buf, buf_size, "%s", it->second.c_str());
|
return strdup(it->second.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t llama_model_meta_count(const struct llama_model * model) {
|
int32_t llama_model_meta_count(const struct llama_model * model) {
|
||||||
return (int)model->gguf_kv.size();
|
return (int)model->gguf_kv.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
|
char* llama_model_meta_key_by_index(const struct llama_model * model, int i) {
|
||||||
if (i < 0 || i >= (int)model->gguf_kv.size()) {
|
if (i < 0 || i >= (int)model->gguf_kv.size()) {
|
||||||
if (buf_size > 0) {
|
return NULL;
|
||||||
buf[0] = '\0';
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
auto it = model->gguf_kv.begin();
|
auto it = model->gguf_kv.begin();
|
||||||
std::advance(it, i);
|
std::advance(it, i);
|
||||||
return snprintf(buf, buf_size, "%s", it->first.c_str());
|
return strdup(it->first.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {
|
char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i) {
|
||||||
if (i < 0 || i >= (int)model->gguf_kv.size()) {
|
if (i < 0 || i >= (int)model->gguf_kv.size()) {
|
||||||
if (buf_size > 0) {
|
return NULL;
|
||||||
buf[0] = '\0';
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
auto it = model->gguf_kv.begin();
|
auto it = model->gguf_kv.begin();
|
||||||
std::advance(it, i);
|
std::advance(it, i);
|
||||||
return snprintf(buf, buf_size, "%s", it->second.c_str());
|
return strdup(it->second.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
|
int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
|
||||||
|
@ -22118,12 +22109,13 @@ int32_t llama_chat_apply_template(
|
||||||
// load template from model
|
// load template from model
|
||||||
std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
|
std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
|
||||||
std::string template_key = "tokenizer.chat_template";
|
std::string template_key = "tokenizer.chat_template";
|
||||||
int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
|
char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
|
||||||
if (res < 0) {
|
if (tmpl == NULL) {
|
||||||
// worst case: there is no information about template, we will use chatml by default
|
// worst case: there is no information about template, we will use chatml by default
|
||||||
curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
|
curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
|
||||||
} else {
|
} else {
|
||||||
curr_tmpl = std::string(model_template.data(), model_template.size());
|
curr_tmpl = tmpl;
|
||||||
|
free(tmpl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue