From c9e7cbb08b70b73729f5e73ec27064ccb51b271c Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 20 Jan 2025 16:58:29 +0100 Subject: [PATCH] safer jinja `llama_chat_templates` struct --- common/common.cpp | 13 ++++++------- common/common.h | 7 ++++++- examples/server/server.cpp | 11 +++++------ 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 9c535a176..05826c974 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1855,13 +1855,12 @@ llama_chat_templates llama_chat_templates_from_model(const struct llama_model * )"; } } - return { - has_explicit_template, - std::make_unique(default_template_src, bos_token, eos_token), - tool_use_template_src.empty() - ? nullptr - : std::make_unique(tool_use_template_src, bos_token, eos_token) - }; + llama_chat_templates ret(default_template_src, bos_token, eos_token); + ret.has_explicit_template = has_explicit_template; + ret.tool_use_template.reset(tool_use_template_src.empty() + ? nullptr + : new minja::chat_template(tool_use_template_src, bos_token, eos_token)); + return ret; } // diff --git a/common/common.h b/common/common.h index a96a99531..5e80ccb43 100644 --- a/common/common.h +++ b/common/common.h @@ -607,8 +607,13 @@ typedef minja::chat_template llama_chat_template; struct llama_chat_templates { bool has_explicit_template; // Model had builtin template or template overridde was specified. - std::unique_ptr default_template; // always set (defaults to chatml) + llama_chat_template default_template; // always set (defaults to chatml) std::unique_ptr tool_use_template; + + llama_chat_templates( + const std::string & source, + const std::string & bos_token, + const std::string & eos_token) : default_template(source, bos_token, eos_token) {} }; // CPP wrapper for llama_chat_apply_template diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 189290df9..5b5a8f2df 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1748,7 +1748,7 @@ struct server_context { auto templates = llama_chat_templates_from_model(model, ""); GGML_ASSERT(templates.default_template); try { - templates.default_template->apply({{ + templates.default_template.apply({{ {"role", "user"}, {"content", "test"}, }}, json(), true); @@ -3632,7 +3632,6 @@ int main(int argc, char ** argv) { std::lock_guard lock(chat_templates_mutex); if (!chat_templates) { chat_templates = llama_chat_templates_from_model(ctx_server.model, ctx_server.params_base.chat_template); - GGML_ASSERT(chat_templates->default_template); } return *chat_templates; }; @@ -3644,7 +3643,7 @@ int main(int argc, char ** argv) { { "default_generation_settings", ctx_server.default_generation_settings_for_props }, { "total_slots", ctx_server.params_base.n_parallel }, { "model_path", ctx_server.params_base.model }, - { "chat_template", templates.default_template->source() }, + { "chat_template", templates.default_template.source() }, { "build_info", build_info }, }; if (ctx_server.params_base.use_jinja && templates.tool_use_template) { @@ -3871,7 +3870,7 @@ int main(int argc, char ** argv) { auto body = json::parse(req.body); const auto & templates = get_chat_templates(); - const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : *templates.default_template; + const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : templates.default_template; json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja); return handle_completions_impl( @@ -4290,8 +4289,8 @@ int main(int argc, char ** argv) { // print sample chat example to make it clear which template is used LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__, - get_chat_templates().default_template->source().c_str(), - common_chat_format_example(*get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str()); + get_chat_templates().default_template.source().c_str(), + common_chat_format_example(get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str()); ctx_server.queue_tasks.on_new_task(std::bind( &server_context::process_single_task, &ctx_server, std::placeholders::_1));