From c9e7cbb08b70b73729f5e73ec27064ccb51b271c Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Mon, 20 Jan 2025 16:58:29 +0100
Subject: [PATCH] safer jinja `llama_chat_templates` struct

---
 common/common.cpp          | 13 ++++++-------
 common/common.h            |  7 ++++++-
 examples/server/server.cpp | 11 +++++------
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 9c535a176..05826c974 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1855,13 +1855,12 @@ llama_chat_templates llama_chat_templates_from_model(const struct llama_model *
             )";
         }
     }
-    return {
-        has_explicit_template,
-        std::make_unique<minja::chat_template>(default_template_src, bos_token, eos_token),
-        tool_use_template_src.empty()
-            ? nullptr
-            : std::make_unique<minja::chat_template>(tool_use_template_src, bos_token, eos_token)
-    };
+    llama_chat_templates ret(default_template_src, bos_token, eos_token);
+    ret.has_explicit_template = has_explicit_template;
+    if (!tool_use_template_src.empty()) { // optional: stays null when there is no tool-use template source
+        ret.tool_use_template = std::make_unique<minja::chat_template>(tool_use_template_src, bos_token, eos_token);
+    }
+    return ret;
 }
 
 //
diff --git a/common/common.h b/common/common.h
index a96a99531..5e80ccb43 100644
--- a/common/common.h
+++ b/common/common.h
@@ -607,8 +607,13 @@ typedef minja::chat_template llama_chat_template;
 
 struct llama_chat_templates {
     bool has_explicit_template; // Model had builtin template or template overridde was specified.
-    std::unique_ptr<llama_chat_template> default_template; // always set (defaults to chatml)
+    llama_chat_template default_template; // always set (defaults to chatml)
     std::unique_ptr<llama_chat_template> tool_use_template;
+
+    // Builds default_template from the given template source; tool_use_template stays null until assigned.
+    llama_chat_templates(const std::string & source, const std::string & bos_token, const std::string & eos_token)
+        : has_explicit_template(false), // initialized here so the flag is never read while indeterminate
+          default_template(source, bos_token, eos_token) {}
 };
 
 // CPP wrapper for llama_chat_apply_template
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 189290df9..5b5a8f2df 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1748,7 +1748,6 @@ struct server_context {
             auto templates = llama_chat_templates_from_model(model, "");
-            GGML_ASSERT(templates.default_template);
             try {
-                templates.default_template->apply({{
+                templates.default_template.apply({{
                     {"role", "user"},
                     {"content", "test"},
                 }}, json(), true);
@@ -3632,7 +3632,6 @@ int main(int argc, char ** argv) {
         std::lock_guard<std::mutex> lock(chat_templates_mutex);
         if (!chat_templates) {
             chat_templates = llama_chat_templates_from_model(ctx_server.model, ctx_server.params_base.chat_template);
-            GGML_ASSERT(chat_templates->default_template);
         }
         return *chat_templates;
     };
@@ -3644,7 +3643,7 @@ int main(int argc, char ** argv) {
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots",                 ctx_server.params_base.n_parallel },
             { "model_path",                  ctx_server.params_base.model },
-            { "chat_template",               templates.default_template->source() },
+            { "chat_template",               templates.default_template.source() },
             { "build_info",                  build_info },
         };
         if (ctx_server.params_base.use_jinja && templates.tool_use_template) {
@@ -3871,7 +3870,7 @@ int main(int argc, char ** argv) {
 
         auto body = json::parse(req.body);
         const auto & templates = get_chat_templates();
-        const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : *templates.default_template;
+        const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : templates.default_template;
         json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
 
         return handle_completions_impl(
@@ -4290,8 +4289,8 @@ int main(int argc, char ** argv) {
 
     // print sample chat example to make it clear which template is used
     LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
-        get_chat_templates().default_template->source().c_str(),
-        common_chat_format_example(*get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str());
+        get_chat_templates().default_template.source().c_str(),
+        common_chat_format_example(get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str());
 
     ctx_server.queue_tasks.on_new_task(std::bind(
                 &server_context::process_single_task, &ctx_server, std::placeholders::_1));