From c88f4a798d15c5d46525108b80f2efaaa2a2ea58 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Thu, 30 Jan 2025 12:00:54 +0100
Subject: [PATCH] simplify handle_apply_template

---
 examples/server/server.cpp                   | 18 ++++++++----------
 examples/server/tests/unit/test_tool_call.py |  2 +-
 examples/server/utils.hpp                    |  7 +++++--
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index ab548d541..754710c68 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -4016,8 +4016,7 @@ int main(int argc, char ** argv) {
         }
 
         auto body = json::parse(req.body);
-        const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
-        json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
 
         return handle_completions_impl(
             SERVER_TASK_TYPE_COMPLETION,
@@ -4027,6 +4026,13 @@ int main(int argc, char ** argv) {
             OAICOMPAT_TYPE_CHAT);
     };
 
+    // same as handle_chat_completions, but without the inference part
+    const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
+        auto body = json::parse(req.body);
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
+        res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
+    };
+
     const auto handle_models = [&params, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
         json models = {
             {"object", "list"},
@@ -4185,14 +4191,6 @@ int main(int argc, char ** argv) {
         res_ok(res, root);
     };
 
-    const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
-        auto body = json::parse(req.body);
-        const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
-        json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
-
-        res_ok(res, {{ "prompt", data.at("prompt") }});
-    };
-
     const auto handle_embeddings = [&handle_embeddings_impl](const httplib::Request & req, httplib::Response & res) {
         handle_embeddings_impl(req, res, OAICOMPAT_TYPE_NONE);
     };
diff --git a/examples/server/tests/unit/test_tool_call.py b/examples/server/tests/unit/test_tool_call.py
index bb25c6435..b72d92cbd 100644
--- a/examples/server/tests/unit/test_tool_call.py
+++ b/examples/server/tests/unit/test_tool_call.py
@@ -71,7 +71,7 @@ def do_test_completion_with_required_tool_tiny(template_name: str, tool: dict, a
     server.jinja = True
     server.n_predict = n_predict
     server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
-    server.start()
+    server.start(timeout_seconds=TIMEOUT_SERVER_START)
     res = server.make_request("POST", "/chat/completions", data={
         "max_tokens": n_predict,
         "messages": [
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 3c23bbeff..3d2c04666 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -580,10 +580,13 @@ static json oaicompat_completion_params_parse(const json & body) {
 
 static json oaicompat_completion_params_parse(
     const json & body, /* openai api json semantics */
-    const common_chat_template & tmpl,
-    bool use_jinja)
+    bool use_jinja,
+    const common_chat_templates & chat_templates)
 {
     json llama_params;
+    const auto & tmpl = body.contains("tools") && chat_templates.template_tool_use
+        ? *chat_templates.template_tool_use
+        : *chat_templates.template_default;
 
     auto tools = json_value(body, "tools", json());
     auto stream = json_value(body, "stream", false);
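
Note (not part of the patch): a quick way to sanity-check the deduplicated
code path from the outside. The sketch below assumes the handle_apply_template
lambda above is registered at POST /apply-template (the route registration is
not shown in this diff) and that a llama-server instance is listening on
localhost:8080; the Python requests call and the payload are illustrative
only, not something this patch adds.

    # Hedged sketch: render a prompt through the chat template without inference.
    # Endpoint path, host/port, and payload shape are assumptions, not patch content.
    import requests

    resp = requests.post(
        "http://localhost:8080/apply-template",
        json={
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Hello!"},
            ],
            # Adding a "tools" key to this body is what makes the refactored
            # oaicompat_completion_params_parse select template_tool_use
            # (when the model ships one) instead of template_default.
        },
    )
    resp.raise_for_status()
    print(resp.json()["prompt"])  # the rendered prompt; no tokens are generated

Because the handler returns only the rendered prompt, this also makes it easy
to confirm that /apply-template and /chat/completions now share the exact same
template-selection logic from utils.hpp.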