From c88f4a798d15c5d46525108b80f2efaaa2a2ea58 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Thu, 30 Jan 2025 12:00:54 +0100
Subject: [PATCH] simplify handle_apply_template

---
 examples/server/server.cpp                   | 18 ++++++++----------
 examples/server/tests/unit/test_tool_call.py |  2 +-
 examples/server/utils.hpp                    |  7 +++++--
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index ab548d541..754710c68 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -4016,8 +4016,7 @@ int main(int argc, char ** argv) {
         }
 
         auto body = json::parse(req.body);
-        const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
-        json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
 
         return handle_completions_impl(
             SERVER_TASK_TYPE_COMPLETION,
@@ -4027,6 +4026,13 @@ int main(int argc, char ** argv) {
             OAICOMPAT_TYPE_CHAT);
     };
 
+    // same as handle_chat_completions, but without the inference part
+    const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
+        auto body = json::parse(req.body);
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
+        res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
+    };
+
     const auto handle_models = [&params, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
         json models = {
             {"object", "list"},
@@ -4185,14 +4191,6 @@ int main(int argc, char ** argv) {
         res_ok(res, root);
     };
 
-    const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
-        auto body = json::parse(req.body);
-        const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
-        json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
-
-        res_ok(res, {{ "prompt", data.at("prompt") }});
-    };
-
     const auto handle_embeddings = [&handle_embeddings_impl](const httplib::Request & req, httplib::Response & res) {
         handle_embeddings_impl(req, res, OAICOMPAT_TYPE_NONE);
     };
diff --git a/examples/server/tests/unit/test_tool_call.py b/examples/server/tests/unit/test_tool_call.py
index bb25c6435..b72d92cbd 100644
--- a/examples/server/tests/unit/test_tool_call.py
+++ b/examples/server/tests/unit/test_tool_call.py
@@ -71,7 +71,7 @@ def do_test_completion_with_required_tool_tiny(template_name: str, tool: dict, a
     server.jinja = True
     server.n_predict = n_predict
     server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
-    server.start()
+    server.start(timeout_seconds=TIMEOUT_SERVER_START)
     res = server.make_request("POST", "/chat/completions", data={
         "max_tokens": n_predict,
         "messages": [
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 3c23bbeff..3d2c04666 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -580,10 +580,13 @@ static json oaicompat_completion_params_parse(const json & body) {
 
 static json oaicompat_completion_params_parse(
     const json & body, /* openai api json semantics */
-    const common_chat_template & tmpl,
-    bool use_jinja)
+    bool use_jinja,
+    const common_chat_templates & chat_templates)
 {
     json llama_params;
+    const auto & tmpl = body.contains("tools") && chat_templates.template_tool_use
+        ? *chat_templates.template_tool_use
+        : *chat_templates.template_default;
 
     auto tools = json_value(body, "tools", json());
     auto stream = json_value(body, "stream", false);
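
Note (not part of the patch): a quick way to sanity-check the deduplicated
code path from the outside. The sketch below assumes the handle_apply_template
lambda above is registered at POST /apply-template (the route registration is
not shown in this diff) and that a llama-server instance is listening on
localhost:8080; the Python requests call and the payload are illustrative
only, not something this patch adds.

    # Hedged sketch: render a prompt through the chat template without inference.
    # Endpoint path, host/port, and payload shape are assumptions, not patch content.
    import requests

    resp = requests.post(
        "http://localhost:8080/apply-template",
        json={
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Hello!"},
            ],
            # Adding a "tools" key to this body is what makes the refactored
            # oaicompat_completion_params_parse select template_tool_use
            # (when the model ships one) instead of template_default.
        },
    )
    resp.raise_for_status()
    print(resp.json()["prompt"])  # the rendered prompt; no tokens are generated

Because the handler returns only the rendered prompt, this also makes it easy
to confirm that /apply-template and /chat/completions now share the exact same
template-selection logic from utils.hpp.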