diff --git a/common/chat.cpp b/common/chat.cpp
index 66bbfe993..c33a3c991 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -534,7 +534,7 @@ static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bo
     return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
 }
 
-static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, const llama_vocab * vocab) {
+static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
     common_chat_params data;
     data.grammar_lazy = inputs.tool_choice != "required";
     data.grammar = build_grammar([&](const common_grammar_builder & builder) {
@@ -904,7 +904,7 @@ static common_chat_params common_chat_params_init_without_tools(const common_cha
     return data;
 }
 
-common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, const llama_vocab * vocab) {
+common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
     auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none";
     LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false");
 
@@ -938,7 +938,7 @@ common_chat_params common_chat_params_init(const common_chat_template & tmpl, co
         return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools);
     }
     if (src.find("<|tool▁calls▁begin|>") != std::string::npos) {
-        return common_chat_params_init_deepseek_r1(tmpl, inputs, vocab);
+        return common_chat_params_init_deepseek_r1(tmpl, inputs);
     }
     if (src.find("[TOOL_CALLS]") != std::string::npos) {
         return common_chat_params_init_mistral_nemo(tmpl, inputs);
diff --git a/common/chat.hpp b/common/chat.hpp
index b34d4dab2..33e64a430 100644
--- a/common/chat.hpp
+++ b/common/chat.hpp
@@ -47,6 +47,6 @@ struct common_chat_params {
     std::vector<std::string> additional_stops;
 };
 
-struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params, const llama_vocab * vocab = nullptr);
+struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params);
 std::string common_chat_format_name(common_chat_format format);
 common_chat_msg common_chat_parse(const std::string & input, common_chat_format format);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index f5452b90b..5e440eb0c 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1911,9 +1911,9 @@ struct server_context {
             }});
             GGML_ASSERT(templates.template_default);
             try {
-                common_chat_params_init(*templates.template_default, inputs, vocab);
+                common_chat_params_init(*templates.template_default, inputs);
                 if (templates.template_tool_use) {
-                    common_chat_params_init(*templates.template_tool_use, inputs, vocab);
+                    common_chat_params_init(*templates.template_tool_use, inputs);
                 }
                 return true;
             } catch (const std::exception & e) {
@@ -4052,7 +4052,7 @@ int main(int argc, char ** argv) {
         }
 
         auto body = json::parse(req.body);
-        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates, llama_model_get_vocab(ctx_server.model));
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
 
         return handle_completions_impl(
             SERVER_TASK_TYPE_COMPLETION,
@@ -4065,7 +4065,7 @@ int main(int argc, char ** argv) {
     // same with handle_chat_completions, but without inference part
     const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
         auto body = json::parse(req.body);
-        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates, llama_model_get_vocab(ctx_server.model));
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
         res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
     };
 
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index c2779d194..fefdce55b 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -582,8 +582,7 @@ static json oaicompat_completion_params_parse(const json & body) {
 static json oaicompat_completion_params_parse(
     const json & body, /* openai api json semantics */
     bool use_jinja,
-    const common_chat_templates & chat_templates,
-    const llama_vocab * vocab)
+    const common_chat_templates & chat_templates)
 {
     json llama_params;
     const auto & tmpl = body.contains("tools") && chat_templates.template_tool_use
@@ -649,7 +648,7 @@ static json oaicompat_completion_params_parse(
     inputs.stream = stream;
     // TODO: support mixing schema w/ tools beyond generic format.
     inputs.json_schema = json_value(llama_params, "json_schema", json());
-    auto chat_params = common_chat_params_init(tmpl, inputs, vocab);
+    auto chat_params = common_chat_params_init(tmpl, inputs);
 
     llama_params["chat_format"] = static_cast<int>(chat_params.format);
     llama_params["prompt"] = chat_params.prompt;
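
Since the `vocab` pointer was never actually used on any of these paths, every call site collapses to the two-argument form. A minimal sketch of a caller against the new signature; the wrapper function and its name are illustrative, not part of this patch:

```cpp
#include <string>

#include "chat.hpp"

// Hypothetical helper: renders a prompt via the simplified API. Format
// detection inside common_chat_params_init depends only on the template
// source (e.g. the DeepSeek R1 "<|tool▁calls▁begin|>" marker), so no
// llama_vocab pointer needs to be threaded through from the caller.
static std::string render_prompt(const common_chat_template & tmpl,
                                 const struct common_chat_inputs & inputs) {
    common_chat_params chat_params = common_chat_params_init(tmpl, inputs);
    return chat_params.prompt;
}
```

Dropping the defaulted `const llama_vocab * vocab = nullptr` parameter from the public declaration in `common/chat.hpp` also means the server layers (`server.cpp`, `utils.hpp`) no longer need to look up or forward the vocab at all, which is what the remaining hunks remove.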