rm unneeded vocab

Olivier Chafik 2025-02-03 19:59:50 +00:00
parent 7dc271fb37
commit c6214ee9d6
4 changed files with 10 additions and 11 deletions

View file

@@ -534,7 +534,7 @@ static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bo
     return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
 }
 
-static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, const llama_vocab * vocab) {
+static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
     common_chat_params data;
     data.grammar_lazy = inputs.tool_choice != "required";
     data.grammar = build_grammar([&](const common_grammar_builder & builder) {
@@ -904,7 +904,7 @@ static common_chat_params common_chat_params_init_without_tools(const common_cha
     return data;
 }
 
-common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, const llama_vocab * vocab) {
+common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
     auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none";
     LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false");
 
@@ -938,7 +938,7 @@ common_chat_params common_chat_params_init(const common_chat_template & tmpl, co
         return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools);
     }
     if (src.find("<｜tool▁calls▁begin｜>") != std::string::npos) {
-        return common_chat_params_init_deepseek_r1(tmpl, inputs, vocab);
+        return common_chat_params_init_deepseek_r1(tmpl, inputs);
     }
     if (src.find("[TOOL_CALLS]") != std::string::npos) {
         return common_chat_params_init_mistral_nemo(tmpl, inputs);
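
The dispatcher above picks a handler purely by scanning the Jinja template source for format-specific markers, which is why the vocab pointer was never needed at this stage. A minimal sketch of that detection pattern (the detect_chat_format helper and enum are illustrative, not part of llama.cpp):

#include <string>

enum class chat_format { generic, deepseek_r1, mistral_nemo };

// Pick a tool-call format from markers found in the raw template source.
static chat_format detect_chat_format(const std::string & src) {
    if (src.find("<｜tool▁calls▁begin｜>") != std::string::npos) {
        return chat_format::deepseek_r1;   // DeepSeek R1 tool-call opener
    }
    if (src.find("[TOOL_CALLS]") != std::string::npos) {
        return chat_format::mistral_nemo;  // Mistral Nemo tool-call marker
    }
    return chat_format::generic;           // fall back to the generic handler
}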

View file

@@ -47,6 +47,6 @@ struct common_chat_params {
     std::vector<std::string> additional_stops;
 };
 
-struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params, const llama_vocab * vocab = nullptr);
+struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params);
 std::string common_chat_format_name(common_chat_format format);
 common_chat_msg common_chat_parse(const std::string & input, common_chat_format format);
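
With this header change, callers pass only the template and the inputs. A hedged usage sketch, assuming a common_chat_template tmpl is in scope and that common_chat_inputs exposes messages/tools/tool_choice fields as at this revision:

// Hypothetical caller; the field names are assumptions based on this revision.
common_chat_inputs inputs;
inputs.messages    = json::array({ { { "role", "user" }, { "content", "Hello" } } });
inputs.tools       = json();    // no tools for this request
inputs.tool_choice = "none";
common_chat_params params = common_chat_params_init(tmpl, inputs);  // no vocab argument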

View file

@@ -1911,9 +1911,9 @@ struct server_context {
         }});
         GGML_ASSERT(templates.template_default);
         try {
-            common_chat_params_init(*templates.template_default, inputs, vocab);
+            common_chat_params_init(*templates.template_default, inputs);
             if (templates.template_tool_use) {
-                common_chat_params_init(*templates.template_tool_use, inputs, vocab);
+                common_chat_params_init(*templates.template_tool_use, inputs);
             }
             return true;
         } catch (const std::exception & e) {
@@ -4052,7 +4052,7 @@ int main(int argc, char ** argv) {
         }
 
         auto body = json::parse(req.body);
-        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates, llama_model_get_vocab(ctx_server.model));
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
 
         return handle_completions_impl(
             SERVER_TASK_TYPE_COMPLETION,
@@ -4065,7 +4065,7 @@ int main(int argc, char ** argv) {
     // same with handle_chat_completions, but without inference part
     const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
         auto body = json::parse(req.body);
-        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates, llama_model_get_vocab(ctx_server.model));
+        json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
         res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
     };
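
The try/catch hunk above shows the startup pattern: templates are validated by dry-running the (now vocab-free) initialization and treating any exception as "template unsupported". A standalone sketch of that pattern, assuming the same common_chat_* API:

// Hypothetical standalone helper mirroring the server's startup check.
static bool validate_chat_template(const common_chat_template & tmpl,
                                   const common_chat_inputs & inputs) {
    try {
        common_chat_params_init(tmpl, inputs);  // throws if the template cannot be applied
        return true;
    } catch (const std::exception & e) {
        LOG_ERR("chat template is not supported: %s\n", e.what());
        return false;
    }
}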

View file

@@ -582,8 +582,7 @@ static json oaicompat_completion_params_parse(const json & body) {
 static json oaicompat_completion_params_parse(
     const json & body, /* openai api json semantics */
     bool use_jinja,
-    const common_chat_templates & chat_templates,
-    const llama_vocab * vocab)
+    const common_chat_templates & chat_templates)
 {
     json llama_params;
     const auto & tmpl = body.contains("tools") && chat_templates.template_tool_use
@@ -649,7 +648,7 @@ static json oaicompat_completion_params_parse(
     inputs.stream = stream;
     // TODO: support mixing schema w/ tools beyond generic format.
     inputs.json_schema = json_value(llama_params, "json_schema", json());
-    auto chat_params = common_chat_params_init(tmpl, inputs, vocab);
+    auto chat_params = common_chat_params_init(tmpl, inputs);
 
     llama_params["chat_format"] = static_cast<int>(chat_params.format);
     llama_params["prompt"] = chat_params.prompt;