tool-calls: add deepseek r1 template + accommodate broken official template slightly better

This commit is contained in:
Olivier Chafik 2025-02-03 19:59:33 +00:00
parent 0be7f652e9
commit 7dc271fb37
3 changed files with 102 additions and 22 deletions

View file

@ -545,8 +545,17 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
auto parameters = function["parameters"]; auto parameters = function["parameters"];
auto args_rule = builder.add_schema(name + "-args", parameters); auto args_rule = builder.add_schema(name + "-args", parameters);
tool_rules.push_back(builder.add_rule(name + "-call", tool_rules.push_back(builder.add_rule(name + "-call",
"\"<tool▁call▁begin>function<tool▁sep>" + name + "\\n```json\\n\" " + args_rule + " \"```<tool▁call▁end>\"")); "\"<tool▁call▁begin>function<tool▁sep>" + name + "\\n"
"```json\\n\" " + args_rule + " \"```"
"<tool▁call▁end>\""));
}); });
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
// so we accept common variants (then it's all constrained)
builder.add_rule("root",
"( \"<tool▁calls▁begin>\" | \"<tool_calls_begin>\" | \"<tool calls begin>\" ) "
"(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
"\"<tool▁calls▁end>\""
" space");
data.grammar_triggers.push_back({"<tool▁calls▁begin>", /* .at_start = */ false}); data.grammar_triggers.push_back({"<tool▁calls▁begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({"<tool_calls_begin>", /* .at_start = */ false}); data.grammar_triggers.push_back({"<tool_calls_begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({"<tool calls begin>", /* .at_start = */ false}); data.grammar_triggers.push_back({"<tool calls begin>", /* .at_start = */ false});
@ -558,27 +567,14 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
"<tool▁call▁begin>", "<tool▁call▁begin>",
"<tool▁call▁end>", "<tool▁call▁end>",
}; };
builder.add_rule("root",
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
// so we accept common variants (then it's all constrained)
"( \"<tool▁calls▁begin>\" | \"<tool_calls_begin>\" | \"<tool calls begin>\" ) "
"(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
"\"<tool▁calls▁end>\""
" space");
}, grammar_options); }, grammar_options);
/*
Note: we do not feed the thoughts back to the template for a few reasons:
- the template doesn't use them explicitly
- if content isn't null, tool calls arent rendered
- not having the thoughts will locally reset the KV cache (losing the hot tokens of the tool calls) but will save up a lot long term.
*/
auto prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); auto prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
std::string suffix = "<Assistant>"; // Hack to fix the official prompt, which leaves the chat dangling after tool results.
if (vocab && !llama_vocab_get_add_eos(vocab) && if (string_ends_with(prompt, "<tool▁outputs▁end>")) {
inputs.add_generation_prompt &&
!string_ends_with(prompt, suffix))
{
prompt += "<end▁of▁sentence>"; prompt += "<end▁of▁sentence>";
if (inputs.add_generation_prompt) {
prompt += "<Assistant>";
}
} }
data.prompt = prompt; data.prompt = prompt;
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1; data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
@ -588,14 +584,14 @@ static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input)
static std::regex trigger_regex("<tool▁calls▁begin>|<tool_calls_begin>|<tool calls begin>"); static std::regex trigger_regex("<tool▁calls▁begin>|<tool_calls_begin>|<tool calls begin>");
static std::regex function_regex("<tool▁call▁begin>function<tool▁sep>([^\n]+)\n```json\n"); static std::regex function_regex("<tool▁call▁begin>function<tool▁sep>([^\n]+)\n```json\n");
static std::regex close_regex("```<tool▁call▁end>"); static std::regex close_regex("```<tool▁call▁end>");
static std::regex think_regex(R"(<think>([\s\S\n]*)</think>([\s\S\r\n]*))"); static std::regex think_regex(R"(<think>([\s\S\n]*)(</think>)?([\s\S\r\n]*))");
auto msg = parse_json_tool_calls(input, trigger_regex, function_regex, close_regex); auto msg = parse_json_tool_calls(input, trigger_regex, function_regex, close_regex);
std::smatch match; std::smatch match;
if (std::regex_match(msg.content, match, think_regex)) { if (std::regex_match(msg.content, match, think_regex)) {
msg.thoughts = string_trim(match[1].str()); msg.thoughts = string_trim(match[1].str());
msg.content = string_trim(match[2].str()); msg.content = string_trim(match[2].str());
} }
if (msg.content == "<tool▁calls▁end>") { if (string_trim(msg.content) == "<tool▁calls▁end>") {
msg.content = ""; msg.content = "";
} }
return msg; return msg;

View file

@ -1202,11 +1202,19 @@ curl http://localhost:8080/v1/chat/completions \
```shell ```shell
# Native support: # Native support:
llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M
# Native support for DeepSeek R1 works best w/ our own template (official template buggy)
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
# Native support requires the right template for these GGUFs: # Native support requires the right template for these GGUFs:

View file

@ -0,0 +1,76 @@
{#- Replacement DeepSeek R1 chat template: renders system prompt + tool
    instructions, tool calls, tool outputs, and strips <think> reasoning
    from prior assistant turns. Fixes two undefined-variable bugs from the
    original: user turns rendered `content` (unset there) and tool calls
    rendered `tool[...]` (never defined; the value is bound to `tool_args`). -#}
{%- if not add_generation_prompt is defined -%}
    {%- set add_generation_prompt = false -%}
{%- endif -%}
{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
{%- for message in messages -%}
    {%- if message['role'] == 'system' -%}
        {%- set ns.system_prompt = message['content'] -%}
    {%- endif -%}
{%- endfor -%}
{{bos_token}}
{%- if tools %}
You can call any of the following function tools to satisfy the user's requests: {{tools | map(attribute='function') | tojson(indent=4)}}
Example function tool call syntax:
<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>example_function_name
```json
{
    "arg1": "some_value"
    ...
}
```
<tool▁call▁end><tool▁calls▁end>
{% endif -%}
{{ns.system_prompt}}
{%- macro flush_tool_outputs() -%}
    {%- if ns.is_tool -%}
        {{- '<tool▁outputs▁end><end▁of▁sentence>' -}}
        {%- set ns.is_tool = false -%}
    {%- endif -%}
{%- endmacro -%}
{{- flush_tool_outputs() -}}
{%- for message in messages -%}
    {%- if message['role'] != 'tool' -%}
        {{- flush_tool_outputs() -}}
    {%- endif -%}
    {%- if message['role'] == 'user' -%}
        {#- fix: was `content`, which is undefined in this branch -#}
        {{- '<User>' + message['content'] + '<end▁of▁sentence>'}}
    {%- endif -%}
    {%- if message['role'] == 'assistant' and message['content'] is none -%}
        {{- '<Assistant><tool▁calls▁begin>'}}
        {%- for tc in message['tool_calls']%}
            {%- if ns.is_first -%}
                {%- set ns.is_first = false -%}
            {%- else -%}
                {{- '\n' -}}
            {%- endif -%}
            {%- set tool_name = tc['function']['name'] -%}
            {%- set tool_args = tc['function']['arguments'] -%}
            {#- fix: was `tool['function']['arguments']` — `tool` is undefined -#}
            {{- '<tool▁call▁begin>' + tc['type'] + '<tool▁sep>' + tool_name + '\n' + '```json' + '\n' + tool_args + '\n' + '```' + '<tool▁call▁end>'}}
        {%- endfor -%}
        {{- '<tool▁calls▁end><end▁of▁sentence>'}}
    {%- endif -%}
    {%- if message['role'] == 'assistant' and message['content'] is not none -%}
        {{- flush_tool_outputs() -}}
        {%- set content = message['content'] -%}
        {#- drop chain-of-thought from prior turns; only the final answer is fed back -#}
        {%- if '</think>' in content -%}
            {%- set content = content.split('</think>')[-1] -%}
        {%- endif -%}
        {{- '<Assistant>' + content + '<end▁of▁sentence>'}}
    {%- endif -%}
    {%- if message['role'] == 'tool' -%}
        {%- set ns.is_tool = true -%}
        {%- if ns.is_output_first -%}
            {{- '<tool▁outputs▁begin>' -}}
            {%- set ns.is_output_first = false -%}
        {%- endif -%}
        {{- '\n<tool▁output▁begin>' + message['content'] + '<tool▁output▁end>'}}
    {%- endif -%}
{%- endfor -%}
{{- flush_tool_outputs() -}}
{%- if add_generation_prompt and not ns.is_tool -%}
    {{- '<Assistant>' -}}
{%- endif -%}