tool-calls: add deepseek r1 template + accommodate broken official template slightly better
This commit is contained in:
parent
0be7f652e9
commit
7dc271fb37
3 changed files with 102 additions and 22 deletions
|
@ -545,8 +545,17 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
|||
auto parameters = function["parameters"];
|
||||
auto args_rule = builder.add_schema(name + "-args", parameters);
|
||||
tool_rules.push_back(builder.add_rule(name + "-call",
|
||||
"\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
|
||||
"\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n"
|
||||
"```json\\n\" " + args_rule + " \"```"
|
||||
"<|tool▁call▁end|>\""));
|
||||
});
|
||||
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
|
||||
// so we accept common variants (then it's all constrained)
|
||||
builder.add_rule("root",
|
||||
"( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" ) "
|
||||
"(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
|
||||
"\"<|tool▁calls▁end|>\""
|
||||
" space");
|
||||
data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
|
||||
data.grammar_triggers.push_back({"<|tool_calls_begin|>", /* .at_start = */ false});
|
||||
data.grammar_triggers.push_back({"<|tool calls begin|>", /* .at_start = */ false});
|
||||
|
@ -558,27 +567,14 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
|||
"<|tool▁call▁begin|>",
|
||||
"<|tool▁call▁end|>",
|
||||
};
|
||||
builder.add_rule("root",
|
||||
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
|
||||
// so we accept common variants (then it's all constrained)
|
||||
"( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" ) "
|
||||
"(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
|
||||
"\"<|tool▁calls▁end|>\""
|
||||
" space");
|
||||
}, grammar_options);
|
||||
/*
|
||||
Note: we do not feed the thoughts back to the template for a few reasons:
|
||||
- the template doesn't use them explicitly
|
||||
- if content isn't null, tool calls aren't rendered
|
||||
- not having the thoughts will locally reset the KV cache (losing the hot tokens of the tool calls) but will save a lot in the long term.
|
||||
*/
|
||||
auto prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
std::string suffix = "<|Assistant|>";
|
||||
if (vocab && !llama_vocab_get_add_eos(vocab) &&
|
||||
inputs.add_generation_prompt &&
|
||||
!string_ends_with(prompt, suffix))
|
||||
{
|
||||
// Hack to fix the official prompt, which leaves the chat dangling after tool results.
|
||||
if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
|
||||
prompt += "<|end▁of▁sentence|>";
|
||||
if (inputs.add_generation_prompt) {
|
||||
prompt += "<|Assistant|>";
|
||||
}
|
||||
}
|
||||
data.prompt = prompt;
|
||||
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
||||
|
@ -588,14 +584,14 @@ static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input)
|
|||
static std::regex trigger_regex("<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>");
|
||||
static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
||||
static std::regex close_regex("```<|tool▁call▁end|>");
|
||||
static std::regex think_regex(R"(<think>([\s\S\n]*)</think>([\s\S\r\n]*))");
|
||||
// Splits a reply into reasoning and visible content.
//   group 1: text after "<think>" up to the first "</think>" (lazy, so it stops at the closing tag)
//   group 2: everything after "</think>"; unmatched (empty) when the model never closed the tag
// The closing tag sits in a non-capturing group so the group indices stay 1 = thoughts, 2 = content.
static std::regex think_regex(R"(<think>([\s\S]*?)(?:</think>([\s\S]*))?)");
|
||||
auto msg = parse_json_tool_calls(input, trigger_regex, function_regex, close_regex);
|
||||
std::smatch match;
|
||||
if (std::regex_match(msg.content, match, think_regex)) {
|
||||
msg.thoughts = string_trim(match[1].str());
|
||||
msg.content = string_trim(match[2].str());
|
||||
}
|
||||
if (msg.content == "<|tool▁calls▁end|>") {
|
||||
if (string_trim(msg.content) == "<|tool▁calls▁end|>") {
|
||||
msg.content = "";
|
||||
}
|
||||
return msg;
|
||||
|
|
|
@ -1202,11 +1202,19 @@ curl http://localhost:8080/v1/chat/completions \
|
|||
|
||||
```shell
|
||||
# Native support:
|
||||
|
||||
llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M
|
||||
llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
|
||||
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
|
||||
llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
|
||||
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M
|
||||
|
||||
# Native support for DeepSeek R1 works best w/ our own template (official template buggy)
|
||||
|
||||
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
|
||||
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
|
||||
|
||||
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
|
||||
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
|
||||
|
||||
# Native support requires the right template for these GGUFs:
|
||||
|
||||
|
|
76
models/templates/llama-cpp-deepseek-r1.jinja
Normal file
76
models/templates/llama-cpp-deepseek-r1.jinja
Normal file
|
@ -0,0 +1,76 @@
|
|||
{%- if not add_generation_prompt is defined -%}
|
||||
{%- set add_generation_prompt = false -%}
|
||||
{%- endif -%}
|
||||
{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] == 'system' -%}
|
||||
{%- set ns.system_prompt = message['content'] -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{{bos_token}}
|
||||
{%- if tools %}
|
||||
You can call any of the following function tools to satisfy the user's requests: {{tools | map(attribute='function') | tojson(indent=4)}}
|
||||
|
||||
Example function tool call syntax:
|
||||
|
||||
<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name
|
||||
```json
|
||||
{
|
||||
"arg1": "some_value"
|
||||
...
|
||||
}
|
||||
```
|
||||
<|tool▁call▁end|><|tool▁calls▁end|>
|
||||
|
||||
{% endif -%}
|
||||
{{ns.system_prompt}}
|
||||
{%- macro flush_tool_outputs() -%}
|
||||
{%- if ns.is_tool -%}
|
||||
{{- '<|tool▁outputs▁end|><|end▁of▁sentence|>' -}}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- endif -%}
|
||||
{%- endmacro -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] != 'tool' -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'user' -%}
|
||||
{#- {{- '<|User|>' + message['content']}} #}
|
||||
{#- Render the user's own text; `content` is only assigned in the assistant branch. #}
{{- '<|User|>' + message['content'] + '<|end▁of▁sentence|>'}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is none -%}
|
||||
{{- '<|Assistant|><|tool▁calls▁begin|>'}}
{#- Reset per message so only the 2nd and later tool calls are preceded by a newline. #}
{%- set ns.is_first = true -%}
|
||||
{%- for tc in message['tool_calls']%}
|
||||
{%- if ns.is_first -%}
|
||||
{%- set ns.is_first = false -%}
|
||||
{%- else -%}
|
||||
{{- '\n' -}}
|
||||
{%- endif -%}
|
||||
{%- set tool_name = tc['function']['name'] -%}
|
||||
{%- set tool_args = tc['function']['arguments'] -%}
|
||||
{#- Use the arguments of the current tool call (`tc`); there is no `tool` variable in this loop. #}
{{- '<|tool▁call▁begin|>' + tc['type'] + '<|tool▁sep|>' + tool_name + '\n' + '```json' + '\n' + tool_args + '\n' + '```' + '<|tool▁call▁end|>'}}
|
||||
{%- endfor -%}
|
||||
{{- '<|tool▁calls▁end|><|end▁of▁sentence|>'}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is not none -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- set content = message['content'] -%}
|
||||
{%- if '</think>' in content -%}
|
||||
{%- set content = content.split('</think>')[-1] -%}
|
||||
{%- endif -%}
|
||||
{{- '<|Assistant|>' + content + '<|end▁of▁sentence|>'}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'tool' -%}
|
||||
{%- set ns.is_tool = true -%}
|
||||
{%- if ns.is_output_first -%}
|
||||
{{- '<|tool▁outputs▁begin|>' -}}
|
||||
{%- set ns.is_output_first = false -%}
|
||||
{%- endif -%}
|
||||
{{- '\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- if add_generation_prompt and not ns.is_tool -%}
|
||||
{{- '<|Assistant|>' -}}
|
||||
{%- endif -%}
|
Loading…
Add table
Add a link
Reference in a new issue