tool-calls: add deepseek r1 template + accommodate broken official template slightly better

This commit is contained in:
Olivier Chafik 2025-02-03 19:59:33 +00:00
parent 0be7f652e9
commit 7dc271fb37
3 changed files with 102 additions and 22 deletions

View file

@ -545,8 +545,17 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
auto parameters = function["parameters"]; auto parameters = function["parameters"];
auto args_rule = builder.add_schema(name + "-args", parameters); auto args_rule = builder.add_schema(name + "-args", parameters);
tool_rules.push_back(builder.add_rule(name + "-call", tool_rules.push_back(builder.add_rule(name + "-call",
"\"<tool▁call▁begin>function<tool▁sep>" + name + "\\n```json\\n\" " + args_rule + " \"```<tool▁call▁end>\"")); "\"<tool▁call▁begin>function<tool▁sep>" + name + "\\n"
"```json\\n\" " + args_rule + " \"```"
"<tool▁call▁end>\""));
}); });
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
// so we accept common variants (then it's all constrained)
builder.add_rule("root",
"( \"<tool▁calls▁begin>\" | \"<tool_calls_begin>\" | \"<tool calls begin>\" ) "
"(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
"\"<tool▁calls▁end>\""
" space");
data.grammar_triggers.push_back({"<tool▁calls▁begin>", /* .at_start = */ false}); data.grammar_triggers.push_back({"<tool▁calls▁begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({"<tool_calls_begin>", /* .at_start = */ false}); data.grammar_triggers.push_back({"<tool_calls_begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({"<tool calls begin>", /* .at_start = */ false}); data.grammar_triggers.push_back({"<tool calls begin>", /* .at_start = */ false});
@ -558,27 +567,14 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
"<tool▁call▁begin>", "<tool▁call▁begin>",
"<tool▁call▁end>", "<tool▁call▁end>",
}; };
builder.add_rule("root",
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
// so we accept common variants (then it's all constrained)
"( \"<tool▁calls▁begin>\" | \"<tool_calls_begin>\" | \"<tool calls begin>\" ) "
"(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
"\"<tool▁calls▁end>\""
" space");
}, grammar_options); }, grammar_options);
/*
Note: we do not feed the thoughts back to the template for a few reasons:
- the template doesn't use them explicitly
- if content isn't null, tool calls arent rendered
- not having the thoughts will locally reset the KV cache (losing the hot tokens of the tool calls) but will save up a lot long term.
*/
auto prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); auto prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
std::string suffix = "<Assistant>"; // Hack to fix the official prompt, which leaves the chat dangling after tool results.
if (vocab && !llama_vocab_get_add_eos(vocab) && if (string_ends_with(prompt, "<tool▁outputs▁end>")) {
inputs.add_generation_prompt &&
!string_ends_with(prompt, suffix))
{
prompt += "<end▁of▁sentence>"; prompt += "<end▁of▁sentence>";
if (inputs.add_generation_prompt) {
prompt += "<Assistant>";
}
} }
data.prompt = prompt; data.prompt = prompt;
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1; data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
@ -588,14 +584,14 @@ static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input)
static std::regex trigger_regex("<tool▁calls▁begin>|<tool_calls_begin>|<tool calls begin>"); static std::regex trigger_regex("<tool▁calls▁begin>|<tool_calls_begin>|<tool calls begin>");
static std::regex function_regex("<tool▁call▁begin>function<tool▁sep>([^\n]+)\n```json\n"); static std::regex function_regex("<tool▁call▁begin>function<tool▁sep>([^\n]+)\n```json\n");
static std::regex close_regex("```<tool▁call▁end>"); static std::regex close_regex("```<tool▁call▁end>");
static std::regex think_regex(R"(<think>([\s\S\n]*)</think>([\s\S\r\n]*))"); static std::regex think_regex(R"(<think>([\s\S\n]*)(</think>)?([\s\S\r\n]*))");
auto msg = parse_json_tool_calls(input, trigger_regex, function_regex, close_regex); auto msg = parse_json_tool_calls(input, trigger_regex, function_regex, close_regex);
std::smatch match; std::smatch match;
if (std::regex_match(msg.content, match, think_regex)) { if (std::regex_match(msg.content, match, think_regex)) {
msg.thoughts = string_trim(match[1].str()); msg.thoughts = string_trim(match[1].str());
msg.content = string_trim(match[2].str()); msg.content = string_trim(match[2].str());
} }
if (msg.content == "<tool▁calls▁end>") { if (string_trim(msg.content) == "<tool▁calls▁end>") {
msg.content = ""; msg.content = "";
} }
return msg; return msg;

View file

@ -1202,11 +1202,19 @@ curl http://localhost:8080/v1/chat/completions \
```shell ```shell
# Native support: # Native support:
llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M
# Native support for DeepSeek R1 works best w/ our own template (official template buggy)
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
# Native support requires the right template for these GGUFs: # Native support requires the right template for these GGUFs:

View file

@ -0,0 +1,76 @@
{#- Replacement DeepSeek R1 chat template: renders system prompt + tool
    instructions, tool calls, tool outputs, and strips <think> reasoning
    from prior assistant turns. Fixes two undefined-variable bugs from the
    original: user turns rendered `content` (unset there) and tool calls
    rendered `tool[...]` (never defined; the value is bound to `tool_args`). -#}
{%- if not add_generation_prompt is defined -%}
    {%- set add_generation_prompt = false -%}
{%- endif -%}
{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
{%- for message in messages -%}
    {%- if message['role'] == 'system' -%}
        {%- set ns.system_prompt = message['content'] -%}
    {%- endif -%}
{%- endfor -%}
{{bos_token}}
{%- if tools %}
You can call any of the following function tools to satisfy the user's requests: {{tools | map(attribute='function') | tojson(indent=4)}}
Example function tool call syntax:
<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>example_function_name
```json
{
    "arg1": "some_value"
    ...
}
```
<tool▁call▁end><tool▁calls▁end>
{% endif -%}
{{ns.system_prompt}}
{%- macro flush_tool_outputs() -%}
    {%- if ns.is_tool -%}
        {{- '<tool▁outputs▁end><end▁of▁sentence>' -}}
        {%- set ns.is_tool = false -%}
    {%- endif -%}
{%- endmacro -%}
{{- flush_tool_outputs() -}}
{%- for message in messages -%}
    {%- if message['role'] != 'tool' -%}
        {{- flush_tool_outputs() -}}
    {%- endif -%}
    {%- if message['role'] == 'user' -%}
        {#- fix: was `content`, which is undefined in this branch -#}
        {{- '<User>' + message['content'] + '<end▁of▁sentence>'}}
    {%- endif -%}
    {%- if message['role'] == 'assistant' and message['content'] is none -%}
        {{- '<Assistant><tool▁calls▁begin>'}}
        {%- for tc in message['tool_calls']%}
            {%- if ns.is_first -%}
                {%- set ns.is_first = false -%}
            {%- else -%}
                {{- '\n' -}}
            {%- endif -%}
            {%- set tool_name = tc['function']['name'] -%}
            {%- set tool_args = tc['function']['arguments'] -%}
            {#- fix: was `tool['function']['arguments']` — `tool` is undefined -#}
            {{- '<tool▁call▁begin>' + tc['type'] + '<tool▁sep>' + tool_name + '\n' + '```json' + '\n' + tool_args + '\n' + '```' + '<tool▁call▁end>'}}
        {%- endfor -%}
        {{- '<tool▁calls▁end><end▁of▁sentence>'}}
    {%- endif -%}
    {%- if message['role'] == 'assistant' and message['content'] is not none -%}
        {{- flush_tool_outputs() -}}
        {%- set content = message['content'] -%}
        {#- drop chain-of-thought from prior turns; only the final answer is fed back -#}
        {%- if '</think>' in content -%}
            {%- set content = content.split('</think>')[-1] -%}
        {%- endif -%}
        {{- '<Assistant>' + content + '<end▁of▁sentence>'}}
    {%- endif -%}
    {%- if message['role'] == 'tool' -%}
        {%- set ns.is_tool = true -%}
        {%- if ns.is_output_first -%}
            {{- '<tool▁outputs▁begin>' -}}
            {%- set ns.is_output_first = false -%}
        {%- endif -%}
        {{- '\n<tool▁output▁begin>' + message['content'] + '<tool▁output▁end>'}}
    {%- endif -%}
{%- endfor -%}
{{- flush_tool_outputs() -}}
{%- if add_generation_prompt and not ns.is_tool -%}
    {{- '<Assistant>' -}}
{%- endif -%}