tool-calls: accommodate the variety of wrong tool call opening tags that both the Qwen 32B and 7B distills tend to emit

2025-02-03 18:57:55 +00:00 · 2025-02-03 18:57:55 +00:00 · 569610ee77
commit 569610ee77
parent c397bd1f5f
2 changed files with 11 additions and 5 deletions
--- a/common/chat.cpp
+++ b/common/chat.cpp
@ -548,6 +548,8 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
                "\"<｜tool▁call▁begin｜>function<｜tool▁sep｜>" + name + "\\n```json\\n\" " + args_rule + " \"```<｜tool▁call▁end｜>\""));
        });
        data.grammar_triggers.push_back({"<｜tool▁calls▁begin｜>", /* .at_start = */ false});
        data.grammar_triggers.push_back({"<｜tool_calls_begin｜>", /* .at_start = */ false});
        data.grammar_triggers.push_back({"<｜tool calls begin｜>", /* .at_start = */ false});
        data.preserved_tokens = {
            "<think>",
            "</think>",
@ -557,8 +559,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
            "<｜tool▁call▁end｜>",
        };
        builder.add_rule("root",
-            "\"<｜tool▁calls▁begin｜>\""
+            // Distill Qwen 7B & 32B models seem confused about the syntax of their tool call opening tag,
-            " (" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + 
+            // so we accept common variants (then it's all constrained)
            "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" ) "
            "(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
            "\"<｜tool▁calls▁end｜>\""
            " space");
    }, grammar_options);
@ -581,7 +585,7 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
    return data;
 }
 static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) {
-    static std::regex trigger_regex("<｜tool▁calls▁begin｜>");
+    static std::regex trigger_regex("<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>");
    static std::regex function_regex("<｜tool▁call▁begin｜>function<｜tool▁sep｜>([^\n]+)\n```json\n");
    static std::regex close_regex("```<｜tool▁call▁end｜>");
    static std::regex think_regex(R"(<think>([\s\S\n]*)</think>([\s\S\r\n]*))");
@ -591,6 +595,9 @@ static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input)
        msg.thoughts = string_trim(match[1].str());
        msg.content = string_trim(match[2].str());
    }
    if (msg.content == "<｜tool▁calls▁end｜>") {
        msg.content = "";
    }
    return msg;
 }
--- a/examples/server/README.md
+++ b/examples/server/README.md
@ -1206,8 +1206,7 @@ curl http://localhost:8080/v1/chat/completions \
  llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
  llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
  llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
-  llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M
+  llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M
  llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q6_K_L
  # Native support requires the right template for these GGUFs: