From 569610ee77a9cbb6c8101e5e031ad3d0bc535c25 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Mon, 3 Feb 2025 18:57:55 +0000 Subject: [PATCH] tool-calls: accommodate variety of wrong tool call opening tags both Qwen 32B and 7B distills like to spit out --- common/chat.cpp | 13 ++++++++++--- examples/server/README.md | 3 +-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 1b9bc798c..c97c9e087 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -548,6 +548,8 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_ "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\"")); }); data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false}); + data.grammar_triggers.push_back({"<|tool_calls_begin|>", /* .at_start = */ false}); + data.grammar_triggers.push_back({"<|tool calls begin|>", /* .at_start = */ false}); data.preserved_tokens = { "", "", @@ -557,8 +559,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_ "<|tool▁call▁end|>", }; builder.add_rule("root", - "\"<|tool▁calls▁begin|>\"" - " (" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + + // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag, + // so we accept common variants (then it's all constrained) + "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" ) " + "(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " " "\"<|tool▁calls▁end|>\"" " space"); }, grammar_options); @@ -581,7 +585,7 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_ return data; } static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) { - static std::regex trigger_regex("<|tool▁calls▁begin|>"); + static std::regex trigger_regex("<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>"); static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n"); static std::regex close_regex("```<|tool▁call▁end|>"); static std::regex think_regex(R"(([\s\S\n]*)([\s\S\r\n]*))"); @@ -591,6 +595,9 @@ static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) msg.thoughts = string_trim(match[1].str()); msg.content = string_trim(match[2].str()); } + if (msg.content == "<|tool▁calls▁end|>") { + msg.content = ""; + } return msg; } diff --git a/examples/server/README.md b/examples/server/README.md index d3392524d..4a8ba4d69 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -1206,8 +1206,7 @@ curl http://localhost:8080/v1/chat/completions \ llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q6_K_L + llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M # Native support requires the right template for these GGUFs: