diff --git a/common/chat.cpp b/common/chat.cpp
index 1b9bc798c..c97c9e087 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -548,6 +548,8 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
"\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
});
data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
+ data.grammar_triggers.push_back({"<|tool_calls_begin|>", /* .at_start = */ false});
+ data.grammar_triggers.push_back({"<|tool calls begin|>", /* .at_start = */ false});
data.preserved_tokens = {
"",
"",
@@ -557,8 +559,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
"<|tool▁call▁end|>",
};
builder.add_rule("root",
- "\"<|tool▁calls▁begin|>\""
- " (" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") +
+ // The Distill Qwen 7B & 32B models seem confused about the syntax of their tool call opening tag,
+ // so we accept its common variants (everything after the opening tag is still constrained by the grammar)
+ "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" ) "
+ "(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
"\"<|tool▁calls▁end|>\""
" space");
}, grammar_options);
@@ -581,7 +585,7 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
return data;
}
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) {
- static std::regex trigger_regex("<|tool▁calls▁begin|>");
+ static std::regex trigger_regex("<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>");
static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
static std::regex close_regex("```<|tool▁call▁end|>");
static std::regex think_regex(R"(([\s\S\n]*)([\s\S\r\n]*))");
@@ -591,6 +595,9 @@ static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input)
msg.thoughts = string_trim(match[1].str());
msg.content = string_trim(match[2].str());
}
+ if (msg.content == "<|tool▁calls▁end|>") {
+ msg.content = "";
+ }
return msg;
}
diff --git a/examples/server/README.md b/examples/server/README.md
index d3392524d..4a8ba4d69 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -1206,8 +1206,7 @@ curl http://localhost:8080/v1/chat/completions \
llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
- llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M
- llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q6_K_L
+ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M
# Native support requires the right template for these GGUFs: