tool-calls: accommodate the variety of wrong tool call opening tags that both the Qwen 32B and 7B distills like to spit out

This commit is contained in:
Olivier Chafik 2025-02-03 18:57:55 +00:00
parent c397bd1f5f
commit 569610ee77
2 changed files with 11 additions and 5 deletions

View file

@ -548,6 +548,8 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
"\"<tool▁call▁begin>function<tool▁sep>" + name + "\\n```json\\n\" " + args_rule + " \"```<tool▁call▁end>\"")); "\"<tool▁call▁begin>function<tool▁sep>" + name + "\\n```json\\n\" " + args_rule + " \"```<tool▁call▁end>\""));
}); });
data.grammar_triggers.push_back({"<tool▁calls▁begin>", /* .at_start = */ false}); data.grammar_triggers.push_back({"<tool▁calls▁begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({"<tool_calls_begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({"<tool calls begin>", /* .at_start = */ false});
data.preserved_tokens = { data.preserved_tokens = {
"<think>", "<think>",
"</think>", "</think>",
@ -557,8 +559,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
"<tool▁call▁end>", "<tool▁call▁end>",
}; };
builder.add_rule("root", builder.add_rule("root",
"\"<tool▁calls▁begin>\"" // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
" (" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + // so we accept common variants (then it's all constrained)
"( \"<tool▁calls▁begin>\" | \"<tool_calls_begin>\" | \"<tool calls begin>\" ) "
"(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
"\"<tool▁calls▁end>\"" "\"<tool▁calls▁end>\""
" space"); " space");
}, grammar_options); }, grammar_options);
@ -581,7 +585,7 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
return data; return data;
} }
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) { static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) {
static std::regex trigger_regex("<tool▁calls▁begin>"); static std::regex trigger_regex("<tool▁calls▁begin>|<tool_calls_begin>|<tool calls begin>");
static std::regex function_regex("<tool▁call▁begin>function<tool▁sep>([^\n]+)\n```json\n"); static std::regex function_regex("<tool▁call▁begin>function<tool▁sep>([^\n]+)\n```json\n");
static std::regex close_regex("```<tool▁call▁end>"); static std::regex close_regex("```<tool▁call▁end>");
static std::regex think_regex(R"(<think>([\s\S\n]*)</think>([\s\S\r\n]*))"); static std::regex think_regex(R"(<think>([\s\S\n]*)</think>([\s\S\r\n]*))");
@ -591,6 +595,9 @@ static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input)
msg.thoughts = string_trim(match[1].str()); msg.thoughts = string_trim(match[1].str());
msg.content = string_trim(match[2].str()); msg.content = string_trim(match[2].str());
} }
if (msg.content == "<tool▁calls▁end>") {
msg.content = "";
}
return msg; return msg;
} }

View file

@ -1206,8 +1206,7 @@ curl http://localhost:8080/v1/chat/completions \
llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q6_K_L
# Native support requires the right template for these GGUFs: # Native support requires the right template for these GGUFs: