From ce28224de843e04b7f30cd3908a758ef1f30bf4a Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Tue, 4 Feb 2025 00:28:40 +0000 Subject: [PATCH] =?UTF-8?q?tool-call:=20r1:=20add=20one=20more=20trigger?= =?UTF-8?q?=20approx=20"<=EF=BD=9Ctool=20calls=20begin=EF=BD=9C>"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- common/chat.cpp | 10 +++++----- models/templates/llama-cpp-deepseek-r1.jinja | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 2f114a24c..cb6a922bd 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -117,7 +117,6 @@ static common_chat_msg parse_json_tool_calls( std::sregex_iterator rend; std::sregex_iterator rit(it, end, function_regex); if (rit == rend) { - fprintf(stderr, "No more tool calls found\n"); result.content += std::string(it, end); break; } @@ -127,10 +126,10 @@ static common_chat_msg parse_json_tool_calls( json arguments; if (!parse_json(it, end, arguments)) { - throw std::runtime_error("Failed to parse json tool call arguments"); + throw std::runtime_error("Failed to parse json tool call arguments: " + input); } if (!std::regex_search(it, end, match, close_regex)) { - throw std::runtime_error("Malformed input, missing closing pattern"); + throw std::runtime_error("Malformed input, missing closing pattern: " + input); } it = match.suffix().first; result.tool_calls.push_back({name, arguments.is_string() ? arguments.get() : arguments.dump(), /* id= */ ""}); @@ -574,13 +573,14 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_ // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag, // so we accept common variants (then it's all constrained) builder.add_rule("root", - "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" ) " + "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\_calls\\_begin|>\" ) " "(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " " "\"<|tool▁calls▁end|>\"" " space"); data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false}); data.grammar_triggers.push_back({"<|tool_calls_begin|>", /* .at_start = */ false}); data.grammar_triggers.push_back({"<|tool calls begin|>", /* .at_start = */ false}); + data.grammar_triggers.push_back({"<|tool\\_calls\\_begin|>", /* .at_start = */ false}); data.preserved_tokens = { "", "", @@ -614,7 +614,7 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_ return data; } static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) { - static std::regex trigger_regex("<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>"); + static std::regex trigger_regex("<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>"); static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n"); static std::regex close_regex("```<|tool▁call▁end|>"); static std::regex think_regex(R"(([\s\S\n]*)()?([\s\S\r\n]*))"); diff --git a/models/templates/llama-cpp-deepseek-r1.jinja b/models/templates/llama-cpp-deepseek-r1.jinja index 598113b4a..1b029fd14 100644 --- a/models/templates/llama-cpp-deepseek-r1.jinja +++ b/models/templates/llama-cpp-deepseek-r1.jinja @@ -9,7 +9,7 @@ {%- endfor -%} {{bos_token}} {%- if tools %} -You can call any of the following function tools to satisfy the user's requests: {{tools | map(attribute='function') | tojson(indent=4)}} +You can call any of the following function tools to satisfy the user's requests: {{tools | map(attribute='function') | tojson(indent=2)}} Example function tool call syntax: