diff --git a/common/chat.cpp b/common/chat.cpp
index c97c9e087..66bbfe993 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -545,8 +545,17 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
             auto parameters = function["parameters"];
             auto args_rule = builder.add_schema(name + "-args", parameters);
             tool_rules.push_back(builder.add_rule(name + "-call",
-                "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
+                "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n"
+                "```json\\n\" " + args_rule + " \"```"
+                "<|tool▁call▁end|>\""));
         });
+        // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
+        // so we accept common variants (then it's all constrained)
+        builder.add_rule("root",
+            "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" ) "
+            "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
+            "\"<|tool▁calls▁end|>\""
+            " space");
         data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
         data.grammar_triggers.push_back({"<|tool_calls_begin|>", /* .at_start = */ false});
         data.grammar_triggers.push_back({"<|tool calls begin|>", /* .at_start = */ false});
@@ -558,27 +567,14 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
             "<|tool▁call▁begin|>",
             "<|tool▁call▁end|>",
         };
-        builder.add_rule("root",
-            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
-            // so we accept common variants (then it's all constrained)
-            "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" ) "
-            "(" +string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-            "\"<|tool▁calls▁end|>\""
-            " space");
     }, grammar_options);
-    /*
-        Note: we do not feed the thoughts back to the template for a few reasons:
-        - the template doesn't use them explicitly
-        - if content isn't null, tool calls arent rendered
-        - not having the thoughts will locally reset the KV cache (losing the hot tokens of the tool calls) but will save up a lot long term.
-    */
     auto prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
-    std::string suffix = "<|Assistant|>";
-    if (vocab && !llama_vocab_get_add_eos(vocab) &&
-        inputs.add_generation_prompt &&
-        !string_ends_with(prompt, suffix))
-    {
+    // Hack to fix the official prompt, which leaves the chat dangling after tool results.
+    if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
         prompt += "<|end▁of▁sentence|>";
+        if (inputs.add_generation_prompt) {
+            prompt += "<|Assistant|>";
+        }
     }
     data.prompt = prompt;
     data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
@@ -588,14 +584,14 @@ static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input)
     static std::regex trigger_regex("<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>");
     static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
     static std::regex close_regex("```<|tool▁call▁end|>");
-    static std::regex think_regex(R"(<think>([\s\S\n]*)</think>([\s\S\r\n]*))");
+    static std::regex think_regex(R"(<think>([\s\S\n]*)(</think>)?([\s\S\r\n]*))");
     auto msg = parse_json_tool_calls(input, trigger_regex, function_regex, close_regex);
     std::smatch match;
     if (std::regex_match(msg.content, match, think_regex)) {
         msg.thoughts = string_trim(match[1].str());
         msg.content = string_trim(match[2].str());
     }
-    if (msg.content == "<|tool▁calls▁end|>") {
+    if (string_trim(msg.content) == "<|tool▁calls▁end|>") {
         msg.content = "";
     }
     return msg;
diff --git a/examples/server/README.md b/examples/server/README.md
index 4a8ba4d69..f733f0fd1 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -1202,11 +1202,19 @@ curl http://localhost:8080/v1/chat/completions \
 
 ```shell
 # Native support:
+
 llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M
 llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
 llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
 llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
-llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M
+
+# Native support for DeepSeek R1 works best w/ our own template (official template buggy)
+
+llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
+    --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
+
+llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
+    --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
 
 # Native support requires the right template for these GGUFs:
diff --git a/models/templates/llama-cpp-deepseek-r1.jinja b/models/templates/llama-cpp-deepseek-r1.jinja
new file mode 100644
index 000000000..94b41f09b
--- /dev/null
+++ b/models/templates/llama-cpp-deepseek-r1.jinja
@@ -0,0 +1,76 @@
+{%- if not add_generation_prompt is defined -%}
+    {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+    {%- if message['role'] == 'system' -%}
+        {%- set ns.system_prompt = message['content'] -%}
+    {%- endif -%}
+{%- endfor -%}
+{{bos_token}}
+{%- if tools %}
+You can call any of the following function tools to satisfy the user's requests: {{tools | map(attribute='function') | tojson(indent=4)}}
+
+Example function tool call syntax:
+
+<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name
+```json
+{
+    "arg1": "some_value"
+    ...
+}
+```
+<|tool▁call▁end|><|tool▁calls▁end|>
+
+{% endif -%}
+{{ns.system_prompt}}
+{%- macro flush_tool_outputs() -%}
+    {%- if ns.is_tool -%}
+        {{- '<|tool▁outputs▁end|><|end▁of▁sentence|>' -}}
+        {%- set ns.is_tool = false -%}
+    {%- endif -%}
+{%- endmacro -%}
+{{- flush_tool_outputs() -}}
+{%- for message in messages -%}
+    {%- if message['role'] != 'tool' -%}
+        {{- flush_tool_outputs() -}}
+    {%- endif -%}
+    {%- if message['role'] == 'user' -%}
+        {{- '<|User|>' + message['content'] + '<|end▁of▁sentence|>'}}
+    {%- endif -%}
+    {%- if message['role'] == 'assistant' and message['content'] is none -%}
+        {{- '<|Assistant|><|tool▁calls▁begin|>'}}
+        {%- set ns.is_first = true -%}
+        {%- for tc in message['tool_calls'] -%}
+            {%- if ns.is_first -%}
+                {%- set ns.is_first = false -%}
+            {%- else -%}
+                {{- '\n' -}}
+            {%- endif -%}
+            {%- set tool_name = tc['function']['name'] -%}
+            {%- set tool_args = tc['function']['arguments'] -%}
+            {{- '<|tool▁call▁begin|>' + tc['type'] + '<|tool▁sep|>' + tool_name + '\n' + '```json' + '\n' + tool_args + '\n' + '```' + '<|tool▁call▁end|>'}}
+        {%- endfor -%}
+        {{- '<|tool▁calls▁end|><|end▁of▁sentence|>'}}
+    {%- endif -%}
+    {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+        {{- flush_tool_outputs() -}}
+        {%- set content = message['content'] -%}
+        {%- if '</think>' in content -%}
+            {%- set content = content.split('</think>')[-1] -%}
+        {%- endif -%}
+        {{- '<|Assistant|>' + content + '<|end▁of▁sentence|>'}}
+    {%- endif -%}
+    {%- if message['role'] == 'tool' -%}
+        {%- set ns.is_tool = true -%}
+        {%- if ns.is_output_first -%}
+            {{- '<|tool▁outputs▁begin|>' -}}
+            {%- set ns.is_output_first = false -%}
+        {%- endif -%}
+        {{- '\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+    {%- endif -%}
+{%- endfor -%}
+{{- flush_tool_outputs() -}}
+{%- if add_generation_prompt and not ns.is_tool -%}
+    {{- '<|Assistant|>' -}}
+{%- endif -%}
\ No newline at end of file
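
For a concrete picture of what the grammar change constrains, here is a minimal standalone sketch, not part of the patch, that assembles the same root-rule string the code passes to `builder.add_rule`. It assumes a single tool whose sanitized rule name is `get-weather-call` (hypothetical; the real name is derived from the tool's name by the builder), and stubs the `string_join` helper the patch calls so the snippet is self-contained:

```cpp
// Standalone sketch (not part of the patch): builds the GBNF root-rule body
// for one hypothetical tool rule named "get-weather-call".
#include <iostream>
#include <string>
#include <vector>

// Local stand-in for the string_join helper the patch calls.
static std::string string_join(const std::vector<std::string> & values, const std::string & sep) {
    std::string result;
    for (size_t i = 0; i < values.size(); i++) {
        if (i > 0) result += sep;
        result += values[i];
    }
    return result;
}

int main() {
    const bool parallel_tool_calls = true;                            // inputs.parallel_tool_calls
    const std::vector<std::string> tool_rules = {"get-weather-call"}; // hypothetical rule name

    const std::string root =
        "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" ) "
        "(" + string_join(tool_rules, " | ") + ")" + (parallel_tool_calls ? "*" : "") + " "
        "\"<|tool▁calls▁end|>\""
        " space";

    // Prints the rule body: any of the three opening-tag spellings, then
    // constrained tool calls (the trailing "*" appears only when parallel tool
    // calls are enabled), then the canonical closing tag.
    std::cout << root << "\n";
}
```

With `parallel_tool_calls` false the trailing `*` is dropped, so exactly one constrained tool call is accepted between the variant-tolerant opening tag and `<|tool▁calls▁end|>`.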
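
The prompt-termination hack is easier to see in isolation. A minimal sketch, assuming a prompt rendered by the official template whose last message is a tool result, with a local stand-in for the `string_ends_with` helper the patch calls:

```cpp
// Standalone sketch (not part of the patch): when the last message is a tool
// result, the official R1 template stops right after <|tool▁outputs▁end|>,
// leaving the chat dangling; the hack closes the turn and, if requested,
// opens the assistant's.
#include <iostream>
#include <string>

// Local stand-in for the string_ends_with helper the patch calls.
static bool string_ends_with(const std::string & str, const std::string & suffix) {
    return str.size() >= suffix.size() &&
           str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
}

int main() {
    bool add_generation_prompt = true;
    std::string prompt = "...<|tool▁outputs▁end|>"; // hypothetical rendered prompt

    if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
        prompt += "<|end▁of▁sentence|>";
        if (add_generation_prompt) {
            prompt += "<|Assistant|>";
        }
    }
    std::cout << prompt << "\n"; // ...<|tool▁outputs▁end|><|end▁of▁sentence|><|Assistant|>
}
```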
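
Finally, a standalone illustration of the split that `think_regex` performs: the model's reasoning is captured separately from the user-visible content. This uses the simpler tag-required shape of the pattern; the patch additionally tolerates a missing closing tag and trims both captures:

```cpp
// Standalone illustration (not part of the patch): splitting an R1 response
// into reasoning ("thoughts") and user-visible content.
#include <iostream>
#include <regex>
#include <string>

int main() {
    static const std::regex think_regex(R"(<think>([\s\S]*)</think>([\s\S]*))");

    const std::string output =
        "<think>The user asked for the weather; I should call get_weather.</think>\n"
        "Here is the forecast.";

    std::smatch m;
    if (std::regex_match(output, m, think_regex)) {
        std::cout << "thoughts: " << m[1].str() << "\n"; // reasoning, kept in msg.thoughts
        std::cout << "content: "  << m[2].str() << "\n"; // remainder, kept in msg.content
    }
}
```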