Merge 01db429161
into d7b31a9d84
This commit is contained in:
commit
41de60d728
17 changed files with 979 additions and 272 deletions
|
@ -1975,6 +1975,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
params.use_jinja = true;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
|
||||
add_opt(common_arg(
|
||||
{"--reasoning-format"}, "FORMAT",
|
||||
"reasoning format (default: deepseek; allowed values: deepseek, none)\n"
|
||||
"controls whether thought tags are extracted from the response, and in which format they're returned. 'none' leaves thoughts unparsed in `message.content`, 'deepseek' puts them in `message.reasoning_content` (for DeepSeek R1 & Command R7B only).\n"
|
||||
"only supported for non-streamed responses",
|
||||
[](common_params & params, const std::string & value) {
|
||||
/**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
|
||||
else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
|
||||
else { std::invalid_argument("invalid value"); }
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
|
||||
add_opt(common_arg(
|
||||
{"--chat-template"}, "JINJA_TEMPLATE",
|
||||
string_format(
|
||||
|
|
231
common/chat.cpp
231
common/chat.cpp
|
@ -12,11 +12,13 @@ std::string common_chat_format_name(common_chat_format format) {
|
|||
case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
|
||||
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
|
||||
case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
|
||||
case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING: return "DeepSeek R1 (extract reasoning)";
|
||||
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
|
||||
default:
|
||||
throw std::runtime_error("Unknown chat format");
|
||||
}
|
||||
|
@ -105,7 +107,6 @@ static common_chat_msg parse_json_tool_calls(
|
|||
std::sregex_iterator rend;
|
||||
std::sregex_iterator rit(it, end, function_regex);
|
||||
if (rit == rend) {
|
||||
fprintf(stderr, "No more tool calls found\n");
|
||||
result.content += std::string(it, end);
|
||||
break;
|
||||
}
|
||||
|
@ -115,14 +116,21 @@ static common_chat_msg parse_json_tool_calls(
|
|||
|
||||
json arguments;
|
||||
if (!parse_json(it, end, arguments)) {
|
||||
throw std::runtime_error("Failed to parse json tool call arguments");
|
||||
throw std::runtime_error("Failed to parse json tool call arguments: " + input);
|
||||
}
|
||||
if (!std::regex_search(it, end, match, close_regex)) {
|
||||
throw std::runtime_error("Malformed input, missing closing pattern");
|
||||
throw std::runtime_error("Malformed input, missing closing pattern: " + input);
|
||||
}
|
||||
it = match.suffix().first;
|
||||
result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
|
||||
}
|
||||
|
||||
if (!result.tool_calls.empty()) {
|
||||
if (!string_strip(result.content).empty()) {
|
||||
LOG_WRN("Content found with tool calls: %s", result.content.c_str());
|
||||
}
|
||||
result.content = "";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -382,22 +390,46 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
|
|||
"<|END_THINKING|>",
|
||||
"<|END_ACTION|>",
|
||||
};
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
|
||||
auto adjusted_messages = json::array();
|
||||
for (const auto & msg : inputs.messages) {
|
||||
auto has_reasoning_content = msg.contains("reasoning_content") && msg["reasoning_content"].is_string();
|
||||
auto has_tool_calls = msg.contains("tool_calls") && msg["tool_calls"].is_array();
|
||||
if (has_reasoning_content && has_tool_calls) {
|
||||
auto adjusted_message = msg;
|
||||
adjusted_message["tool_plan"] = msg["reasoning_content"];
|
||||
adjusted_message.erase("reasoning_content");
|
||||
adjusted_messages.push_back(adjusted_message);
|
||||
} else {
|
||||
adjusted_messages.push_back(msg);
|
||||
}
|
||||
}
|
||||
data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {});
|
||||
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING : COMMON_CHAT_FORMAT_COMMAND_R7B;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_command_r7b(const std::string & input) {
|
||||
static std::regex response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
|
||||
static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
|
||||
static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
|
||||
static std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|>)([\\s\\S\\n\\r]*)");
|
||||
static std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
|
||||
static std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
|
||||
|
||||
std::smatch match;
|
||||
|
||||
common_chat_msg result;
|
||||
result.role = "assistant";
|
||||
if (std::regex_match(input, match, response_regex)) {
|
||||
result.content = match[1].str();
|
||||
} else if (std::regex_match(input, match, thought_action_regex)) {
|
||||
result.tool_plan = match[1].str();
|
||||
auto actions_str = match[2].str();
|
||||
|
||||
std::string rest = input;
|
||||
|
||||
if (std::regex_match(rest, match, thought_regex)) {
|
||||
if (extract_reasoning) {
|
||||
result.reasoning_content = match[2].str();
|
||||
} else if (!match[2].str().empty()) {
|
||||
// Let the unparsed thinking tags through in content only if their insides aren't empty.
|
||||
result.content = match[1].str();
|
||||
}
|
||||
rest = match[3].str();
|
||||
}
|
||||
if (std::regex_match(rest, match, action_regex)) {
|
||||
auto actions_str = match[1].str();
|
||||
auto actions = json::parse(actions_str);
|
||||
for (const auto & action : actions) {
|
||||
result.tool_calls.push_back({
|
||||
|
@ -406,9 +438,11 @@ static common_chat_msg common_chat_parse_command_r7b(const std::string & input)
|
|||
/* .id = */ action["tool_call_id"],
|
||||
});
|
||||
}
|
||||
} else if (std::regex_match(rest, match, response_regex)) {
|
||||
auto response = match[1].str();
|
||||
result.content += response;
|
||||
} else {
|
||||
LOG_ERR("Failed to parse command_r output");
|
||||
result.content = input;
|
||||
result.content += rest;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -546,34 +580,90 @@ static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bo
|
|||
|
||||
static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
common_chat_params data;
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
std::vector<std::string> tool_rules;
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
std::string name = function["name"];
|
||||
auto parameters = function["parameters"];
|
||||
auto args_rule = builder.add_schema(name + "-args", parameters);
|
||||
tool_rules.push_back(builder.add_rule(name + "-call",
|
||||
"\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
|
||||
});
|
||||
data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
|
||||
data.preserved_tokens = {
|
||||
"<|tool▁sep|>",
|
||||
"<|tool▁call▁end|>",
|
||||
};
|
||||
builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " space");
|
||||
}, grammar_options);
|
||||
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
||||
data.grammar_lazy = inputs.tool_choice != "required" && inputs.json_schema.is_null();
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
std::vector<std::string> tool_rules;
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool["function"];
|
||||
std::string name = function["name"];
|
||||
auto parameters = function["parameters"];
|
||||
auto args_rule = builder.add_schema(name + "-args", parameters);
|
||||
tool_rules.push_back(builder.add_rule(name + "-call",
|
||||
"\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n"
|
||||
"```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
|
||||
});
|
||||
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
|
||||
// so we accept common variants (then it's all constrained)
|
||||
builder.add_rule("root",
|
||||
"( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" ) "
|
||||
"(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
|
||||
"\"<|tool▁calls▁end|>\""
|
||||
" space");
|
||||
data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
|
||||
data.grammar_triggers.push_back({"<|tool_calls_begin|>", /* .at_start = */ false});
|
||||
data.grammar_triggers.push_back({"<|tool calls begin|>", /* .at_start = */ false});
|
||||
data.grammar_triggers.push_back({"<|tool\\_calls\\_begin|>", /* .at_start = */ false});
|
||||
data.preserved_tokens = {
|
||||
"<think>",
|
||||
"</think>",
|
||||
"<|tool▁sep|>",
|
||||
"<|tool▁calls▁end|",
|
||||
"<|tool▁call▁end|>",
|
||||
};
|
||||
}, grammar_options);
|
||||
}
|
||||
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
|
||||
// Hacks to fix the official (broken) prompt.
|
||||
// It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
|
||||
// until the official template is fixed.
|
||||
if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
|
||||
// Don't leave the chat dangling after tool results
|
||||
if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
|
||||
prompt += "<|end▁of▁sentence|>";
|
||||
if (inputs.add_generation_prompt) {
|
||||
prompt += "<|Assistant|>";
|
||||
}
|
||||
}
|
||||
// Fix up tool call delta example added by Minja
|
||||
prompt = std::regex_replace(
|
||||
prompt,
|
||||
std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
|
||||
"$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
|
||||
}
|
||||
data.prompt = prompt;
|
||||
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
||||
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) {
|
||||
static std::regex trigger_regex("<|tool▁calls▁begin|>");
|
||||
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
|
||||
static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
||||
static std::regex close_regex("```<|tool▁call▁end|>");
|
||||
return parse_json_tool_calls(input, trigger_regex, function_regex, close_regex);
|
||||
static std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
|
||||
static std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
|
||||
static std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
|
||||
common_chat_msg msg;
|
||||
msg.role = "assistant";
|
||||
std::smatch match;
|
||||
if (std::regex_match(input, match, reasoning_content_regex)) {
|
||||
std::string rest;
|
||||
if (extract_reasoning) {
|
||||
msg.reasoning_content = string_strip(match[2].str());
|
||||
} else {
|
||||
msg.content = match[1].str();
|
||||
}
|
||||
rest = match[3].str();
|
||||
|
||||
if (std::regex_search(rest, match, tool_calls_regex)) {
|
||||
auto tool_calls = match[1].str();
|
||||
auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
|
||||
msg.tool_calls = std::move(msg2.tool_calls);
|
||||
} else {
|
||||
msg.content += std::string(rest.begin() + rest.find_first_not_of(" \r\n"), rest.end());
|
||||
}
|
||||
} else {
|
||||
msg.content = input;
|
||||
}
|
||||
return msg;
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
|
@ -583,7 +673,7 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
|
|||
{"datetime", "Jan 29 2025 13:00:00 GMT"},
|
||||
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
|
||||
});
|
||||
if (!inputs.tools.is_null() && !inputs.tools.empty()) {
|
||||
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
auto schemas = json::array();
|
||||
|
@ -628,7 +718,7 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
|
|||
common_chat_params data;
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
|
||||
if (!inputs.tools.is_null() && !inputs.tools.empty()) {
|
||||
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
||||
data.grammar_lazy = inputs.tool_choice != "required";
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
std::vector<std::string> first_tool_rules;
|
||||
|
@ -884,47 +974,72 @@ static common_chat_params common_chat_params_init_without_tools(const common_cha
|
|||
}
|
||||
|
||||
common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
||||
auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none";
|
||||
LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false");
|
||||
const auto & src = tmpl.source();
|
||||
const auto & caps = tmpl.original_caps();
|
||||
|
||||
if (has_tools && !inputs.grammar.empty()) {
|
||||
throw std::runtime_error("Cannot specify grammar with tools");
|
||||
if (inputs.tools.is_array()) {
|
||||
if (inputs.tool_choice != "none" && !inputs.grammar.empty()) {
|
||||
throw std::runtime_error("Cannot specify grammar with tools");
|
||||
}
|
||||
if (caps.supports_tool_calls && !caps.supports_tools) {
|
||||
LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.");
|
||||
}
|
||||
}
|
||||
|
||||
const auto & src = tmpl.source();
|
||||
// DeepSeek R1: use handler in all cases except json schema (thinking / tools).
|
||||
if (src.find("<|tool▁calls▁begin|>") != std::string::npos && inputs.json_schema.is_null()) {
|
||||
return common_chat_params_init_deepseek_r1(tmpl, inputs);
|
||||
}
|
||||
|
||||
// Command R7B: : use handler in all cases except json schema (thinking / tools).
|
||||
if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && inputs.json_schema.is_null()) {
|
||||
return common_chat_params_init_command_r7b(tmpl, inputs);
|
||||
}
|
||||
|
||||
// Use generic handler when mixing tools + JSON schema.
|
||||
// TODO: support that mix in handlers below.
|
||||
if ((!inputs.tools.is_array() && inputs.json_schema.is_object())) {
|
||||
return common_chat_params_init_generic(tmpl, inputs);
|
||||
}
|
||||
|
||||
// Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
|
||||
if (src.find(">>>all") != std::string::npos) {
|
||||
// Functionary prepends "all\n" to plain content outputs, so we use the parser no matter when
|
||||
return common_chat_params_init_functionary_v3_2(tmpl, inputs);
|
||||
}
|
||||
|
||||
// Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
|
||||
if (src.find(" functools[") != std::string::npos) {
|
||||
// Firefunction v2 requires datetime and functions in the context, even w/o tools.
|
||||
return common_chat_params_init_firefunction_v2(tmpl, inputs);
|
||||
}
|
||||
|
||||
if (!has_tools) {
|
||||
// Plain handler (no tools)
|
||||
if (inputs.tools.is_null() || inputs.tool_choice == "none") {
|
||||
return common_chat_params_init_without_tools(tmpl, inputs);
|
||||
}
|
||||
|
||||
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
||||
if (src.find("<tool_call>") != std::string::npos) {
|
||||
return common_chat_params_init_hermes_2_pro(tmpl, inputs);
|
||||
}
|
||||
|
||||
// Functionary v3.1 (w/ tools)
|
||||
if (src.find("<|start_header_id|>") != std::string::npos
|
||||
&& src.find("<function=") != std::string::npos) {
|
||||
return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, inputs);
|
||||
}
|
||||
|
||||
// Llama 3.1, 3.2, 3.3 (w/ tools)
|
||||
if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
|
||||
auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
|
||||
return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools);
|
||||
}
|
||||
if (src.find("<|tool▁calls▁begin|>") != std::string::npos) {
|
||||
return common_chat_params_init_deepseek_r1(tmpl, inputs);
|
||||
}
|
||||
|
||||
// Mistral Nemo (w/ tools)
|
||||
if (src.find("[TOOL_CALLS]") != std::string::npos) {
|
||||
return common_chat_params_init_mistral_nemo(tmpl, inputs);
|
||||
}
|
||||
if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos) {
|
||||
return common_chat_params_init_command_r7b(tmpl, inputs);
|
||||
}
|
||||
|
||||
// Generic fallback
|
||||
return common_chat_params_init_generic(tmpl, inputs);
|
||||
}
|
||||
|
||||
|
@ -949,7 +1064,9 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
|
|||
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
|
||||
return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true);
|
||||
case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
|
||||
return common_chat_parse_deepseek_r1(input);
|
||||
return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ false);
|
||||
case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING:
|
||||
return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ true);
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
|
||||
return common_chat_parse_functionary_v3_2(input);
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
|
||||
|
@ -959,7 +1076,9 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
|
|||
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
|
||||
return common_chat_parse_firefunction_v2(input);
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B:
|
||||
return common_chat_parse_command_r7b(input);
|
||||
return common_chat_parse_command_r7b(input, /* extract_reasoning= */ false);
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING:
|
||||
return common_chat_parse_command_r7b(input, /* extract_reasoning= */ true);
|
||||
default:
|
||||
throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ struct common_chat_inputs {
|
|||
bool stream;
|
||||
std::string grammar;
|
||||
bool add_generation_prompt = true;
|
||||
bool extract_reasoning = true;
|
||||
};
|
||||
|
||||
enum common_chat_format {
|
||||
|
@ -28,11 +29,13 @@ enum common_chat_format {
|
|||
COMMON_CHAT_FORMAT_LLAMA_3_X,
|
||||
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1,
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
|
||||
COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
|
||||
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
|
||||
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B,
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
|
||||
|
||||
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
|
||||
};
|
||||
|
|
|
@ -202,6 +202,11 @@ struct common_params_vocoder {
|
|||
bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT
|
||||
};
|
||||
|
||||
enum common_reasoning_format {
|
||||
COMMON_REASONING_FORMAT_NONE,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`
|
||||
};
|
||||
|
||||
struct common_params {
|
||||
int32_t n_predict = -1; // new tokens to predict
|
||||
int32_t n_ctx = 4096; // context size
|
||||
|
@ -346,6 +351,7 @@ struct common_params {
|
|||
std::string chat_template = ""; // NOLINT
|
||||
bool use_jinja = false; // NOLINT
|
||||
bool enable_chat_template = true;
|
||||
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
|
||||
std::vector<std::string> api_keys;
|
||||
|
||||
|
@ -623,7 +629,7 @@ struct common_chat_msg {
|
|||
std::string role;
|
||||
std::string content;
|
||||
std::vector<common_tool_call> tool_calls;
|
||||
std::string tool_plan = "";
|
||||
std::string reasoning_content = "";
|
||||
};
|
||||
|
||||
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
||||
|
|
|
@ -151,12 +151,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
|
|||
|
||||
lparams.no_perf = params.no_perf;
|
||||
|
||||
std::vector<const char *> trigger_words;
|
||||
trigger_words.reserve(params.grammar_trigger_words.size());
|
||||
for (const auto & str : params.grammar_trigger_words) {
|
||||
trigger_words.push_back(str.word.c_str());
|
||||
}
|
||||
|
||||
struct llama_sampler * grmr;
|
||||
if (params.grammar.compare(0, 11, "%llguidance") == 0) {
|
||||
#ifdef LLAMA_USE_LLGUIDANCE
|
||||
|
@ -165,6 +159,12 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
|
|||
GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
|
||||
#endif // LLAMA_USE_LLGUIDANCE
|
||||
} else {
|
||||
std::vector<const char *> trigger_words;
|
||||
trigger_words.reserve(params.grammar_trigger_words.size());
|
||||
for (const auto & str : params.grammar_trigger_words) {
|
||||
trigger_words.push_back(str.word.c_str());
|
||||
}
|
||||
|
||||
grmr = params.grammar_lazy
|
||||
? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root",
|
||||
trigger_words.data(), trigger_words.size(),
|
||||
|
|
|
@ -127,6 +127,7 @@ The project is under active development, and we are [looking for feedback and co
|
|||
| `--grammar-file FNAME` | file to read grammar from |
|
||||
| `-j, --json-schema SCHEMA` | JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object<br/>For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead |
|
||||
| `--jinja` | Enable experimental Jinja templating engine (required for tool use) |
|
||||
| `--reasoning-format FORMAT` | Controls extraction of model thinking traces and the format / field in which they are returned (default: `deepseek`; allowed values: `deepseek`, `none`; requires `--jinja`). `none` will leave thinking traces inline in `message.content` in a model-specific format, while `deepseek` will return them separately under `message.reasoning_content` |
|
||||
|
||||
**Example-specific params**
|
||||
|
||||
|
@ -1136,61 +1137,252 @@ curl http://localhost:8080/v1/chat/completions \
|
|||
|
||||
| Template | Format |
|
||||
|----------|--------|
|
||||
| CohereForAI-c4ai-command-r-plus-default.jinja | generic tool calls |
|
||||
| CohereForAI-c4ai-command-r-plus-rag.jinja | generic tool calls |
|
||||
| CohereForAI-c4ai-command-r-plus-tool_use.jinja | generic tool calls |
|
||||
| MiniMaxAI-MiniMax-Text-01.jinja | generic tool calls |
|
||||
| NexaAIDev-Octopus-v2.jinja | generic tool calls |
|
||||
| NousResearch-Hermes-2-Pro-Llama-3-8B-default.jinja | generic tool calls |
|
||||
| NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja | hermes 2 pro tool calls |
|
||||
| NousResearch-Hermes-2-Pro-Mistral-7B-default.jinja | generic tool calls |
|
||||
| NousResearch-Hermes-2-Pro-Mistral-7B-tool_use.jinja | hermes 2 pro tool calls |
|
||||
| NousResearch-Hermes-3-Llama-3.1-70B-default.jinja | generic tool calls |
|
||||
| NousResearch-Hermes-3-Llama-3.1-70B-tool_use.jinja | hermes 2 pro tool calls |
|
||||
| OrionStarAI-Orion-14B-Chat.jinja | generic tool calls |
|
||||
| Qwen-QwQ-32B-Preview.jinja | hermes 2 pro tool calls |
|
||||
| Qwen-Qwen2-7B-Instruct.jinja | generic tool calls |
|
||||
| Qwen-Qwen2-VL-7B-Instruct.jinja | generic tool calls |
|
||||
| Qwen-Qwen2.5-7B-Instruct.jinja | hermes 2 pro tool calls |
|
||||
| Qwen-Qwen2.5-Math-7B-Instruct.jinja | hermes 2 pro tool calls |
|
||||
| TheBloke-FusionNet_34Bx2_MoE-AWQ.jinja | generic tool calls |
|
||||
| abacusai-Fewshot-Metamath-OrcaVicuna-Mistral.jinja | generic tool calls |
|
||||
| bofenghuang-vigogne-2-70b-chat.jinja | generic tool calls |
|
||||
| databricks-dbrx-instruct.jinja | generic tool calls |
|
||||
| deepseek-ai-DeepSeek-Coder-V2-Instruct.jinja | generic tool calls |
|
||||
| deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja | deepseek r1 tool calls |
|
||||
| deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja | deepseek r1 tool calls |
|
||||
| deepseek-ai-DeepSeek-R1-Distill-Qwen-7B.jinja | deepseek r1 tool calls |
|
||||
| deepseek-ai-DeepSeek-V2.5.jinja | deepseek r1 tool calls |
|
||||
| deepseek-ai-deepseek-coder-33b-instruct.jinja | generic tool calls |
|
||||
| google-gemma-2-2b-it.jinja | generic tool calls |
|
||||
| google-gemma-7b-it.jinja | generic tool calls |
|
||||
| indischepartij-MiniCPM-3B-OpenHermes-2.5-v2.jinja | generic tool calls |
|
||||
| mattshumer-Reflection-Llama-3.1-70B.jinja | generic tool calls |
|
||||
| meetkai-functionary-medium-v3.2.jinja | functionary v3.2 tool calls |
|
||||
| meta-llama-Llama-3.1-8B-Instruct.jinja | llama 3.x tool calls (w/ builtin tools) |
|
||||
| meta-llama-Llama-3.2-3B-Instruct.jinja | llama 3.x tool calls |
|
||||
| meta-llama-Llama-3.3-70B-Instruct.jinja | llama 3.x tool calls (w/ builtin tools) |
|
||||
| meta-llama-Meta-Llama-3.1-8B-Instruct.jinja | llama 3.x tool calls (w/ builtin tools) |
|
||||
| microsoft-Phi-3-medium-4k-instruct.jinja | generic tool calls |
|
||||
| microsoft-Phi-3-mini-4k-instruct.jinja | generic tool calls |
|
||||
| microsoft-Phi-3-small-8k-instruct.jinja | generic tool calls |
|
||||
| microsoft-Phi-3.5-mini-instruct.jinja | generic tool calls |
|
||||
| microsoft-Phi-3.5-vision-instruct.jinja | generic tool calls |
|
||||
| mistralai-Mistral-7B-Instruct-v0.2.jinja | generic tool calls |
|
||||
| mistralai-Mistral-Large-Instruct-2407.jinja | mistral nemo tool calls |
|
||||
| mistralai-Mistral-Large-Instruct-2411.jinja | generic tool calls |
|
||||
| mistralai-Mistral-Nemo-Instruct-2407.jinja | mistral nemo tool calls |
|
||||
| mistralai-Mixtral-8x7B-Instruct-v0.1.jinja | generic tool calls |
|
||||
| mlabonne-AlphaMonarch-7B.jinja | generic tool calls |
|
||||
| nvidia-Llama-3.1-Nemotron-70B-Instruct-HF.jinja | llama 3.x tool calls (w/ builtin tools) |
|
||||
| openchat-openchat-3.5-0106.jinja | generic tool calls |
|
||||
| teknium-OpenHermes-2.5-Mistral-7B.jinja | generic tool calls |
|
||||
| Almawave-Velvet-14B.jinja | Hermes 2 Pro |
|
||||
| AtlaAI-Selene-1-Mini-Llama-3.1-8B.jinja | Llama 3.x |
|
||||
| CohereForAI-aya-expanse-8b.jinja | Generic |
|
||||
| CohereForAI-c4ai-command-r-plus-default.jinja | Generic |
|
||||
| CohereForAI-c4ai-command-r-plus-rag.jinja | Generic |
|
||||
| CohereForAI-c4ai-command-r-plus-tool_use.jinja | Generic |
|
||||
| CohereForAI-c4ai-command-r7b-12-2024-default.jinja | Command R7B (extract reasoning) |
|
||||
| CohereForAI-c4ai-command-r7b-12-2024-rag.jinja | Command R7B (extract reasoning) |
|
||||
| CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja | Command R7B (extract reasoning) |
|
||||
| CohereForAI-c4ai-command-r7b-12-2024.jinja | Generic |
|
||||
| DavieLion-Llama-3.2-1B-SPIN-iter3.jinja | Generic |
|
||||
| Delta-Vector-Rei-12B.jinja | Mistral Nemo |
|
||||
| EpistemeAI-Mistral-Nemo-Instruct-12B-Philosophy-Math.jinja | Mistral Nemo |
|
||||
| FlofloB-83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit.jinja | Hermes 2 Pro |
|
||||
| FlofloB-test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit.jinja | Generic |
|
||||
| HelpingAI-HAI-SER.jinja | Generic |
|
||||
| HuggingFaceTB-SmolLM2-1.7B-Instruct.jinja | Generic |
|
||||
| HuggingFaceTB-SmolLM2-135M-Instruct.jinja | Generic |
|
||||
| HuggingFaceTB-SmolLM2-360M-Instruct.jinja | Generic |
|
||||
| INSAIT-Institute-BgGPT-Gemma-2-27B-IT-v1.0.jinja | Generic |
|
||||
| Ihor-Text2Graph-R1-Qwen2.5-0.5b.jinja | Hermes 2 Pro |
|
||||
| Infinigence-Megrez-3B-Instruct.jinja | Generic |
|
||||
| Josephgflowers-TinyLlama_v1.1_math_code-world-test-1.jinja | Generic |
|
||||
| LGAI-EXAONE-EXAONE-3.5-2.4B-Instruct.jinja | Generic |
|
||||
| LGAI-EXAONE-EXAONE-3.5-7.8B-Instruct.jinja | Generic |
|
||||
| LatitudeGames-Wayfarer-12B.jinja | Generic |
|
||||
| Magpie-Align-Llama-3-8B-Magpie-Align-v0.1.jinja | Generic |
|
||||
| Magpie-Align-Llama-3.1-8B-Magpie-Align-v0.1.jinja | Generic |
|
||||
| MaziyarPanahi-calme-3.2-instruct-78b.jinja | Generic |
|
||||
| MiniMaxAI-MiniMax-Text-01.jinja | Generic |
|
||||
| MiniMaxAI-MiniMax-VL-01.jinja | Generic |
|
||||
| NaniDAO-deepseek-r1-qwen-2.5-32B-ablated.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| NexaAIDev-Octopus-v2.jinja | Generic |
|
||||
| NousResearch-Hermes-2-Pro-Llama-3-8B-default.jinja | Generic |
|
||||
| NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja | Hermes 2 Pro |
|
||||
| NousResearch-Hermes-2-Pro-Mistral-7B-default.jinja | Generic |
|
||||
| NousResearch-Hermes-2-Pro-Mistral-7B-tool_use.jinja | Hermes 2 Pro |
|
||||
| NousResearch-Hermes-3-Llama-3.1-70B-default.jinja | Generic |
|
||||
| NousResearch-Hermes-3-Llama-3.1-70B-tool_use.jinja | Hermes 2 Pro |
|
||||
| NovaSky-AI-Sky-T1-32B-Flash.jinja | Hermes 2 Pro |
|
||||
| NovaSky-AI-Sky-T1-32B-Preview.jinja | Hermes 2 Pro |
|
||||
| OnlyCheeini-greesychat-turbo.jinja | Generic |
|
||||
| Orenguteng-Llama-3.1-8B-Lexi-Uncensored-V2.jinja | Llama 3.x |
|
||||
| OrionStarAI-Orion-14B-Chat.jinja | Generic |
|
||||
| PowerInfer-SmallThinker-3B-Preview.jinja | Generic |
|
||||
| PrimeIntellect-INTELLECT-1-Instruct.jinja | Generic |
|
||||
| Qwen-QVQ-72B-Preview.jinja | Generic |
|
||||
| Qwen-QwQ-32B-Preview.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen1.5-7B-Chat.jinja | Generic |
|
||||
| Qwen-Qwen2-7B-Instruct.jinja | Generic |
|
||||
| Qwen-Qwen2-VL-72B-Instruct.jinja | Generic |
|
||||
| Qwen-Qwen2-VL-7B-Instruct.jinja | Generic |
|
||||
| Qwen-Qwen2.5-0.5B.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-1.5B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-14B-Instruct-1M.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-14B.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-32B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-32B.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-3B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-72B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-7B-Instruct-1M.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-7B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-7B.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-Coder-32B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-Coder-7B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-Math-1.5B.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-Math-7B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-VL-3B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-VL-72B-Instruct.jinja | Hermes 2 Pro |
|
||||
| Qwen-Qwen2.5-VL-7B-Instruct.jinja | Hermes 2 Pro |
|
||||
| RWKV-Red-Team-ARWKV-7B-Preview-0.1.jinja | Hermes 2 Pro |
|
||||
| SakanaAI-TinySwallow-1.5B-Instruct.jinja | Hermes 2 Pro |
|
||||
| SakanaAI-TinySwallow-1.5B.jinja | Hermes 2 Pro |
|
||||
| Sao10K-70B-L3.3-Cirrus-x1.jinja | Llama 3.x |
|
||||
| SentientAGI-Dobby-Mini-Leashed-Llama-3.1-8B.jinja | Llama 3.x |
|
||||
| SentientAGI-Dobby-Mini-Unhinged-Llama-3.1-8B.jinja | Llama 3.x |
|
||||
| Steelskull-L3.3-Damascus-R1.jinja | Llama 3.x |
|
||||
| Steelskull-L3.3-MS-Nevoria-70b.jinja | Llama 3.x |
|
||||
| Steelskull-L3.3-Nevoria-R1-70b.jinja | Llama 3.x |
|
||||
| THUDM-glm-4-9b-chat.jinja | Generic |
|
||||
| THUDM-glm-edge-1.5b-chat.jinja | Generic |
|
||||
| Tarek07-Progenitor-V1.1-LLaMa-70B.jinja | Llama 3.x |
|
||||
| TheBloke-FusionNet_34Bx2_MoE-AWQ.jinja | Generic |
|
||||
| TinyLlama-TinyLlama-1.1B-Chat-v1.0.jinja | Generic |
|
||||
| UCLA-AGI-Mistral7B-PairRM-SPPO-Iter3.jinja | Generic |
|
||||
| ValiantLabs-Llama3.1-8B-Enigma.jinja | Llama 3.x |
|
||||
| abacusai-Fewshot-Metamath-OrcaVicuna-Mistral.jinja | Generic |
|
||||
| ai21labs-AI21-Jamba-1.5-Large.jinja | Generic |
|
||||
| allenai-Llama-3.1-Tulu-3-405B-SFT.jinja | Generic |
|
||||
| allenai-Llama-3.1-Tulu-3-405B.jinja | Generic |
|
||||
| allenai-Llama-3.1-Tulu-3-8B.jinja | Generic |
|
||||
| arcee-ai-Virtuoso-Lite.jinja | Hermes 2 Pro |
|
||||
| arcee-ai-Virtuoso-Medium-v2.jinja | Hermes 2 Pro |
|
||||
| arcee-ai-Virtuoso-Small-v2.jinja | Hermes 2 Pro |
|
||||
| avemio-GRAG-NEMO-12B-ORPO-HESSIAN-AI.jinja | Generic |
|
||||
| bespokelabs-Bespoke-Stratos-7B.jinja | Hermes 2 Pro |
|
||||
| bfuzzy1-acheron-m1a-llama.jinja | Generic |
|
||||
| bofenghuang-vigogne-2-70b-chat.jinja | Generic |
|
||||
| bytedance-research-UI-TARS-72B-DPO.jinja | Generic |
|
||||
| bytedance-research-UI-TARS-7B-DPO.jinja | Generic |
|
||||
| bytedance-research-UI-TARS-7B-SFT.jinja | Generic |
|
||||
| carsenk-phi3.5_mini_exp_825_uncensored.jinja | Generic |
|
||||
| cyberagent-DeepSeek-R1-Distill-Qwen-14B-Japanese.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| cyberagent-DeepSeek-R1-Distill-Qwen-32B-Japanese.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| databricks-dbrx-instruct.jinja | Generic |
|
||||
| deepseek-ai-DeepSeek-Coder-V2-Instruct.jinja | Generic |
|
||||
| deepseek-ai-DeepSeek-Coder-V2-Lite-Base.jinja | Generic |
|
||||
| deepseek-ai-DeepSeek-Coder-V2-Lite-Instruct.jinja | Generic |
|
||||
| deepseek-ai-DeepSeek-R1-Distill-Llama-70B.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-DeepSeek-R1-Distill-Qwen-14B.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-DeepSeek-R1-Distill-Qwen-7B.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-DeepSeek-R1-Zero.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-DeepSeek-R1.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-DeepSeek-V2-Lite.jinja | Generic |
|
||||
| deepseek-ai-DeepSeek-V2.5.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-DeepSeek-V3.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| deepseek-ai-deepseek-coder-33b-instruct.jinja | Generic |
|
||||
| deepseek-ai-deepseek-coder-6.7b-instruct.jinja | Generic |
|
||||
| deepseek-ai-deepseek-coder-7b-instruct-v1.5.jinja | Generic |
|
||||
| deepseek-ai-deepseek-llm-67b-chat.jinja | Generic |
|
||||
| deepseek-ai-deepseek-llm-7b-chat.jinja | Generic |
|
||||
| dicta-il-dictalm2.0-instruct.jinja | Generic |
|
||||
| ehristoforu-Falcon3-8B-Franken-Basestruct.jinja | Hermes 2 Pro |
|
||||
| fireworks-ai-llama-3-firefunction-v2.jinja | FireFunction v2 |
|
||||
| godlikehhd-alpaca_data_sampled_ifd_new_5200.jinja | Hermes 2 Pro |
|
||||
| godlikehhd-alpaca_data_score_max_0.7_2600.jinja | Hermes 2 Pro |
|
||||
| google-gemma-2-27b-it.jinja | Generic |
|
||||
| google-gemma-2-2b-it.jinja | Generic |
|
||||
| google-gemma-2-2b-jpn-it.jinja | Generic |
|
||||
| google-gemma-7b-it.jinja | Generic |
|
||||
| huihui-ai-DeepSeek-R1-Distill-Llama-70B-abliterated.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| huihui-ai-DeepSeek-R1-Distill-Llama-8B-abliterated.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| huihui-ai-DeepSeek-R1-Distill-Qwen-14B-abliterated-v2.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| huihui-ai-DeepSeek-R1-Distill-Qwen-32B-abliterated.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| huihui-ai-DeepSeek-R1-Distill-Qwen-7B-abliterated-v2.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| huihui-ai-Qwen2.5-14B-Instruct-1M-abliterated.jinja | Hermes 2 Pro |
|
||||
| ibm-granite-granite-3.1-8b-instruct.jinja | Generic |
|
||||
| indischepartij-MiniCPM-3B-OpenHermes-2.5-v2.jinja | Generic |
|
||||
| inflatebot-MN-12B-Mag-Mell-R1.jinja | Generic |
|
||||
| jinaai-ReaderLM-v2.jinja | Generic |
|
||||
| kms7530-chemeng_qwen-math-7b_24_1_100_1_nonmath.jinja | Hermes 2 Pro |
|
||||
| knifeayumu-Cydonia-v1.3-Magnum-v4-22B.jinja | Mistral Nemo |
|
||||
| langgptai-qwen1.5-7b-chat-sa-v0.1.jinja | Generic |
|
||||
| lightblue-DeepSeek-R1-Distill-Qwen-7B-Japanese.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| mattshumer-Reflection-Llama-3.1-70B.jinja | Generic |
|
||||
| meetkai-functionary-medium-v3.1.jinja | Functionary v3.1 Llama 3.1 |
|
||||
| meetkai-functionary-medium-v3.2.jinja | Functionary v3.2 |
|
||||
| meta-llama-Llama-2-7b-chat-hf.jinja | Generic |
|
||||
| meta-llama-Llama-3.1-8B-Instruct.jinja | Llama 3.x |
|
||||
| meta-llama-Llama-3.2-11B-Vision-Instruct.jinja | Llama 3.x |
|
||||
| meta-llama-Llama-3.2-1B-Instruct.jinja | Llama 3.x |
|
||||
| meta-llama-Llama-3.2-3B-Instruct.jinja | Llama 3.x |
|
||||
| meta-llama-Llama-3.3-70B-Instruct.jinja | Llama 3.x |
|
||||
| meta-llama-Meta-Llama-3-8B-Instruct.jinja | Generic |
|
||||
| meta-llama-Meta-Llama-3.1-8B-Instruct.jinja | Llama 3.x |
|
||||
| microsoft-Phi-3-medium-4k-instruct.jinja | Generic |
|
||||
| microsoft-Phi-3-mini-4k-instruct.jinja | Generic |
|
||||
| microsoft-Phi-3-small-8k-instruct.jinja | Generic |
|
||||
| microsoft-Phi-3.5-mini-instruct.jinja | Generic |
|
||||
| microsoft-Phi-3.5-vision-instruct.jinja | Generic |
|
||||
| microsoft-phi-4.jinja | Generic |
|
||||
| migtissera-Tess-3-Mistral-Nemo-12B.jinja | Generic |
|
||||
| ministral-Ministral-3b-instruct.jinja | Generic |
|
||||
| mistralai-Codestral-22B-v0.1.jinja | Generic |
|
||||
| mistralai-Mistral-7B-Instruct-v0.1.jinja | Generic |
|
||||
| mistralai-Mistral-7B-Instruct-v0.2.jinja | Generic |
|
||||
| mistralai-Mistral-7B-Instruct-v0.3.jinja | Mistral Nemo |
|
||||
| mistralai-Mistral-Large-Instruct-2407.jinja | Mistral Nemo |
|
||||
| mistralai-Mistral-Large-Instruct-2411.jinja | Generic |
|
||||
| mistralai-Mistral-Nemo-Instruct-2407.jinja | Mistral Nemo |
|
||||
| mistralai-Mistral-Small-24B-Instruct-2501.jinja | Generic |
|
||||
| mistralai-Mixtral-8x7B-Instruct-v0.1.jinja | Generic |
|
||||
| mkurman-Qwen2.5-14B-DeepSeek-R1-1M.jinja | Hermes 2 Pro |
|
||||
| mlabonne-AlphaMonarch-7B.jinja | Generic |
|
||||
| mlx-community-Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32.jinja | Hermes 2 Pro |
|
||||
| mlx-community-Qwen2.5-VL-7B-Instruct-8bit.jinja | Hermes 2 Pro |
|
||||
| mobiuslabsgmbh-DeepSeek-R1-ReDistill-Qwen-1.5B-v1.1.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| netcat420-MFANNv0.20.jinja | Generic |
|
||||
| netcat420-MFANNv0.24.jinja | Generic |
|
||||
| netease-youdao-Confucius-o1-14B.jinja | Hermes 2 Pro |
|
||||
| nvidia-AceMath-7B-RM.jinja | Hermes 2 Pro |
|
||||
| nvidia-Eagle2-1B.jinja | Hermes 2 Pro |
|
||||
| nvidia-Eagle2-9B.jinja | Hermes 2 Pro |
|
||||
| nvidia-Llama-3.1-Nemotron-70B-Instruct-HF.jinja | Llama 3.x |
|
||||
| onnx-community-DeepSeek-R1-Distill-Qwen-1.5B-ONNX.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| open-thoughts-OpenThinker-7B.jinja | Hermes 2 Pro |
|
||||
| openchat-openchat-3.5-0106.jinja | Generic |
|
||||
| pankajmathur-orca_mini_v6_8b.jinja | Generic |
|
||||
| princeton-nlp-Mistral-7B-Base-SFT-RDPO.jinja | Generic |
|
||||
| princeton-nlp-Mistral-7B-Instruct-DPO.jinja | Generic |
|
||||
| princeton-nlp-Mistral-7B-Instruct-RDPO.jinja | Generic |
|
||||
| prithivMLmods-Bellatrix-Tiny-1.5B-R1.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Bellatrix-Tiny-1B-R1.jinja | Llama 3.x |
|
||||
| prithivMLmods-Bellatrix-Tiny-1B-v3.jinja | Generic |
|
||||
| prithivMLmods-Bellatrix-Tiny-3B-R1.jinja | Llama 3.x |
|
||||
| prithivMLmods-Blaze-14B-xElite.jinja | Generic |
|
||||
| prithivMLmods-Calcium-Opus-14B-Elite2-R1.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Calme-Ties-78B.jinja | Generic |
|
||||
| prithivMLmods-Calme-Ties2-78B.jinja | Generic |
|
||||
| prithivMLmods-Calme-Ties3-78B.jinja | Generic |
|
||||
| prithivMLmods-ChemQwen2-vL.jinja | Generic |
|
||||
| prithivMLmods-GWQ2b.jinja | Generic |
|
||||
| prithivMLmods-LatexMind-2B-Codec.jinja | Generic |
|
||||
| prithivMLmods-Llama-3.2-6B-AlgoCode.jinja | Llama 3.x |
|
||||
| prithivMLmods-Megatron-Opus-14B-Exp.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Megatron-Opus-14B-Stock.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Megatron-Opus-7B-Exp.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Omni-Reasoner-Merged.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Omni-Reasoner4-Merged.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Primal-Opus-14B-Optimus-v1.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-QwQ-Math-IO-500M.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Qwen-7B-Distill-Reasoner.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| prithivMLmods-Qwen2.5-1.5B-DeepSeek-R1-Instruct.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Qwen2.5-14B-DeepSeek-R1-1M.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Qwen2.5-32B-DeepSeek-R1-Instruct.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Qwen2.5-7B-DeepSeek-R1-1M.jinja | Hermes 2 Pro |
|
||||
| prithivMLmods-Triangulum-v2-10B.jinja | Hermes 2 Pro |
|
||||
| qingy2024-Falcon3-2x10B-MoE-Instruct.jinja | Hermes 2 Pro |
|
||||
| rubenroy-Zurich-14B-GCv2-5m.jinja | Hermes 2 Pro |
|
||||
| rubenroy-Zurich-7B-GCv2-5m.jinja | Hermes 2 Pro |
|
||||
| silma-ai-SILMA-Kashif-2B-Instruct-v1.0.jinja | Generic |
|
||||
| simplescaling-s1-32B.jinja | Hermes 2 Pro |
|
||||
| sometimesanotion-Lamarck-14B-v0.7.jinja | Hermes 2 Pro |
|
||||
| sonthenguyen-zephyr-sft-bnb-4bit-DPO-mtbr-180steps.jinja | Generic |
|
||||
| sthenno-tempesthenno-icy-0130.jinja | Generic |
|
||||
| sumink-qwft.jinja | Hermes 2 Pro |
|
||||
| teknium-OpenHermes-2.5-Mistral-7B.jinja | Generic |
|
||||
| thirdeyeai-elevate360m.jinja | Generic |
|
||||
| tiiuae-Falcon3-10B-Instruct.jinja | Hermes 2 Pro |
|
||||
| unsloth-DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| unsloth-DeepSeek-R1-Distill-Llama-8B.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| unsloth-DeepSeek-R1.jinja | DeepSeek R1 (extract reasoning) |
|
||||
| unsloth-Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit.jinja | Generic |
|
||||
| upstage-solar-pro-preview-instruct.jinja | Generic |
|
||||
| whyhow-ai-PatientSeek.jinja | Generic |
|
||||
| xwen-team-Xwen-72B-Chat.jinja | Hermes 2 Pro |
|
||||
| xwen-team-Xwen-7B-Chat.jinja | Hermes 2 Pro |
|
||||
|
||||
This table can be generated with:
|
||||
|
||||
```bash
|
||||
./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -1202,11 +1394,20 @@ curl http://localhost:8080/v1/chat/completions \
|
|||
|
||||
```shell
|
||||
# Native support:
|
||||
|
||||
llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M
|
||||
llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
|
||||
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
|
||||
llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
|
||||
|
||||
# Native support for DeepSeek R1 works best w/ our own template (official template buggy)
|
||||
|
||||
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
|
||||
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
|
||||
|
||||
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
|
||||
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
|
||||
|
||||
# Native support requires the right template for these GGUFs:
|
||||
|
||||
llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \
|
||||
|
|
|
@ -173,6 +173,7 @@ struct slot_params {
|
|||
{"grammar_trigger_words", grammar_trigger_words},
|
||||
{"grammar_trigger_tokens", sampling.grammar_trigger_tokens},
|
||||
{"preserved_tokens", sampling.preserved_tokens},
|
||||
{"chat_format", common_chat_format_name(oaicompat_chat_format)},
|
||||
{"samplers", samplers},
|
||||
{"speculative.n_max", speculative.n_max},
|
||||
{"speculative.n_min", speculative.n_min},
|
||||
|
@ -724,9 +725,19 @@ struct server_task_result_cmpl_final : server_task_result {
|
|||
msg.content = content;
|
||||
}
|
||||
|
||||
json tool_calls;
|
||||
json message {
|
||||
{"role", "assistant"},
|
||||
};
|
||||
if (!msg.reasoning_content.empty()) {
|
||||
message["reasoning_content"] = msg.reasoning_content;
|
||||
}
|
||||
if (msg.content == "" && !msg.tool_calls.empty()) {
|
||||
message["content"] = json();
|
||||
} else {
|
||||
message["content"] = msg.content;
|
||||
}
|
||||
if (!msg.tool_calls.empty()) {
|
||||
tool_calls = json::array();
|
||||
auto tool_calls = json::array();
|
||||
for (const auto & tc : msg.tool_calls) {
|
||||
tool_calls.push_back({
|
||||
{"type", "function"},
|
||||
|
@ -737,15 +748,7 @@ struct server_task_result_cmpl_final : server_task_result {
|
|||
{"id", tc.id},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
json message {
|
||||
{"content", msg.content},
|
||||
{"tool_calls", tool_calls},
|
||||
{"role", "assistant"},
|
||||
};
|
||||
if (!msg.tool_plan.empty()) {
|
||||
message["tool_plan"] = msg.tool_plan;
|
||||
message["tool_calls"] = tool_calls;
|
||||
}
|
||||
|
||||
json choice {
|
||||
|
@ -4056,7 +4059,7 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
auto body = json::parse(req.body);
|
||||
json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
|
||||
json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);
|
||||
|
||||
return handle_completions_impl(
|
||||
SERVER_TASK_TYPE_COMPLETION,
|
||||
|
@ -4069,7 +4072,7 @@ int main(int argc, char ** argv) {
|
|||
// same with handle_chat_completions, but without inference part
|
||||
const auto handle_apply_template = [&ctx_server, ¶ms, &res_ok](const httplib::Request & req, httplib::Response & res) {
|
||||
auto body = json::parse(req.body);
|
||||
json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
|
||||
json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);
|
||||
res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
|
||||
};
|
||||
|
||||
|
|
|
@ -92,6 +92,7 @@ def do_test_completion_with_required_tool_tiny(template_name: str, tool: dict, a
|
|||
tool_calls = choice["message"].get("tool_calls")
|
||||
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
|
||||
tool_call = tool_calls[0]
|
||||
assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
|
||||
expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
|
||||
assert expected_function_name == tool_call["function"]["name"]
|
||||
actual_arguments = tool_call["function"]["arguments"]
|
||||
|
@ -155,11 +156,11 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
|
|||
|
||||
(TEST_TOOL, "success", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
|
||||
(PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
|
||||
(PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
|
||||
# (PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
|
||||
|
||||
(TEST_TOOL, "success", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
|
||||
(PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
|
||||
(PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
|
||||
# (PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
|
||||
|
||||
(TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
|
||||
(PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
|
||||
|
@ -175,7 +176,7 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
|
|||
|
||||
(TEST_TOOL, "success", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
|
||||
(PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
|
||||
(PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
# (PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
# TODO: fix these
|
||||
# (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
|
||||
# (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
|
||||
|
@ -214,6 +215,7 @@ def test_completion_with_required_tool_real_model(tool: dict, argument_key: str
|
|||
tool_calls = choice["message"].get("tool_calls")
|
||||
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
|
||||
tool_call = tool_calls[0]
|
||||
assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
|
||||
expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
|
||||
assert expected_function_name == tool_call["function"]["name"]
|
||||
actual_arguments = tool_call["function"]["arguments"]
|
||||
|
@ -273,7 +275,6 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
|
|||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("hf_repo,template_override", [
|
||||
("bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M", ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")),
|
||||
("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
|
||||
("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
|
||||
|
@ -298,13 +299,16 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
|
|||
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
|
||||
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
|
||||
("bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L", ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")),
|
||||
|
||||
("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
|
||||
|
||||
# Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
|
||||
("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
|
||||
|
||||
# ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
|
||||
# ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
|
||||
])
|
||||
def test_weather(hf_repo: str, template_override: Tuple[str, str | None] | None):
|
||||
def test_weather(hf_repo: str, template_override: str | Tuple[str, str | None] | None):
|
||||
global server
|
||||
n_predict = 512
|
||||
server.n_slots = 1
|
||||
|
@ -323,6 +327,7 @@ def test_weather(hf_repo: str, template_override: Tuple[str, str | None] | None)
|
|||
res = server.make_request("POST", "/chat/completions", data={
|
||||
"max_tokens": n_predict,
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a chatbot that uses tools/functions. Dont overthink things."},
|
||||
{"role": "user", "content": "What is the weather in Istanbul?"},
|
||||
],
|
||||
"tools": [WEATHER_TOOL],
|
||||
|
@ -332,6 +337,7 @@ def test_weather(hf_repo: str, template_override: Tuple[str, str | None] | None)
|
|||
tool_calls = choice["message"].get("tool_calls")
|
||||
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
|
||||
tool_call = tool_calls[0]
|
||||
assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
|
||||
assert tool_call["function"]["name"] == WEATHER_TOOL["function"]["name"]
|
||||
actual_arguments = json.loads(tool_call["function"]["arguments"])
|
||||
assert 'location' in actual_arguments, f"location not found in {json.dumps(actual_arguments)}"
|
||||
|
@ -340,22 +346,166 @@ def test_weather(hf_repo: str, template_override: Tuple[str, str | None] | None)
|
|||
assert re.match('^Istanbul(, (TR|Turkey|Türkiye))?$', location), f'Expected Istanbul for location, got {location}'
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("result_override,n_predict,hf_repo,template_override", [
|
||||
(None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
|
||||
(None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
|
||||
(None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
(None, 128, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
|
||||
(None, 128, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
|
||||
(None, 128, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
|
||||
(None, 128, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
|
||||
(None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
|
||||
("^> 0.56$", 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
|
||||
(None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
|
||||
|
||||
# TODO: fix these (wrong results, either didn't respect decimal instruction or got wrong value)
|
||||
("^The y-coordinate [\\s\\S]*?\\*\\*0.5\\*\\*", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
|
||||
("[\\s\\S]*?\\*\\*0\\.5\\*\\*", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
|
||||
])
|
||||
def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
|
||||
global server
|
||||
# n_predict = 512
|
||||
server.n_slots = 1
|
||||
server.jinja = True
|
||||
server.n_ctx = 8192 * 2
|
||||
server.n_predict = n_predict
|
||||
server.model_hf_repo = hf_repo
|
||||
server.model_hf_file = None
|
||||
if isinstance(template_override, tuple):
|
||||
(template_hf_repo, template_variant) = template_override
|
||||
server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
|
||||
assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
|
||||
elif isinstance(template_override, str):
|
||||
server.chat_template = template_override
|
||||
server.start(timeout_seconds=TIMEOUT_SERVER_START)
|
||||
res = server.make_request("POST", "/chat/completions", data={
|
||||
"max_tokens": n_predict,
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a chatbot that uses tools/functions. Dont overthink things, and provide very concise answers. Do not explain your reasoning to the user. Provide any numerical values back to the user with at most two decimals."},
|
||||
{"role": "user", "content": "What's the y coordinate of a point on the unit sphere at angle 30 degrees?"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_6789",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "calculate",
|
||||
"arguments": "{\"expression\":\"sin(30 * pi / 180)\"}"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"name": "calculate",
|
||||
"content": 0.55644242476,
|
||||
"tool_call_id": "call_6789"
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"type":"function",
|
||||
"function":{
|
||||
"name":"calculate",
|
||||
"description":"A calculator function that computes values of arithmetic expressions in the Python syntax",
|
||||
"parameters":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"expression":{
|
||||
"type":"string",
|
||||
"description":"An arithmetic expression to compute the value of (Python syntad, assuming all floats)"
|
||||
}
|
||||
},
|
||||
"required":["expression"]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}, timeout=TIMEOUT_HTTP_REQUEST)
|
||||
assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
|
||||
choice = res.body["choices"][0]
|
||||
tool_calls = choice["message"].get("tool_calls")
|
||||
assert tool_calls is None, f'Expected no tool call in {choice["message"]}'
|
||||
content = choice["message"].get("content")
|
||||
assert content is not None, f'Expected content in {choice["message"]}'
|
||||
if result_override is not None:
|
||||
assert re.match(result_override, content), f'Expected {result_override}, got {content}'
|
||||
else:
|
||||
assert re.match('^[\\s\\S]*?The (y[ -])?coordinate [\\s\\S]*?is (approximately )?0\\.56\\b|^0\\.56$', content), \
|
||||
f'Expected something like "The y coordinate is 0.56.", got {content}'
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("n_predict,reasoning_format,expect_content,expect_reasoning_content,hf_repo,template_override", [
|
||||
(128, 'deepseek', "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
|
||||
(128, None, "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
|
||||
|
||||
(1024, 'deepseek', "To find the sum of.*", "I need to calculate the sum of 102 and 7.*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
|
||||
(1024, 'none', "<think>\n?I need[\\s\\S]*?</think>\n?To find.*", None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
|
||||
|
||||
(1024, 'deepseek', "To find the sum of.*", "First, I [\\s\\S]*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
|
||||
])
|
||||
def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none'] | None, expect_content: str | None, expect_reasoning_content: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
|
||||
global server
|
||||
server.n_slots = 1
|
||||
server.reasoning_format = reasoning_format
|
||||
server.jinja = True
|
||||
server.n_ctx = 8192 * 2
|
||||
server.n_predict = n_predict
|
||||
server.model_hf_repo = hf_repo
|
||||
server.model_hf_file = None
|
||||
if isinstance(template_override, tuple):
|
||||
(template_hf_repo, template_variant) = template_override
|
||||
server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
|
||||
assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
|
||||
elif isinstance(template_override, str):
|
||||
server.chat_template = template_override
|
||||
server.start(timeout_seconds=TIMEOUT_SERVER_START)
|
||||
res = server.make_request("POST", "/chat/completions", data={
|
||||
"max_tokens": n_predict,
|
||||
"messages": [
|
||||
{"role": "user", "content": "What's the sum of 102 and 7?"},
|
||||
]
|
||||
}, timeout=TIMEOUT_HTTP_REQUEST)
|
||||
assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
|
||||
choice = res.body["choices"][0]
|
||||
assert choice["message"].get("tool_calls") is None, f'Expected no tool call in {choice["message"]}'
|
||||
|
||||
content = choice["message"].get("content")
|
||||
if expect_content is None:
|
||||
assert content is None, f'Expected no content in {choice["message"]}'
|
||||
else:
|
||||
assert re.match(expect_content, content), f'Expected {expect_content}, got {content}'
|
||||
|
||||
reasoning_content = choice["message"].get("reasoning_content")
|
||||
if expect_reasoning_content is None:
|
||||
assert reasoning_content is None, f'Expected no reasoning content in {choice["message"]}'
|
||||
else:
|
||||
assert re.match(expect_reasoning_content, reasoning_content), f'Expected {expect_reasoning_content}, got {reasoning_content}'
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("expected_arguments_override,hf_repo,template_override", [
|
||||
(None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
|
||||
# (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", "chatml"),
|
||||
|
||||
(None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
|
||||
(None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
|
||||
|
||||
(None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)),
|
||||
(None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
|
||||
|
||||
(None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
|
||||
('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
|
||||
(None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
|
||||
('{"code":"print("}', "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
|
||||
(None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
|
||||
(None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
|
||||
('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
|
||||
('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
(None, "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
|
||||
(None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
|
||||
(None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
|
||||
|
@ -371,15 +521,13 @@ def test_weather(hf_repo: str, template_override: Tuple[str, str | None] | None)
|
|||
|
||||
# Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
|
||||
(None, "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
|
||||
|
||||
# (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
|
||||
])
|
||||
def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
|
||||
def test_hello_world(expected_arguments_override: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
|
||||
global server
|
||||
server.n_slots = 1
|
||||
server.jinja = True
|
||||
server.n_ctx = 8192
|
||||
server.n_predict = 128
|
||||
server.n_predict = 512 # High because of DeepSeek R1
|
||||
server.model_hf_repo = hf_repo
|
||||
server.model_hf_file = None
|
||||
if isinstance(template_override, tuple):
|
||||
|
@ -406,6 +554,7 @@ def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo:
|
|||
tool_calls = choice["message"].get("tool_calls")
|
||||
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
|
||||
tool_call = tool_calls[0]
|
||||
assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
|
||||
assert tool_call["function"]["name"] == PYTHON_TOOL["function"]["name"]
|
||||
actual_arguments = tool_call["function"]["arguments"]
|
||||
if expected_arguments_override is not None:
|
||||
|
|
|
@ -78,6 +78,7 @@ class ServerProcess:
|
|||
draft_max: int | None = None
|
||||
no_webui: bool | None = None
|
||||
jinja: bool | None = None
|
||||
reasoning_format: Literal['deepseek', 'none'] | None = None
|
||||
chat_template: str | None = None
|
||||
chat_template_file: str | None = None
|
||||
|
||||
|
@ -172,6 +173,8 @@ class ServerProcess:
|
|||
server_args.append("--no-webui")
|
||||
if self.jinja:
|
||||
server_args.append("--jinja")
|
||||
if self.reasoning_format is not None:
|
||||
server_args.extend(("--reasoning-format", self.reasoning_format))
|
||||
if self.chat_template:
|
||||
server_args.extend(["--chat-template", self.chat_template])
|
||||
if self.chat_template_file:
|
||||
|
|
|
@ -578,6 +578,7 @@ static json oaicompat_completion_params_parse(const json & body) {
|
|||
static json oaicompat_completion_params_parse(
|
||||
const json & body, /* openai api json semantics */
|
||||
bool use_jinja,
|
||||
common_reasoning_format reasoning_format,
|
||||
const common_chat_templates & chat_templates)
|
||||
{
|
||||
json llama_params;
|
||||
|
@ -633,9 +634,10 @@ static json oaicompat_completion_params_parse(
|
|||
throw std::runtime_error("Cannot use custom grammar constraints with tools.");
|
||||
}
|
||||
common_chat_inputs inputs;
|
||||
inputs.messages = body.at("messages");
|
||||
inputs.tools = tools;
|
||||
inputs.tool_choice = tool_choice;
|
||||
inputs.extract_reasoning = reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
||||
inputs.messages = body.at("messages");
|
||||
inputs.tools = tools;
|
||||
inputs.tool_choice = tool_choice;
|
||||
inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
|
||||
if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
|
||||
LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
|
||||
|
|
22
models/templates/README.md
Normal file
22
models/templates/README.md
Normal file
|
@ -0,0 +1,22 @@
|
|||
These templates can be updated with the following commands:
|
||||
|
||||
```bash
|
||||
./scripts/get_chat_template.py CohereForAI/c4ai-command-r-plus tool_use > models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja
|
||||
./scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 default > models/templates/CohereForAI-c4ai-command-r7b-12-2024-default.jinja
|
||||
./scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 rag > models/templates/CohereForAI-c4ai-command-r7b-12-2024-rag.jinja
|
||||
./scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 tool_use > models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
|
||||
./scripts/get_chat_template.py deepseek-ai/DeepSeek-R1-Distill-Llama-8B > models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
|
||||
./scripts/get_chat_template.py deepseek-ai/DeepSeek-R1-Distill-Qwen-32B > models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
|
||||
./scripts/get_chat_template.py fireworks-ai/llama-3-firefunction-v2 > models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
|
||||
./scripts/get_chat_template.py google/gemma-2-2b-it > models/templates/google-gemma-2-2b-it.jinja
|
||||
./scripts/get_chat_template.py meetkai/functionary-medium-v3. > models/templates/meetkai-functionary-medium-v3.jinja
|
||||
./scripts/get_chat_template.py meetkai/functionary-medium-v3.2 > models/templates/meetkai-functionary-medium-v3.2.jinja
|
||||
./scripts/get_chat_template.py meta-llama/Llama-3.1-8B-Instruct > models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja
|
||||
./scripts/get_chat_template.py meta-llama/Llama-3.2-3B-Instruct > models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
|
||||
./scripts/get_chat_template.py meta-llama/Llama-3.3-70B-Instruct > models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja
|
||||
./scripts/get_chat_template.py microsoft/Phi-3.5-mini-instruct > models/templates/microsoft-Phi-3.5-mini-instruct.jinja
|
||||
./scripts/get_chat_template.py mistralai/Mistral-Nemo-Instruct-2407 > models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja
|
||||
./scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use > models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
|
||||
./scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use > models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
|
||||
./scripts/get_chat_template.py Qwen/Qwen2.5-7B-Instruct > models/templates/Qwen-Qwen2.5-7B-Instruct.jinja
|
||||
```
|
|
@ -1 +1 @@
|
|||
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}
|
||||
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %}
|
|
@ -1,56 +1 @@
|
|||
{% if not add_generation_prompt is defined %}
|
||||
{% set add_generation_prompt = false %}
|
||||
{% endif %}
|
||||
{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}
|
||||
{%- for message in messages %}
|
||||
{%- if message['role'] == 'system' %}
|
||||
{% set ns.system_prompt = message['content'] %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{{bos_token}}
|
||||
{{ns.system_prompt}}
|
||||
{%- for message in messages %}
|
||||
{%- if message['role'] == 'user' %}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{{'<|User|>' + message['content']}}
|
||||
{%- endif %}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is none %}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- for tool in message['tool_calls']%}
|
||||
{%- if not ns.is_first %}
|
||||
{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
|
||||
{%- set ns.is_first = true -%}
|
||||
{%- else %}
|
||||
{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
|
||||
{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is not none %}
|
||||
{%- if ns.is_tool %}
|
||||
{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- else %}
|
||||
{% set content = message['content'] %}
|
||||
{% if '</think>' in content %}
|
||||
{% set content = content.split('</think>')[-1] %}
|
||||
{% endif %}
|
||||
{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if message['role'] == 'tool' %}
|
||||
{%- set ns.is_tool = true -%}
|
||||
{%- if ns.is_output_first %}
|
||||
{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
|
||||
{%- set ns.is_output_first = false %}
|
||||
{%- else %}
|
||||
{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor -%}
|
||||
{% if ns.is_tool %}
|
||||
{{'<|tool▁outputs▁end|>'}}
|
||||
{% endif %}
|
||||
{% if add_generation_prompt and not ns.is_tool %}
|
||||
{{'<|Assistant|>'}}
|
||||
{% endif %}
|
||||
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %}
|
76
models/templates/llama-cpp-deepseek-r1.jinja
Normal file
76
models/templates/llama-cpp-deepseek-r1.jinja
Normal file
|
@ -0,0 +1,76 @@
|
|||
{%- if not add_generation_prompt is defined -%}
|
||||
{%- set add_generation_prompt = false -%}
|
||||
{%- endif -%}
|
||||
{%- set ns = namespace(is_first=false, is_tool_outputs=false, is_output_first=true, system_prompt='') -%}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] == 'system' -%}
|
||||
{%- set ns.system_prompt = message['content'] -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{{bos_token}}
|
||||
{%- if tools %}
|
||||
You can call any of the following function tools to satisfy the user's requests: {{tools | map(attribute='function') | tojson(indent=2)}}
|
||||
|
||||
Example function tool call syntax:
|
||||
|
||||
<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name
|
||||
```json
|
||||
{
|
||||
"arg1": "some_value"
|
||||
...
|
||||
}
|
||||
```
|
||||
<|tool▁call▁end|><|tool▁calls▁end|>
|
||||
|
||||
{% endif -%}
|
||||
{{ns.system_prompt}}
|
||||
{%- macro flush_tool_outputs() -%}
|
||||
{%- if ns.is_tool_outputs -%}
|
||||
{{- '<|tool▁outputs▁end|><|end▁of▁sentence|>' -}}
|
||||
{%- set ns.is_tool_outputs = false -%}
|
||||
{%- endif -%}
|
||||
{%- endmacro -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] != 'tool' -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'user' -%}
|
||||
{{- '<|User|>' + message['content'] + '<|end▁of▁sentence|>' -}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is none -%}
|
||||
{{- '<|Assistant|><|tool▁calls▁begin|>' -}}
|
||||
{%- set ns.is_first = true -%}
|
||||
{%- for tc in message['tool_calls'] -%}
|
||||
{%- if ns.is_first -%}
|
||||
{%- set ns.is_first = false -%}
|
||||
{%- else -%}
|
||||
{{- '\n' -}}
|
||||
{%- endif -%}
|
||||
{%- set tool_name = tc['function']['name'] -%}
|
||||
{%- set tool_args = tc['function']['arguments'] -%}
|
||||
{{- '<|tool▁call▁begin|>' + tc['type'] + '<|tool▁sep|>' + tool_name + '\n' + '```json' + '\n' + tool_args + '\n' + '```' + '<|tool▁call▁end|>' -}}
|
||||
{%- endfor -%}
|
||||
{{- '<|tool▁calls▁end|><|end▁of▁sentence|>' -}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is not none -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- set content = message['content'] -%}
|
||||
{%- if '</think>' in content -%}
|
||||
{%- set content = content.split('</think>')[-1] -%}
|
||||
{%- endif -%}
|
||||
{{- '<|Assistant|>' + content + '<|end▁of▁sentence|>' -}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'tool' -%}
|
||||
{%- set ns.is_tool_outputs = true -%}
|
||||
{%- if ns.is_output_first -%}
|
||||
{{- '<|tool▁outputs▁begin|>' -}}
|
||||
{%- set ns.is_output_first = false -%}
|
||||
{%- endif -%}
|
||||
{{- '\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>' -}}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- if add_generation_prompt and not ns.is_tool_outputs -%}
|
||||
{{- '<|Assistant|><think>\n' -}}
|
||||
{%- endif -%}
|
5
scripts/get_chat_template.py
Normal file → Executable file
5
scripts/get_chat_template.py
Normal file → Executable file
|
@ -7,9 +7,8 @@
|
|||
./scripts/get_chat_template.py model_id [variant]
|
||||
|
||||
Examples:
|
||||
./scripts/get_chat_template.py NousResearch/Meta-Llama-3-8B-Instruct
|
||||
./scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use
|
||||
./scripts/get_chat_template.py meta-llama/Llama-3.2-3B-Instruct
|
||||
./scripts/get_chat_template.py CohereForAI/c4ai-command-r-plus tool_use
|
||||
./scripts/get_chat_template.py microsoft/Phi-3.5-mini-instruct
|
||||
'''
|
||||
|
||||
import json
|
||||
|
|
|
@ -1186,7 +1186,7 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
|
|||
return;
|
||||
}
|
||||
}
|
||||
LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`) (buffer: `%s`)\n", token, piece.c_str(), grammar.trigger_buffer.c_str());
|
||||
LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`)\n", token, piece.c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,10 @@ static common_chat_msg msg_from_json(const json & message) {
|
|||
ret.content = message.at("content");
|
||||
}
|
||||
if (message.contains("tool_plan")) {
|
||||
ret.tool_plan = message.at("tool_plan");
|
||||
ret.reasoning_content = message.at("tool_plan");
|
||||
}
|
||||
if (message.contains("reasoning_content")) {
|
||||
ret.reasoning_content = message.at("reasoning_content");
|
||||
}
|
||||
auto has_tool_calls = message.contains("tool_calls");
|
||||
if (has_tool_calls) {
|
||||
|
@ -105,6 +108,7 @@ static std::string dump(const json & j) {
|
|||
static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual) {
|
||||
assert_equals(expected.role, actual.role);
|
||||
assert_equals(expected.content, actual.content);
|
||||
assert_equals(expected.reasoning_content, actual.reasoning_content);
|
||||
assert_equals(expected.tool_calls.size(), actual.tool_calls.size());
|
||||
for (size_t i = 0; i < expected.tool_calls.size(); i++) {
|
||||
const auto & expected_tool_call = expected.tool_calls[i];
|
||||
|
@ -176,13 +180,15 @@ struct delta_data {
|
|||
|
||||
static delta_data init_delta(const common_chat_template & tmpl, const std::vector<std::string> & end_tokens,
|
||||
const json & user_message, const json & delta_message, const json & tools,
|
||||
const json & tool_choice) {
|
||||
const json & tool_choice,
|
||||
bool think = false) {
|
||||
common_chat_inputs inputs;
|
||||
inputs.parallel_tool_calls = true;
|
||||
inputs.messages = json::array();
|
||||
inputs.messages.push_back(user_message);
|
||||
inputs.tools = tools;
|
||||
inputs.tool_choice = tool_choice;
|
||||
inputs.extract_reasoning = think;
|
||||
auto params_prefix = common_chat_params_init(tmpl, inputs);
|
||||
|
||||
inputs.messages.push_back(delta_message);
|
||||
|
@ -192,17 +198,24 @@ static delta_data init_delta(const common_chat_template & tmpl, const std::vecto
|
|||
std::string prefix = params_prefix.prompt;
|
||||
std::string full = params_full.prompt;
|
||||
|
||||
// Check full starts with prefix
|
||||
if (full.find(prefix) != 0) {
|
||||
fprintf(stderr, "Full:\n%s\n\nPrefix:\n%s\n\n", full.c_str(), prefix.c_str());
|
||||
throw std::runtime_error("Full message does not start with prefix");
|
||||
}
|
||||
|
||||
if (full == prefix) {
|
||||
throw std::runtime_error("Full message is the same as the prefix");
|
||||
}
|
||||
|
||||
auto delta = full.substr(prefix.size());
|
||||
size_t common_prefix_length = 0;
|
||||
for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
|
||||
if (prefix[i] != full[i]) {
|
||||
break;
|
||||
}
|
||||
if (prefix[i] == '<') {
|
||||
// DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
|
||||
// but it removes thinking tags for past messages.
|
||||
// The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
|
||||
continue;
|
||||
}
|
||||
common_prefix_length = i + 1;
|
||||
}
|
||||
auto delta = full.substr(common_prefix_length);
|
||||
|
||||
// Strip end tokens
|
||||
for (const auto & end_token : end_tokens) {
|
||||
|
@ -223,7 +236,9 @@ static delta_data init_delta(const common_chat_template & tmpl, const std::vecto
|
|||
*/
|
||||
static void test_template(const common_chat_template & tmpl, const std::vector<std::string> & end_tokens,
|
||||
const json & test_message, const json & tools = {}, const std::string & expected_delta = "",
|
||||
bool expect_grammar_triggered = true) {
|
||||
bool expect_grammar_triggered = true,
|
||||
bool test_grammar_if_triggered = true,
|
||||
bool think = false) {
|
||||
common_chat_msg expected_msg = msg_from_json(test_message);
|
||||
|
||||
auto user_message = json{
|
||||
|
@ -232,7 +247,7 @@ static void test_template(const common_chat_template & tmpl, const std::vector<s
|
|||
};
|
||||
|
||||
for (const auto & tool_choice : json({ "auto", "required" })) {
|
||||
auto data = init_delta(tmpl, end_tokens, user_message, test_message, tools, tool_choice);
|
||||
auto data = init_delta(tmpl, end_tokens, user_message, test_message, tools, tool_choice, think);
|
||||
if (!expected_delta.empty()) {
|
||||
assert_equals(expected_delta, data.delta);
|
||||
}
|
||||
|
@ -274,7 +289,7 @@ static void test_template(const common_chat_template & tmpl, const std::vector<s
|
|||
assert_equals(expect_grammar_triggered, grammar_triggered);
|
||||
}
|
||||
|
||||
if (grammar_triggered && !match_string(constrained, grammar.get())) {
|
||||
if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
|
||||
throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
|
||||
"\n\nGrammar: " + data.params.grammar);
|
||||
}
|
||||
|
@ -283,16 +298,33 @@ static void test_template(const common_chat_template & tmpl, const std::vector<s
|
|||
}
|
||||
|
||||
static void test_template_output_parsers() {
|
||||
json text_message {
|
||||
json message_user {
|
||||
{ "role", "user" },
|
||||
{ "content", "Hey there!" },
|
||||
};
|
||||
json message_assist {
|
||||
{ "role", "assistant" },
|
||||
{ "content", "Hello, world!\nWhat's up?" },
|
||||
};
|
||||
json message_assist_thoughts_unparsed_think {
|
||||
{ "role", "assistant" },
|
||||
{ "content", "<think>I'm thinking</think>Hello, world!\nWhat's up?" },
|
||||
};
|
||||
json message_assist_thoughts_unparsed_r7b {
|
||||
{ "role", "assistant" },
|
||||
{ "content", "<|START_THINKING|>I'm thinking<|END_THINKING|>Hello, world!\nWhat's up?" },
|
||||
};
|
||||
json message_assist_thoughts {
|
||||
{ "role", "assistant" },
|
||||
{ "content", "Hello, world!\nWhat's up?" },
|
||||
{ "reasoning_content", "I'm thinking" },
|
||||
};
|
||||
json tool_calls = json::array({{
|
||||
{ "type", "function" },
|
||||
{ "function", { { "name", "special_function" }, { "arguments", "{\"arg1\": 1}" } } },
|
||||
}});
|
||||
|
||||
json tool_call_message {
|
||||
json message_assist_call {
|
||||
{ "role", "assistant"},
|
||||
{ "content", {}},
|
||||
{ "tool_calls", {
|
||||
|
@ -305,7 +337,34 @@ static void test_template_output_parsers() {
|
|||
},
|
||||
}},
|
||||
};
|
||||
json tool_call_message_with_id {
|
||||
json message_assist_call_thoughts = {
|
||||
{ "role", "assistant" },
|
||||
{ "content", nullptr },
|
||||
{ "reasoning_content", "I'm\nthinking" },
|
||||
{ "tool_calls", {
|
||||
{
|
||||
{ "type", "function" },
|
||||
{ "function", {
|
||||
{ "name", "special_function" },
|
||||
{ "arguments", "{\"arg1\": 1}" },
|
||||
}},
|
||||
},
|
||||
}},
|
||||
};
|
||||
json message_assist_call_thoughts_unparsed = {
|
||||
{ "role", "assistant" },
|
||||
{ "content", "<think>I'm\nthinking</think>" },
|
||||
{ "tool_calls", {
|
||||
{
|
||||
{ "type", "function" },
|
||||
{ "function", {
|
||||
{ "name", "special_function" },
|
||||
{ "arguments", "{\"arg1\": 1}" },
|
||||
}},
|
||||
},
|
||||
}},
|
||||
};
|
||||
json message_assist_call_id {
|
||||
{ "role", "assistant"},
|
||||
{ "content", {}},
|
||||
{ "tool_calls", {
|
||||
|
@ -322,10 +381,9 @@ static void test_template_output_parsers() {
|
|||
{ "content", {} },
|
||||
{ "tool_calls", tool_calls }
|
||||
};
|
||||
json tool_call_plan_message_with_idx {
|
||||
json message_assist_call_idx {
|
||||
{ "role", "assistant"},
|
||||
{ "content", {}},
|
||||
{ "tool_plan", "I'm not so sure"},
|
||||
{ "tool_calls", {
|
||||
{
|
||||
{ "type", "function" },
|
||||
|
@ -341,8 +399,10 @@ static void test_template_output_parsers() {
|
|||
{ "content", {} },
|
||||
{ "tool_calls", tool_calls }
|
||||
};
|
||||
json message_assist_call_tool_plan_idx = message_assist_call_idx;
|
||||
message_assist_call_tool_plan_idx["tool_plan"] = "I'm thinking";
|
||||
|
||||
auto python_tool_call_message = json{
|
||||
auto python_message_assist_call = json{
|
||||
{ "role", "assistant" },
|
||||
{ "content", {} },
|
||||
{ "tool_calls", json{ {
|
||||
|
@ -357,7 +417,7 @@ static void test_template_output_parsers() {
|
|||
} },
|
||||
} } }
|
||||
};
|
||||
auto code_interpreter_tool_call_message = json{
|
||||
auto code_interpreter_message_assist_call = json{
|
||||
{ "role", "assistant" },
|
||||
{ "content", {} },
|
||||
{ "tool_calls", json{ {
|
||||
|
@ -374,17 +434,27 @@ static void test_template_output_parsers() {
|
|||
};
|
||||
|
||||
common_chat_inputs inputs_no_tools;
|
||||
inputs_no_tools.messages = {
|
||||
{ { "role", "user" }, { "content", "Hey\nThere" } }
|
||||
};
|
||||
inputs_no_tools.messages = json::array({message_user});
|
||||
inputs_no_tools.extract_reasoning = false;
|
||||
|
||||
common_chat_inputs inputs_tools = inputs_no_tools;
|
||||
inputs_tools.tools = json::array();
|
||||
inputs_tools.tools.push_back(special_function_tool);
|
||||
common_chat_inputs inputs_no_tools_think;
|
||||
inputs_no_tools_think.messages = json::array({message_user});
|
||||
inputs_no_tools_think.extract_reasoning = true;
|
||||
|
||||
common_chat_inputs inputs_tools_builtin = inputs_no_tools;
|
||||
inputs_tools_builtin.tools = json::array();
|
||||
inputs_tools_builtin.tools.push_back(python_tool);
|
||||
common_chat_inputs inputs_tools;
|
||||
inputs_tools.messages = json::array({message_user});
|
||||
inputs_tools.tools = json::array({special_function_tool});
|
||||
inputs_tools.extract_reasoning = false;
|
||||
|
||||
common_chat_inputs inputs_tools_think;
|
||||
inputs_tools_think.messages = json::array({message_user});
|
||||
inputs_tools_think.tools = json::array({special_function_tool});
|
||||
inputs_tools_think.extract_reasoning = true;
|
||||
|
||||
common_chat_inputs inputs_tools_builtin;
|
||||
inputs_tools_builtin.messages = json::array({message_user});
|
||||
inputs_tools_builtin.tools = json::array({python_tool});
|
||||
inputs_tools_builtin.extract_reasoning = false;
|
||||
|
||||
{
|
||||
// Not supported yet
|
||||
|
@ -395,15 +465,53 @@ static void test_template_output_parsers() {
|
|||
const common_chat_template tmpl(read_file("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"), "<s>", "</s>");
|
||||
std::vector<std::string> end_tokens{ "<|END_OF_TURN_TOKEN|>" };
|
||||
|
||||
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_params_init(tmpl, inputs_no_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_no_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING, common_chat_params_init(tmpl, inputs_tools_think).format);
|
||||
|
||||
test_template(tmpl, end_tokens, tool_call_plan_message_with_idx, tools,
|
||||
"<|START_THINKING|>I'm not so sure<|END_THINKING|>"
|
||||
assert_msg_equals(msg_from_json(message_assist),
|
||||
common_chat_parse(
|
||||
"Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B));
|
||||
assert_msg_equals(msg_from_json(message_assist),
|
||||
common_chat_parse(
|
||||
"Hello, world!\nWhat's up?<|END_RESPONSE|>",
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B));
|
||||
assert_msg_equals(msg_from_json(message_assist),
|
||||
common_chat_parse(
|
||||
"<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B));
|
||||
assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_r7b),
|
||||
common_chat_parse(
|
||||
"<|START_THINKING|>I'm thinking<|END_THINKING|>"
|
||||
"<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B));
|
||||
assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_r7b),
|
||||
common_chat_parse(
|
||||
"<|START_THINKING|>I'm thinking<|END_THINKING|>"
|
||||
"Hello, world!\nWhat's up?<|END_RESPONSE|>",
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B));
|
||||
|
||||
assert_msg_equals(msg_from_json(message_assist_thoughts),
|
||||
common_chat_parse(
|
||||
"<|START_THINKING|>I'm thinking<|END_THINKING|>"
|
||||
"<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING));
|
||||
|
||||
test_template(tmpl, end_tokens, message_assist_call_idx, tools,
|
||||
"<|START_THINKING|><|END_THINKING|>"
|
||||
"<|START_ACTION|>[\n"
|
||||
" {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
|
||||
"]<|END_ACTION|>");
|
||||
test_template(tmpl, end_tokens, text_message, tools,
|
||||
test_template(tmpl, end_tokens, message_assist_call_tool_plan_idx, tools,
|
||||
"<|START_THINKING|>I'm thinking<|END_THINKING|>"
|
||||
"<|START_ACTION|>[\n"
|
||||
" {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
|
||||
"]<|END_ACTION|>",
|
||||
/* expect_grammar_triggered= */ true,
|
||||
/* test_grammar_if_triggered= */ true,
|
||||
/* think= */ true);
|
||||
test_template(tmpl, end_tokens, message_assist, tools,
|
||||
"<|START_RESPONSE|>Hello, world!\n"
|
||||
"What's up?<|END_RESPONSE|>",
|
||||
/* expect_grammar_triggered= */ false);
|
||||
|
@ -423,12 +531,12 @@ static void test_template_output_parsers() {
|
|||
|
||||
// Generic tool calls doesn't generate / parse content-only messages symmetrically.
|
||||
|
||||
assert_msg_equals(msg_from_json(text_message),
|
||||
assert_msg_equals(msg_from_json(message_assist),
|
||||
common_chat_parse("{\n"
|
||||
" \"response\": \"Hello, world!\\nWhat's up?\"\n"
|
||||
"}",
|
||||
common_chat_params_init(tmpl, inputs_tools).format));
|
||||
test_template(tmpl, end_tokens, tool_call_message_with_id, tools,
|
||||
test_template(tmpl, end_tokens, message_assist_call_id, tools,
|
||||
"{\n"
|
||||
" \"tool_calls\": [\n"
|
||||
" {\n"
|
||||
|
@ -448,9 +556,9 @@ static void test_template_output_parsers() {
|
|||
|
||||
assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_params_init(tmpl, inputs_tools).format);
|
||||
|
||||
test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(
|
||||
tmpl, end_tokens, tool_call_message_with_id, tools,
|
||||
tmpl, end_tokens, message_assist_call_id, tools,
|
||||
"[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
|
||||
}
|
||||
{
|
||||
|
@ -473,12 +581,12 @@ static void test_template_output_parsers() {
|
|||
inputs_tools)
|
||||
.format);
|
||||
|
||||
test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, tool_call_message, tools,
|
||||
test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, message_assist_call, tools,
|
||||
"<tool_call>\n"
|
||||
"{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
|
||||
"</tool_call>");
|
||||
test_template(tmpl, end_tokens, python_tool_call_message, tools,
|
||||
test_template(tmpl, end_tokens, python_message_assist_call, tools,
|
||||
"<tool_call>\n"
|
||||
"{\"name\": \"python\", \"arguments\": {\"code\": \"print('hey')\"}}\n"
|
||||
"</tool_call>");
|
||||
|
@ -498,12 +606,12 @@ static void test_template_output_parsers() {
|
|||
inputs_tools_builtin)
|
||||
.format);
|
||||
|
||||
// test_template(tmpl, end_tokens, text_message, tools, R"(?)", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, code_interpreter_tool_call_message, llama_3_1_tools,
|
||||
// test_template(tmpl, end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, code_interpreter_message_assist_call, llama_3_1_tools,
|
||||
"<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
|
||||
test_template(tmpl, end_tokens, python_tool_call_message, tools,
|
||||
test_template(tmpl, end_tokens, python_message_assist_call, tools,
|
||||
"<|python_tag|>python.call(code=\"print('hey')\")");
|
||||
test_template(tmpl, end_tokens, tool_call_message, tools,
|
||||
test_template(tmpl, end_tokens, message_assist_call, tools,
|
||||
"{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
|
||||
}
|
||||
{
|
||||
|
@ -513,8 +621,8 @@ static void test_template_output_parsers() {
|
|||
|
||||
assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_params_init(tmpl, inputs_tools).format);
|
||||
|
||||
test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, tool_call_message, tools,
|
||||
test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, message_assist_call, tools,
|
||||
"{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
|
||||
}
|
||||
{
|
||||
|
@ -525,8 +633,8 @@ static void test_template_output_parsers() {
|
|||
assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
|
||||
common_chat_params_init(tmpl, inputs_tools).format);
|
||||
|
||||
test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, tool_call_message, tools,
|
||||
test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, message_assist_call, tools,
|
||||
"<function=special_function>{\"arg1\": 1}</function>");
|
||||
}
|
||||
{
|
||||
|
@ -537,12 +645,12 @@ static void test_template_output_parsers() {
|
|||
assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_params_init(tmpl, inputs_no_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_params_init(tmpl, inputs_tools).format);
|
||||
|
||||
test_template(tmpl, end_tokens, text_message, {},
|
||||
test_template(tmpl, end_tokens, message_assist, {},
|
||||
"all\n"
|
||||
"Hello, world!\n"
|
||||
"What's up?",
|
||||
/* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, tool_call_message, tools,
|
||||
test_template(tmpl, end_tokens, message_assist_call, tools,
|
||||
"special_function\n"
|
||||
"{\"arg1\": 1}");
|
||||
}
|
||||
|
@ -553,23 +661,79 @@ static void test_template_output_parsers() {
|
|||
|
||||
assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_params_init(tmpl, inputs_tools).format);
|
||||
|
||||
test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, tool_call_message, tools,
|
||||
test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, message_assist_call, tools,
|
||||
" functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
|
||||
}
|
||||
{
|
||||
// Original DeepSeek R1 template. Leaves <|tool▁calls▁begin|> and others unclosed. Our logic fixes the prompt.
|
||||
const common_chat_template tmpl(read_file("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja"),
|
||||
"<s>", "</s>");
|
||||
std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
|
||||
|
||||
assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_params_init(tmpl, inputs_tools_think).format);
|
||||
|
||||
test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, tool_call_message, tools,
|
||||
"<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
|
||||
"```json\n"
|
||||
"{\"arg1\": 1}\n"
|
||||
"```<|tool▁call▁end|>");
|
||||
test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_think),
|
||||
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1));
|
||||
assert_msg_equals(msg_from_json(message_assist_thoughts),
|
||||
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
|
||||
assert_msg_equals(msg_from_json(message_assist_thoughts),
|
||||
// Latest template update (ast of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
|
||||
common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
|
||||
// test_template(tmpl, end_tokens, message_assist_call, tools,
|
||||
// "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
|
||||
// "```json\n"
|
||||
// "{\"arg1\": 1}\n"
|
||||
// // Look what's not here: <|tool▁calls▁end|> (also missing the <|end▁of▁sentence|>, but that is removed lazily by the test's delta logic)
|
||||
// "```<|tool▁call▁end|>",
|
||||
// /* expect_grammar_triggered= */ true,
|
||||
// /* test_grammar_if_triggered= */ false);
|
||||
}
|
||||
{
|
||||
// Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
|
||||
const common_chat_template tmpl(read_file("models/templates/llama-cpp-deepseek-r1.jinja"),
|
||||
"<s>", "</s>");
|
||||
std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
|
||||
|
||||
assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_params_init(tmpl, inputs_tools_think).format);
|
||||
|
||||
test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_template(tmpl, end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_think),
|
||||
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1));
|
||||
assert_msg_equals(msg_from_json(message_assist_thoughts),
|
||||
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
|
||||
|
||||
assert_msg_equals(msg_from_json(message_assist_call_thoughts_unparsed),
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think>\n\n"
|
||||
"<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
|
||||
"```json\n"
|
||||
"{\"arg1\": 1}\n"
|
||||
"```<|tool▁call▁end|><|tool▁calls▁end|>",
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1));
|
||||
assert_msg_equals(msg_from_json(message_assist_call_thoughts),
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think>\n\n"
|
||||
"<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
|
||||
"```json\n"
|
||||
"{\"arg1\": 1}\n"
|
||||
"```<|tool▁call▁end|><|tool▁calls▁end|>",
|
||||
COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
|
||||
test_template(tmpl, end_tokens, message_assist_call, tools,
|
||||
"<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
|
||||
"```json\n"
|
||||
"{\"arg1\": 1}\n"
|
||||
"```<|tool▁call▁end|><|tool▁calls▁end|>");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -586,16 +750,20 @@ int main(int argc, char ** argv) {
|
|||
std::cout << "|----------|--------|\n";
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string path = argv[i];
|
||||
if (path.rfind(".jinja") != path.size() - 6) {
|
||||
std::cerr << "Skipping non-jinja file: " << path << std::endl;
|
||||
continue;
|
||||
try {
|
||||
std::string path = argv[i];
|
||||
if (path.rfind(".jinja") != path.size() - 6) {
|
||||
std::cerr << "Skipping non-jinja file: " << path << std::endl;
|
||||
continue;
|
||||
}
|
||||
common_chat_template tmpl(read_file(path), "", "");
|
||||
auto parts = string_split(path, "/");
|
||||
auto name = parts[parts.size() - 1];
|
||||
auto format = common_chat_format_name(common_chat_params_init(tmpl, inputs).format);
|
||||
std::cout << "| " << name << " | " << format << " |\n";
|
||||
} catch (const std::exception & e) {
|
||||
std::cerr << "Failed to process " << argv[i] << ": " << e.what() << std::endl;
|
||||
}
|
||||
common_chat_template tmpl(read_file(path), "", "");
|
||||
auto parts = string_split(path, "/");
|
||||
auto name = parts[parts.size() - 1];
|
||||
std::cout << "| " << name << " | " << common_chat_format_name(common_chat_params_init(tmpl, inputs).format)
|
||||
<< " |\n";
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue