diff --git a/common/tool-call.cpp b/common/tool-call.cpp
index f382a776d..559c6653b 100644
--- a/common/tool-call.cpp
+++ b/common/tool-call.cpp
@@ -57,6 +57,56 @@ static bool parse_json(std::string::const_iterator & it, const std::string::cons
     }
 }
 
+/**
+ * Takes a prefix regex that must have one capture group (the function name), a closing suffix regex,
+ * and expects JSON parameters in between. Any text before, between or after the calls is
+ * aggregated into the content.
+ */
+static llama_tool_calls parse_json_tool_calls(const json & tools, const std::string& input, const std::regex & function_regex, const std::regex & close_regex, bool check_names) {
+    std::smatch match;
+
+    llama_tool_calls result;
+    auto end = input.end();
+    auto it = input.begin();
+
+    std::unordered_set<std::string> tool_names;
+    if (check_names) {
+        for (const auto & tool : tools) {
+            if (tool.contains("type") && tool["type"] == "function") {
+                tool_names.insert(tool["function"]["name"]);
+            }
+        }
+    }
+
+    while (it != end) {
+        std::sregex_iterator rend;
+        std::sregex_iterator rit(it, end, function_regex);
+        if (rit == rend) {
+            result.content += std::string(it, end);
+            break;
+        }
+        auto name = rit->str(1);
+        if (check_names && tool_names.find(name) == tool_names.end()) {
+            result.content += std::string(it, rit->suffix().first);
+            break;
+        }
+
+        result.content += std::string(it, rit->prefix().second);
+        it = rit->suffix().first;
+
+        json arguments;
+        if (!parse_json(it, end, arguments)) {
+            throw std::runtime_error("Failed to parse json tool call arguments");
+        }
+        if (!std::regex_search(it, end, match, close_regex)) {
+            throw std::runtime_error("Malformed input, missing closing pattern");
+        }
+        it = match.suffix().first;
+        result.tool_calls.push_back({name, arguments.dump()});
+    }
+    return result;
+}
+
 static llama_tool_calls parse_hermes_tool_calls(const std::string& input) {
     try {
         std::regex start_pattern(R"([\n\s]*<tool_call>)");
@@ -100,81 +150,21 @@ static llama_tool_calls parse_hermes_tool_calls(const std::string& input) {
     }
 }
 
-static llama_tool_calls parse_llama_3_1_tool_calls(const json & tools, const std::string& input) {
-    static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
-    std::smatch match;
-    if (std::regex_search(input, match, python_tag_regex)) {
-        return {
-            match.prefix().str(), {
-                {"ipython", (json {{"code", match[1].str()}}).dump()},
-            }
-        };
-    }
-    try {
-        auto call = json::parse(input);
-        // Only treat JSON as a tool call if it has a name attribute that matches any of the tools specified in the request.
-        // There doesn't seem to be any better way to detect a tool call.
-        if (call.contains("name") && call["name"].is_string()) {
-            std::string name = call["name"];
-            for (const auto & tool : tools) {
-                if (tool.at("function").at("name") == name) {
-                    return {
-                        "",
-                        {
-                            {name, call["parameters"].dump()},
-                        }
-                    };
+static llama_tool_calls parse_llama_3_tool_calls(const json & tools, const std::string& input, bool allow_python_tag) {
+    if (allow_python_tag) {
+        static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
+        std::smatch match;
+        if (std::regex_search(input, match, python_tag_regex)) {
+            return {
+                match.prefix().str(), {
+                    {"ipython", (json {{"code", match[1].str()}}).dump()},
                 }
-            }
-        }
-    } catch (const std::exception & e) {
-        // Do nothing
-    }
-    return {input, {}};
-}
-
-static llama_tool_calls parse_functionary_tool_calls(const json & tools, const std::string& input, const std::regex & function_regex, const std::regex & close_regex) {
-    std::smatch match;
-
-    llama_tool_calls result;
-    auto end = input.end();
-    auto it = input.begin();
-
-    std::unordered_set<std::string> tool_names;
-    for (const auto & tool : tools) {
-        if (tool.contains("type") && tool["type"] == "function") {
-            tool_names.insert(tool["function"]["name"]);
+            };
         }
     }
-
-    while (it != end) {
-        std::sregex_iterator rend;
-        std::sregex_iterator rit(it, end, function_regex);
-        if (rit == rend) {
-            result.content += std::string(it, end);
-            break;
-        }
-        auto name = rit->str(1);
-        if (tool_names.find(name) == tool_names.end()) {
-            result.content += std::string(it, rit->suffix().first);
-            break;
-        }
-
-        result.content += std::string(it, rit->prefix().second);
-        it = rit->suffix().first;
-
-
-        json arguments;
-        if (!parse_json(it, end, arguments)) {
-            throw std::runtime_error("Failed to parse json tool call arguments");
-        }
-        if (!std::regex_search(it, end, match, close_regex)) {
-            throw std::runtime_error("Malformed input, missing closing pattern");
-        }
-        it = match.suffix().first;
-        result.tool_calls.push_back({name, arguments.dump()});
-    }
-    return result;
+    static std::regex function_regex("(?:^|\\n)\\{\"name\": \"([^\"]+)\", \"parameters\": ");
+    static std::regex close_regex("\\}");
+    return parse_json_tool_calls(tools, input, function_regex, close_regex, /* check_names= */ false);
 }
 
 static llama_tool_calls parse_functionary_v3_llama_3_1_tool_calls(const json & tools, const std::string& input) {
@@ -190,19 +180,21 @@ static llama_tool_calls parse_functionary_v3_llama_3_1_tool_calls(const json & t
     }
     static std::regex function_regex(R"(<function=(\w+)>)");
     static std::regex close_regex(R"(</function>)");
-    return parse_functionary_tool_calls(tools, input, function_regex, close_regex);
+    return parse_json_tool_calls(tools, input, function_regex, close_regex, /* check_names= */ false);
 }
 
 static llama_tool_calls parse_functionary_v3_tool_calls(const json & tools, const std::string& input) {
     static std::regex function_regex(R"((?:>>>)?(\w+)\n)");
-    static std::regex close_regex(R"($|\n(?=>>>))");
-    return parse_functionary_tool_calls(tools, input, function_regex, close_regex);
+    static std::regex close_regex(R"($|(?=>>>))");
+    return parse_json_tool_calls(tools, input, function_regex, close_regex, /* check_names= */ true);
 }
 
 llama_tool_calls parse_tool_calls(llama_tool_call_style style, const json & tools, const std::string& input) {
     switch (style) {
         case llama_tool_call_style::Llama31:
-            return parse_llama_3_1_tool_calls(tools, input);
+            return parse_llama_3_tool_calls(tools, input, /* allow_python_tag= */ true);
+        case llama_tool_call_style::Llama32:
+            return parse_llama_3_tool_calls(tools, input, /* allow_python_tag= */ false);
         case llama_tool_call_style::FunctionaryV3Llama3:
             return parse_functionary_v3_tool_calls(tools, input);
         case llama_tool_call_style::FunctionaryV3Llama31:
@@ -224,9 +216,19 @@ llama_tool_call_handler llama_tool_call_handler_init(
     llama_tool_call_handler handler;
 
     switch (tmpl.tool_call_style()) {
-        case llama_tool_call_style::Llama31: {
+        case llama_tool_call_style::Llama31:
+        case llama_tool_call_style::Llama32: {
+            static auto builtin_tools = json {"wolfram_alpha", "brave_search"};
+
+            auto uses_python_tag = tmpl.tool_call_style() == llama_tool_call_style::Llama31;
+
+            // Technically we should only trigger on `"\n{\"name\": \"" + name + "\""` for each tool name,
+            // but Llama-3.2-3B struggles to output valid tool calls, so we "guide" it strongly as soon
+            // as it seems to be outputting some JSON.
+            // TODO: make this conditional on a very small model (e.g. 1B / 3B).
+            auto eagerly_match_any_json = true;
+
             handler.grammar = build_grammar([&](const llama_grammar_builder & builder) {
-                static std::vector<std::string> builtin_tools {"wolfram_alpha", "brave_search"};
                 std::vector<std::string> tool_rules;
 
                 for (const auto & tool : tools) {
@@ -234,7 +236,7 @@ llama_tool_call_handler llama_tool_call_handler_init(
                     std::string name = function["name"];
                     auto parameters = function["parameters"];
                     builder.resolve_refs(parameters);
-                    if (name == "ipython" || std::find(builtin_tools.begin(), builtin_tools.end(), name) != builtin_tools.end()) {
+                    if (uses_python_tag && (name == "ipython" || builtin_tools.contains(name))) {
                         tool_rules.push_back(builder.add_rule("ipython-call", "\"<|python_tag|>\" .*"));
                         if (allow_content) {
                             handler.grammar_trigger_words.push_back("<|python_tag|>");
@@ -244,15 +246,20 @@ llama_tool_call_handler llama_tool_call_handler_init(
                         tool_rules.push_back(
                             builder.add_rule(
                                 name + "-call",
-                                "\"\\n{\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " +
+                                "\"\\n\"? \"{\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " +
                                 builder.add_schema(name + "-args", parameters) + " \"}\""));
-                        if (allow_content) {
+                        if (allow_content && !eagerly_match_any_json) {
                             handler.grammar_trigger_words.push_back("\n{\"name\": \"" + name + "\"");
                         }
                     }
                 }
 
+                if (allow_content && eagerly_match_any_json) {
+                    handler.grammar_trigger_words.push_back("\n{\"");
+                    handler.grammar_trigger_words.push_back("{\"");
+                }
+
                 builder.add_rule("root", join(tool_rules.begin(), tool_rules.end(), " | "));
             });
             handler.additional_stop_words.push_back("<|eom_id|>");
@@ -274,7 +281,7 @@ llama_tool_call_handler llama_tool_call_handler_init(
                     auto parameters = function["parameters"];
                     auto args_rule = builder.add_schema(name + "-args", parameters);
                     first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule));
-                    subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\"\\n>>>" + name + "\\n\" " + args_rule));
+                    subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
                     if (allow_content) {
                         handler.grammar_trigger_words.push_back(name + "\n");
                         handler.grammar_trigger_words.push_back(">>>" + name + "\n");
diff --git a/examples/agent/README.md b/examples/agent/README.md
index 1b8a318ea..45b159815 100644
--- a/examples/agent/README.md
+++ b/examples/agent/README.md
@@ -2,42 +2,47 @@
 
 - Install prerequisite: [uv](https://docs.astral.sh/uv/) (used to simplify python deps)
 
-- Run `llama-server` w/ jinja templates:
+- Run `llama-server` w/ jinja templates. Note that most models need a template override (the HF to GGUF conversion only retains a single `chat_template`, but sometimes the models only support tool calls in an alternative chat template).
 
   ```bash
   make -j LLAMA_CURL=1 llama-server
-  ./llama-server \
-    --jinja -fa \
-    -mu https://huggingface.co/lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf
-  ```
-
-- Instructions for NousResearch/Hermes-2-Pro-Llama-3-8B (needs template override)
-
-  The HF model had two variants for its chat template (`default` and `tool_use`), but the GGUF only retained the `default` one.
-
-  ```bash
-  ./llama-server \
-    --jinja -fa \
-    -mu https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q8_0.gguf \
+
+  # Nous Hermes 2 Pro Llama 3 8B
+  ./llama-server --jinja -fa --verbose \
+    -hfr NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF -hff Hermes-2-Pro-Llama-3-8B-Q8_0.gguf \
     --chat-template-file tests/chat/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
-  ```
-`
-
-- Instructions for meekai/functionary-small-v3.2 (needs template override)
+
+  # Llama 3.1 8B
+  ./llama-server --jinja -fa --verbose \
+    -hfr lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF -hff Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf
 
-  The template in the GGUF doesn't support tool calls, but its bigger brother's template can be used:
-
-  ```bash
-  ./llama-server \
-    --jinja -fa \
-    -mu https://huggingface.co/meetkai/functionary-small-v3.2-GGUF/resolve/main/functionary-small-v3.2.Q4_0.gguf \
+  # functionary-small-v3.2
+  ./llama-server --jinja -fa --verbose \
+    -hfr meetkai/functionary-small-v3.2-GGUF -hff functionary-small-v3.2.Q4_0.gguf \
     --chat-template-file tests/chat/templates/meetkai-functionary-medium-v3.2.jinja
-  ```
-
+
+  ./llama-server --jinja -fa --verbose \
+    -m ~/Downloads/functionary-small-v3.2.Q4_0.gguf \
+    --chat-template-file tests/chat/templates/meetkai-functionary-medium-v3.2.jinja
+
+  # Llama 3.2 3B (poor adherence)
+  ./llama-server --jinja -fa --verbose \
+    -hfr lmstudio-community/Llama-3.2-3B-Instruct-GGUF -hff Llama-3.2-3B-Instruct-Q6_K_L.gguf \
+    --chat-template-file tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
+
+  ./llama-server --jinja -fa --verbose \
+    -m ~/Downloads/Llama-3.2-3B-Instruct-Q6_K_L.gguf \
+    --chat-template-file tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
+
+  # Llama 3.2 1B (very poor adherence)
+  ./llama-server --jinja -fa --verbose \
+    -hfr lmstudio-community/Llama-3.2-1B-Instruct-GGUF -hff Llama-3.2-1B-Instruct-Q4_K_M.gguf \
+    --chat-template-file tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
+
+  # Llama 3.1 70B (untested)
+  ./llama-server --jinja -fa --verbose \
+    -hfr lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF -hff Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf
   ```
 
 - Run some tools inside a docker container (check http://localhost:8088/docs once running):
 
@@ -57,3 +62,7 @@
     --tool-endpoint http://localhost:8088 \
     --goal "What is the sum of 2535 squared and 32222000403?"
   ```
+
+## TODO
+
+- Implement code_interpreter using whichever tools are builtin for a given model.
diff --git a/examples/server/tests/features/tool_call.feature b/examples/server/tests/features/tool_call.feature
index ae5326dd5..8aa742eb2 100644
--- a/examples/server/tests/features/tool_call.feature
+++ b/examples/server/tests/features/tool_call.feature
@@ -35,7 +35,9 @@ Feature: llama.cpp server
       | meetkai-functionary-medium-v3.2       | 128       | test     | {}               | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] |
       | meetkai-functionary-medium-v3.2       | 128       | ipython  | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
       | meta-llama-Meta-Llama-3.1-8B-Instruct | 64        | test     | {}               | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] |
-      | meta-llama-Meta-Llama-3.1-8B-Instruct | 16        | ipython  | {"code": "it and "} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
+      | meta-llama-Meta-Llama-3.1-8B-Instruct | 64        | ipython  | {"code": "it and realed at the otter. Asked Dave Dasty, Daisy is a big, shiny blue. As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
+      | meta-llama-Llama-3.2-3B-Instruct      | 64        | test     | {}               | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] |
+      | meta-llama-Llama-3.2-3B-Instruct      | 64        | ipython  | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
 
   Scenario Outline: OAI Compatibility w/ tools and auto tool_choice
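
For a quick local sanity check of the parsing changes above, the new entry point can be exercised with a small driver. This is a minimal sketch, not part of the patch: it assumes `common/tool-call.h` declares `llama_tool_call_style`, `llama_tool_calls`, and `parse_tool_calls` as defined in the diff, and the `name`/`arguments` member names of each parsed call are an assumption inferred from the `push_back({name, arguments.dump()})` calls.

```cpp
// Hypothetical smoke test for parse_tool_calls (header name and tool-call
// member names are assumptions; the parsing behavior follows the diff above).
#include "tool-call.h"

#include <nlohmann/json.hpp>
#include <cstdio>
#include <string>

using json = nlohmann::json;

int main() {
    // One tool declared in the OpenAI-style "tools" format the parser expects.
    json tools = json::parse(R"([
        {"type": "function", "function": {
            "name": "ipython",
            "parameters": {"type": "object", "properties": {"code": {"type": "string"}}}
        }}
    ])");

    // Llama 3.2 style output: bare JSON with "name" and "parameters".
    std::string output = R"({"name": "ipython", "parameters": {"code": "print(1)"}})";

    auto result = parse_tool_calls(llama_tool_call_style::Llama32, tools, output);
    printf("content: %s\n", result.content.c_str());
    for (const auto & call : result.tool_calls) {
        // `name` / `arguments` field names assumed from the diff's push_back calls.
        printf("call: %s(%s)\n", call.name.c_str(), call.arguments.c_str());
    }
    return 0;
}
```

With the `Llama32` style this input should yield empty content and a single `ipython` call; with `Llama31`, the same JSON parses identically, and `<|python_tag|>...` outputs are additionally routed to the `ipython` builtin.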