From 080982ebf320862f2da005550bf1da4a2c1c0aab Mon Sep 17 00:00:00 2001
From: ochafik
Date: Sun, 27 Oct 2024 16:39:51 +0000
Subject: [PATCH] `tool-call`: test MistralNemo in forced tools server tests
 (w/ parallel tool calls disabled)

---
 common/json-schema-to-grammar.cpp             |  2 +-
 common/tool-call.cpp                          | 40 +++++++++++--------
 examples/server/tests/features/steps/steps.py | 19 +++++++++
 .../server/tests/features/tool_call.feature   | 25 +++++++-----
 4 files changed, 57 insertions(+), 29 deletions(-)

diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index e759b31e5..351caf6d9 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -1047,7 +1047,7 @@ std::string build_grammar(const std::function<void(const llama_grammar_builder &)> & cb) {

diff --git a/common/tool-call.cpp b/common/tool-call.cpp
--- a/common/tool-call.cpp
+++ b/common/tool-call.cpp
+    llama_tool_calls result;
+    auto process_tool_calls = [&](const json & tool_calls) {
+        for (const auto & tool_call : tool_calls) {
+            const auto & arguments = tool_call["arguments"];
+            result.tool_calls.push_back({
+                tool_call["name"],
+                arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
+                tool_call["id"],
+            });
+        }
+    };
     if (content_end != std::string::npos) {
         tc_start = content_end + 12;
+        result.content = input.substr(0, content_end);
+        auto tool_calls = json::parse(input.substr(tc_start));
+        process_tool_calls(tool_calls);
     } else {
         // Somehow not getting [TOOL_CALLS] in the output. Oh well, just do without it.
-        content_end = input.find("[{\"");
-        if (content_end == std::string::npos || content_end > 0) {
-            return {input, {}};
+        try {
+            auto tool_calls = json::parse(input);
+            process_tool_calls(tool_calls);
+        } catch (const json::exception & e) {
+            throw std::runtime_error("Failed to parse tool calls: " + std::string(e.what()) + ":\n" + input);
         }
-        tc_start = content_end;
-    }
-    llama_tool_calls result;
-    result.content = input.substr(0, content_end);
-    auto tool_calls = json::parse(input.substr(tc_start));
-    for (const auto & tool_call : tool_calls) {
-        const auto & arguments = tool_call["arguments"];
-        result.tool_calls.push_back({
-            tool_call["name"],
-            arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
-            tool_call["id"],
-        });
     }
     return result;
 }
@@ -403,7 +408,7 @@ llama_tool_call_handler llama_tool_call_handler_init(
             } : tool_call;
             handler.grammar = build_grammar([&](const llama_grammar_builder & builder) {
-                builder.add_schema("", schema);
+                builder.add_schema("root", schema);
             });
             // TODO: add schema to system prompt.
             auto tweaked_messages = add_system(
@@ -450,11 +455,12 @@ llama_tool_call_handler llama_tool_call_handler_init(
                 if (!parallel) {
                     schema["maxItems"] = 1;
                 }
-                builder.add_schema("", schema);
+                builder.add_schema("root", schema);
             });
             if (allow_content) {
                 handler.grammar_trigger_words.push_back("[TOOL_CALLS]");
                 handler.grammar_trigger_words.push_back("[{\"");
+                handler.grammar_trigger_words.push_back("[ { \"");
             }
             auto tweaked_messages = add_system(messages, "Prefix any tool calls with [TOOL_CALLS]");
             handler.prompt = tmpl.apply(tweaked_messages, tools, /* add_generation_prompt= */ true);
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index aa70c46d3..edeb52c31 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -78,6 +78,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
     context.response_format = None
     context.tools = None
     context.tool_choice = None
+    context.parallel_tool_calls = None
     context.temperature = None
     context.lora_file = None
     context.disable_ctx_shift = False
@@ -393,6 +394,17 @@ def step_tools(context, tools):
 def step_tool_choice(context, tool_choice):
     context.tool_choice = tool_choice
 
+@step('parallel tool calls is {enable_parallel_tool_calls}')
+def step_parallel_tool_calls(context, enable_parallel_tool_calls):
+    if enable_parallel_tool_calls == 'enabled':
+        context.parallel_tool_calls = True
+    elif enable_parallel_tool_calls == 'disabled':
+        context.parallel_tool_calls = False
+    elif enable_parallel_tool_calls == '':
+        context.parallel_tool_calls = None
+    else:
+        raise ValueError(f"invalid value for enable_parallel_tool_calls: {enable_parallel_tool_calls}")
+
 @step('{temperature:f} temperature')
 def step_temperature(context, temperature):
     context.temperature = temperature
@@ -541,6 +553,7 @@ async def step_oai_chat_completions(context, api_error):
                                             if hasattr(context, 'tools') else None,
                                             tool_choice=context.tool_choice,
+                                            parallel_tool_calls=context.parallel_tool_calls,
                                             user_api_key=context.user_api_key
                                             if hasattr(context, 'user_api_key') else None,
@@ -615,6 +628,7 @@ async def step_oai_chat_completions(context):
                                       tools=context.tools if hasattr(context, 'tools') else None,
                                       tool_choice=context.tool_choice,
+                                      parallel_tool_calls=context.parallel_tool_calls,
                                       user_api_key=context.user_api_key if hasattr(context, 'user_api_key') else None)
@@ -638,6 +652,7 @@ async def step_oai_chat_completions(context):
                                       # if hasattr(context, 'response_format') else None,
                                       tools=context.tools,# if hasattr(context, 'tools') else None,
                                       tool_choice=context.tool_choice, # if hasattr(context, 'tool_choice') else None,
+                                      parallel_tool_calls=context.parallel_tool_calls,
                                       user_api_key=context.user_api_key) # if hasattr(context, 'user_api_key') else None)
@@ -1099,6 +1114,7 @@ async def oai_chat_completions(user_prompt,
                                response_format=None,
                                tools=None,
                                tool_choice=None,
+                               parallel_tool_calls=None,
                                user_api_key=None,
                                expect_api_error=None) -> int | dict[str, Any]:
     if debug:
@@ -1133,6 +1149,8 @@ async def oai_chat_completions(user_prompt,
         payload['tools'] = tools
     if tool_choice is not None:
         payload['tool_choice'] = tool_choice
+    if parallel_tool_calls is not None:
+        payload['parallel_tool_calls'] = parallel_tool_calls
     completion_response = {
         'content': '',
         'timings': {
@@ -1199,6 +1217,7 @@ async def oai_chat_completions(user_prompt,
                response_format=payload.get('response_format') or openai.NOT_GIVEN,
                tools=payload.get('tools') or openai.NOT_GIVEN,
                tool_choice=payload.get('tool_choice') or openai.NOT_GIVEN,
+               parallel_tool_calls=payload.get('parallel_tool_calls', openai.NOT_GIVEN),
                seed=seed,
                temperature=payload['temperature']
            )
diff --git a/examples/server/tests/features/tool_call.feature b/examples/server/tests/features/tool_call.feature
index 8aa742eb2..5a59ae67c 100644
--- a/examples/server/tests/features/tool_call.feature
+++ b/examples/server/tests/features/tool_call.feature
@@ -16,7 +16,7 @@ Feature: llama.cpp server
     And   jinja templates are enabled
 
 
-  Scenario Outline: OAI Compatibility w/ tools and required tool_choice
+  Scenario Outline: OAI Compatibility w/ tools and required tool_choice (<template_name> template, <tool_name> tool)
     Given a chat template file ../../../tests/chat/templates/<template_name>.jinja
     And   the server is starting
     And   the server is healthy
@@ -25,22 +25,25 @@ Feature: llama.cpp server
     And   a user prompt write a hello world in python
     And   a tool choice required
     And   tools <tools>
+    And   parallel tool calls is <parallel_tool_calls>
     And   an OAI compatible chat completions request with no api error
     Then  tool <tool_name> is called with arguments <tool_arguments>
 
     Examples: Prompts
-      | template_name | n_predict | tool_name | tool_arguments | tools |
-      | meetkai-functionary-medium-v3.1 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] |
-      | meetkai-functionary-medium-v3.1 | 128 | ipython | {"code": "Yes, you can."} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
-      | meetkai-functionary-medium-v3.2 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] |
-      | meetkai-functionary-medium-v3.2 | 128 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
-      | meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] |
-      | meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | ipython | {"code": "it and realed at the otter. Asked Dave Dasty, Daisy is a big, shiny blue. As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] |
As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | - | meta-llama-Llama-3.2-3B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | - | meta-llama-Llama-3.2-3B-Instruct | 64 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | + | template_name | n_predict | tool_name | tool_arguments | tools | parallel_tool_calls | + | meetkai-functionary-medium-v3.1 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | + | meetkai-functionary-medium-v3.1 | 128 | ipython | {"code": "Yes, you can."} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | + | meetkai-functionary-medium-v3.2 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | + | meetkai-functionary-medium-v3.2 | 128 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | + | meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | + | meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | ipython | {"code": "it and realed at the otter. Asked Dave Dasty, Daisy is a big, shiny blue. As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | + | meta-llama-Llama-3.2-3B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | + | meta-llama-Llama-3.2-3B-Instruct | 64 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | + | mistralai-Mistral-Nemo-Instruct-2407 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | + | mistralai-Mistral-Nemo-Instruct-2407 | 128 | ipython | {"code": "It's a small cable."} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | - Scenario Outline: OAI Compatibility w/ tools and auto tool_choice + Scenario Outline: OAI Compatibility w/ tools and auto tool_choice ( template) Given a chat template file ../../../tests/chat/templates/.jinja And the server is starting And the server is healthy