tool-calls: migrate tests to pytest

ochafik 2024-12-10 01:11:08 +00:00
parent 1f0b15799b
commit 93a5245b0e
8 changed files with 282 additions and 165 deletions

View file

@@ -383,7 +383,11 @@ static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages
json messages_with_system = messages;
if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
messages_with_system.at(0).at("content") += ("\n" + system_prompt);
std::string existing_system = messages_with_system.at(0).at("content");
messages_with_system[0] = json {
{"role", "system"},
{"content", existing_system + "\n" + system_prompt},
};
} else {
messages_with_system.insert(messages_with_system.begin(), json {
{"role", "system"},
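
For reference, the merge behaviour after this change corresponds to the following minimal Python sketch (the helper name and the truncated else branch are filled in as assumptions): instead of appending to the existing content in place, the first system message is rebuilt as a fresh object.

def add_system(messages: list[dict], system_prompt: str) -> list[dict]:
    # Work on a copy, mirroring the C++ `messages_with_system = messages`.
    messages_with_system = list(messages)
    if messages_with_system and messages_with_system[0].get("role") == "system":
        # Rebuild the first system message with the extra prompt appended.
        existing_system = messages_with_system[0]["content"]
        messages_with_system[0] = {
            "role": "system",
            "content": existing_system + "\n" + system_prompt,
        }
    else:
        # Assumed from the truncated context: prepend a fresh system message.
        messages_with_system.insert(0, {"role": "system", "content": system_prompt})
    return messages_with_system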

View file

@@ -1,163 +0,0 @@
@llama.cpp
@server
Feature: llama.cpp server
Background: Server startup
Given a server listening on localhost:8080
And BOS token is 1
And 42 as server seed
And greedy sampling
And 8192 KV cache size
And 32 as batch size
And 1 slots
And prometheus compatible metrics exposed
And jinja templates are enabled
Scenario Outline: Template <template_name> + tinystories model w/ required tool_choice yields <tool_name> tool call
Given a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
And a test chat template file named <template_name>
And the server is starting
And the server is healthy
And a model test
And <n_predict> max tokens to predict
And a user prompt say hello world with python
And a tool choice required
And <tool_name> tool
And parallel tool calls is <parallel_tool_calls>
And an OAI compatible chat completions request with no api error
Then tool <tool_name> is called with arguments <tool_arguments>
Examples: Prompts
| template_name | n_predict | tool_name | tool_arguments | parallel_tool_calls |
| meetkai-functionary-medium-v3.1 | 32 | test | {} | disabled |
| meetkai-functionary-medium-v3.1 | 32 | python | {"code": ". She was so excited to go to the park and s"} | disabled |
| meetkai-functionary-medium-v3.2 | 32 | test | {} | disabled |
| meetkai-functionary-medium-v3.2 | 32 | python | {"code": "Yes,"} | disabled |
| NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use | 128 | test | {} | disabled |
| NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use | 128 | python | {"code": "Yes,"} | disabled |
| NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 128 | test | {} | disabled |
| NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 128 | python | {"code": "Yes,"} | disabled |
| meta-llama-Meta-Llama-3.1-8B-Instruct | 128 | test | {} | disabled |
| meta-llama-Meta-Llama-3.1-8B-Instruct | 128 | python | {"code": "It's a shark."} | disabled |
| meta-llama-Llama-3.2-3B-Instruct | 128 | test | {} | disabled |
| meta-llama-Llama-3.2-3B-Instruct | 128 | python | {"code": "It's a shark."} | disabled |
| mistralai-Mistral-Nemo-Instruct-2407 | 128 | test | {} | disabled |
| mistralai-Mistral-Nemo-Instruct-2407 | 128 | python | {"code": "It's a small cost."} | disabled |
Scenario Outline: Template <template_name> + tinystories model yields no tool call
Given a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
And a test chat template file named <template_name>
And the server is starting
And the server is healthy
And a model test
And <n_predict> max tokens to predict
And a user prompt say hello world with python
And tools [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}]
And an OAI compatible chat completions request with no api error
Then no tool is called
Examples: Prompts
| template_name | n_predict |
| meta-llama-Meta-Llama-3.1-8B-Instruct | 64 |
| meetkai-functionary-medium-v3.1 | 128 |
| meetkai-functionary-medium-v3.2 | 128 |
Scenario: Tool call template + tinystories and no tool won't call any tool
Given a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
And a test chat template file named meta-llama-Meta-Llama-3.1-8B-Instruct
And the server is starting
And the server is healthy
And a model test
And 16 max tokens to predict
And a user prompt say hello world with python
And tools []
And an OAI compatible chat completions request with no api error
Then no tool is called
@slow
Scenario Outline: Python hello world w/ <hf_repo> + <tool> tool yields python call
Given a model file <hf_file> from HF repo <hf_repo>
And a test chat template file named <template_override>
And no warmup
And the server is starting
And the server is healthy
And a model test
And 256 max tokens to predict
And a user prompt say hello world with python
And <tool> tool
And parallel tool calls is disabled
And an OAI compatible chat completions request with no api error
Then tool python is called with arguments <tool_arguments>
Examples: Prompts
| tool | tool_arguments | hf_repo | hf_file | template_override |
| python | {"code": "print('Hello, world!')"} | bartowski/gemma-2-2b-it-GGUF | gemma-2-2b-it-Q4_K_M.gguf | |
| python | {"code": "print('Hello, World!')"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q4_K_M.gguf | |
| python | {"code": "print(\"Hello World\")"} | bartowski/Qwen2.5-7B-Instruct-GGUF | Qwen2.5-7B-Instruct-Q4_K_M.gguf | |
| python | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | |
| python | {"code": "print('Hello, world!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use |
| python | {"code": "print('hello world')"} | NousResearch/Hermes-3-Llama-3.1-8B-GGUF | Hermes-3-Llama-3.1-8B.Q4_K_M.gguf | NousResearch-Hermes-3-Llama-3.1-8B-tool_use |
| python | {"code": "print('Hello, World!'}"} | bartowski/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
| python | {"code": "print("} | bartowski/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
| python | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF | Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf | |
| python | {"code": "print('Hello, World!')"} | bartowski/functionary-small-v3.2-GGUF | functionary-small-v3.2-Q8_0.gguf | meetkai-functionary-medium-v3.2 |
| code_interpreter | {"code": "print('Hello, world!')"} | bartowski/gemma-2-2b-it-GGUF | gemma-2-2b-it-Q4_K_M.gguf | |
| code_interpreter | {"code": "print('Hello, World!')"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q4_K_M.gguf | mistralai-Mistral-Nemo-Instruct-2407 |
| code_interpreter | {"code": "print(\"Hello World\")"} | bartowski/Qwen2.5-7B-Instruct-GGUF | Qwen2.5-7B-Instruct-Q4_K_M.gguf | |
| code_interpreter | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | |
| code_interpreter | {"code": "print('Hello, world!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use |
| code_interpreter | {"code": "print('hello world')"} | NousResearch/Hermes-3-Llama-3.1-8B-GGUF | Hermes-3-Llama-3.1-8B.Q4_K_M.gguf | NousResearch-Hermes-3-Llama-3.1-8B-tool_use |
| code_interpreter | {"code": "print('Hello, World!'}"} | lmstudio-community/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
| code_interpreter | {"code": "print("} | lmstudio-community/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
| code_interpreter | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF | Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf | |
| code_interpreter | {"code": "print('Hello, World!')"} | bartowski/functionary-small-v3.2-GGUF | functionary-small-v3.2-Q8_0.gguf | meetkai-functionary-medium-v3.2 |
@slow
Scenario Outline: Python hello world w/o tools yields no tool call
Given a model file Phi-3.5-mini-instruct-Q4_K_M.gguf from HF repo bartowski/Phi-3.5-mini-instruct-GGUF
And no warmup
And the server is starting
And the server is healthy
And a model test
And 256 max tokens to predict
And a user prompt say hello world with python
And parallel tool calls is disabled
And an OAI compatible chat completions request with no api error
Then no tool is called
@slow
Scenario Outline: Python hello world w/ none tool_choice yields no tool call
Given a model file Phi-3.5-mini-instruct-Q4_K_M.gguf from HF repo bartowski/Phi-3.5-mini-instruct-GGUF
And no warmup
And the server is starting
And the server is healthy
And a model test
And 256 max tokens to predict
And a user prompt say hello world with python
And a tool choice none
And python tool
And parallel tool calls is disabled
And an OAI compatible chat completions request with no api error
Then no tool is called
@slow
Scenario: Parallel tool calls
Given a model file Mistral-Nemo-Instruct-2407-Q4_K_M.gguf from HF repo bartowski/Mistral-Nemo-Instruct-2407-GGUF
And a test chat template file named mistralai-Mistral-Nemo-Instruct-2407
And no warmup
And the server is starting
And the server is healthy
And a model test
And 512 max tokens to predict
And a user prompt get the weather in paris and search for llama.cpp's latest commits (don't write comments in the code)
And python tool
And parallel tool calls is enabled
And an OAI compatible chat completions request with no api error
Then receiving the following tool calls: [{"arguments": {"code": "import requests\nresponse = requests.get('https://api.openweathermap.org/data/2.9/weather?q=Paris&appid=YOUR_API_KEY')\nprint(response.json())"}, "name": "ipython" , "id": "123456789"}, {"arguments": {"code": "!git log --oneline --after 2024-01-01 --before 2024-12-31 llama.cpp" }, "name": "ipython" , "id": "987654321"}]

View file

@@ -0,0 +1,4 @@
[pytest]
markers =
    slow: marks tests as slow (deselect with '-m "not slow"')
    serial
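
These markers are what the migrated tests below rely on: tests tagged slow are deselected by the default run (the run script now invokes pytest -v -x -m "not slow"). A minimal sketch of how a test opts into the marker (the test name and body are illustrative, not part of this commit):

import pytest

@pytest.mark.slow  # deselected by the default -m "not slow" invocation
def test_that_downloads_a_large_model():
    assert True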

View file

@@ -4,7 +4,7 @@ set -eu
if [ $# -lt 1 ]
then
pytest -v -x
pytest -v -x -m "not slow"
else
pytest "$@"
fi

View file

@@ -163,3 +163,159 @@ def test_chat_completion_with_timings_per_token():
    assert "predicted_per_second" in data["timings"]
    assert "predicted_n" in data["timings"]
    assert data["timings"]["predicted_n"] <= 10


TEST_TOOL = {
    "type": "function",
    "function": {
        "name": "test",
        "description": "",
        "parameters": {
            "type": "object",
            "properties": {}
        }
    }
}

PYTHON_TOOL = {
    "type": "function",
    "function": {
        "name": "python",
        "description": "Runs code in a Python interpreter and returns the result of the execution after 60 seconds.",
        "parameters": {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "The code to run in the Python interpreter."
                }
            },
            "required": ["code"]
        }
    }
}

CODE_INTERPRETER_TOOL = {
    "type": "code_interpreter",
}


@pytest.mark.parametrize("template_name,n_predict,tool,expected_arguments", [
    ("meetkai-functionary-medium-v3.1", 32, TEST_TOOL, {}),
    ("meetkai-functionary-medium-v3.1", 32, PYTHON_TOOL, {"code": ". She was so excited to go to the park and s"}),
    ("meetkai-functionary-medium-v3.2", 32, TEST_TOOL, {}),
    ("meetkai-functionary-medium-v3.2", 32, PYTHON_TOOL, {"code": "Yes,"}),
    ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", 128, TEST_TOOL, {}),
    ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", 128, PYTHON_TOOL, {"code": "Yes,"}),
    ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", 128, TEST_TOOL, {}),
    ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", 128, PYTHON_TOOL, {"code": "Yes,"}),
    ("meta-llama-Meta-Llama-3.1-8B-Instruct", 128, TEST_TOOL, {}),
    ("meta-llama-Meta-Llama-3.1-8B-Instruct", 128, PYTHON_TOOL, {"code": "It's a shark."}),
    ("meta-llama-Llama-3.2-3B-Instruct", 128, TEST_TOOL, {}),
    ("meta-llama-Llama-3.2-3B-Instruct", 128, PYTHON_TOOL, {"code": "It's a shark."}),
    ("mistralai-Mistral-Nemo-Instruct-2407", 128, TEST_TOOL, {}),
    ("mistralai-Mistral-Nemo-Instruct-2407", 128, PYTHON_TOOL, {"code": "It's a small cost."}),
])
def test_completion_with_required_tool(template_name: str, n_predict: int, tool: dict, expected_arguments: dict):
    global server
    server.use_jinja = True
    server.chat_template_file = f'../../../tests/chat/templates/{template_name}.jinja'
    server.start()
    res = server.make_request("POST", "/chat/completions", data={
        "max_tokens": n_predict,
        "messages": [
            {"role": "system", "content": "You are a coding assistant."},
            {"role": "user", "content": "Write an example"},
        ],
        "tool_choice": tool["function"]["name"],
        "tools": [tool],
    })
    assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
    choice = res.body["choices"][0]
    tool_calls = choice["message"].get("tool_calls")
    assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
    tool_call = tool_calls[0]
    assert tool["function"]["name"] == tool_call["function"]["name"]
    actual_arguments = json.loads(tool_call["function"]["arguments"])
    assert json.dumps(expected_arguments) == json.dumps(actual_arguments), f"tool arguments: {json.dumps(actual_arguments)}, expected: {json.dumps(expected_arguments)}"


@pytest.mark.parametrize("template_name,n_predict,tools,tool_choice", [
    ("meetkai-functionary-medium-v3.1", 32, [], None),
    ("meetkai-functionary-medium-v3.1", 32, [TEST_TOOL], None),
    ("meetkai-functionary-medium-v3.1", 32, [PYTHON_TOOL], 'none'),
    ("meetkai-functionary-medium-v3.2", 32, [], None),
    ("meetkai-functionary-medium-v3.2", 32, [TEST_TOOL], None),
    ("meetkai-functionary-medium-v3.2", 32, [PYTHON_TOOL], 'none'),
    ("meta-llama-Meta-Llama-3.1-8B-Instruct", 128, [], None),
    ("meta-llama-Meta-Llama-3.1-8B-Instruct", 128, [TEST_TOOL], None),
    ("meta-llama-Meta-Llama-3.1-8B-Instruct", 128, [PYTHON_TOOL], 'none'),
])
def test_completion_without_tool_call(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None):
    global server
    server.use_jinja = True
    server.chat_template_file = f'../../../tests/chat/templates/{template_name}.jinja'
    server.start()
    res = server.make_request("POST", "/chat/completions", data={
        "max_tokens": n_predict,
        "messages": [
            {"role": "system", "content": "You are a coding assistant."},
            {"role": "user", "content": "say hello world with python"},
        ],
        "tools": tools if tools else None,
        "tool_choice": tool_choice,
    })
    assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
    choice = res.body["choices"][0]
    assert "tool_calls" not in choice["message"], f'Expected no tool call in {choice["message"]}'


@pytest.mark.slow
@pytest.mark.parametrize("tool,expected_arguments,hf_repo,hf_file,template_override", [
    (PYTHON_TOOL, {"code": "print('Hello, world!')"}, "bartowski/gemma-2-2b-it-GGUF", "gemma-2-2b-it-Q4_K_M.gguf", None),
    (PYTHON_TOOL, {"code": "print('Hello, World!')"}, "bartowski/Mistral-Nemo-Instruct-2407-GGUF", "Mistral-Nemo-Instruct-2407-Q4_K_M.gguf", None),
    (PYTHON_TOOL, {"code": "print(\"Hello World\")"}, "bartowski/Qwen2.5-7B-Instruct-GGUF", "Qwen2.5-7B-Instruct-Q4_K_M.gguf", None),
    (PYTHON_TOOL, {"code": "print('Hello, World!')"}, "bartowski/Phi-3.5-mini-instruct-GGUF", "Phi-3.5-mini-instruct-Q4_K_M.gguf", None),
    (PYTHON_TOOL, {"code": "print('Hello, world!')"}, "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
    (PYTHON_TOOL, {"code": "print('hello world')"}, "NousResearch/Hermes-3-Llama-3.1-8B-GGUF", "Hermes-3-Llama-3.1-8B.Q4_K_M.gguf", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
    (PYTHON_TOOL, {"code": "print('Hello, World!')"}, "bartowski/Llama-3.2-1B-Instruct-GGUF", "Llama-3.2-1B-Instruct-Q4_K_M.gguf", ("meta-llama-Llama-3.2-3B-Instruct", None)),
    (PYTHON_TOOL, {"code": "print("}, "bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_K_M.gguf", ("meta-llama-Llama-3.2-3B-Instruct", None)),
    (PYTHON_TOOL, {"code": "print("}, "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
    (PYTHON_TOOL, {"code": "print('Hello, World!')"}, "bartowski/functionary-small-v3.2-GGUF", "functionary-small-v3.2-Q8_0.gguf", ("meetkai-functionary-medium-v3.2", None)),
    (CODE_INTERPRETER_TOOL, {"code": "print('Hello, world!')"}, "bartowski/gemma-2-2b-it-GGUF", "gemma-2-2b-it-Q4_K_M.gguf", None),
    (CODE_INTERPRETER_TOOL, {"code": "print('Hello, World!')"}, "bartowski/Mistral-Nemo-Instruct-2407-GGUF", "Mistral-Nemo-Instruct-2407-Q4_K_M.gguf", ("mistralai-Mistral-Nemo-Instruct-2407", None)),
    (CODE_INTERPRETER_TOOL, {"code": "print(\"Hello World\")"}, "bartowski/Qwen2.5-7B-Instruct-GGUF", "Qwen2.5-7B-Instruct-Q4_K_M.gguf", None),
    (CODE_INTERPRETER_TOOL, {"code": "print('Hello, World!')"}, "bartowski/Phi-3.5-mini-instruct-GGUF", "Phi-3.5-mini-instruct-Q4_K_M.gguf", None),
    (CODE_INTERPRETER_TOOL, {"code": "print('Hello, world!')"}, "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ("NousResearch-Hermes-2-Pro-Llama-3-8B", "tool_use")),
    (CODE_INTERPRETER_TOOL, {"code": "print('hello world')"}, "NousResearch/Hermes-3-Llama-3.1-8B-GGUF", "Hermes-3-Llama-3.1-8B.Q4_K_M.gguf", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
    (CODE_INTERPRETER_TOOL, {"code": "print('Hello, World!')"}, "lmstudio-community/Llama-3.2-1B-Instruct-GGUF", "Llama-3.2-1B-Instruct-Q4_K_M.gguf", ("meta-llama-Llama-3.2-3B-Instruct", None)),
    (CODE_INTERPRETER_TOOL, {"code": "print("}, "lmstudio-community/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_K_M.gguf", ("meta-llama-Llama-3.2-3B-Instruct", None)),
    (CODE_INTERPRETER_TOOL, {"code": "print("}, "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
    (CODE_INTERPRETER_TOOL, {"code": "print('Hello, World!')"}, "bartowski/functionary-small-v3.2-GGUF", "functionary-small-v3.2-Q8_0.gguf", ("meetkai-functionary-medium-v3.2", None)),
])
def test_hello_world_tool_call(tool: dict, expected_arguments: dict, hf_repo: str, hf_file: str, template_override: Tuple[str, str | None] | None):
    global server
    server.use_jinja = True
    server.model_hf_repo = hf_repo
    server.model_hf_file = hf_file
    if template_override:
        (template_hf_repo, template_variant) = template_override
        server.chat_template_file = f"../../../tests/chat/templates/{template_hf_repo.replace('/', '') + ('-' + template_variant if template_variant else '')}.jinja"
        assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/fetch_server_test_models.py {template_hf_repo} {template_variant}` to download the template."
    server.start(timeout_seconds=15*60)
    res = server.make_request("POST", "/chat/completions", data={
        "max_tokens": 256,
        "messages": [
            {"role": "system", "content": "You are a coding assistant."},
            {"role": "user", "content": "say hello world with python"},
        ],
        "tools": [tool],
    })
    assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
    choice = res.body["choices"][0]
    tool_calls = choice["message"].get("tool_calls")
    assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
    tool_call = tool_calls[0]
    assert tool["function"]["name"] == tool_call["function"]["name"]
    actual_arguments = json.loads(tool_call["function"]["arguments"])
    assert json.dumps(expected_arguments) == json.dumps(actual_arguments), f"tool arguments: {json.dumps(actual_arguments)}, expected: {json.dumps(expected_arguments)}"
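
Note: these tests reference a module-level server object that is outside this hunk. In the server pytest suite it is typically provided by an autouse fixture along the lines of the sketch below (the fixture name and the ServerPreset.tinyllama2 preset are assumptions, shown only to make the global server references above easier to follow):

import pytest

from utils import ServerPreset, ServerProcess

server: ServerProcess

@pytest.fixture(autouse=True)
def create_server():
    # Recreate the default test server before each test; individual tests then
    # set fields such as use_jinja or chat_template_file before calling start().
    global server
    server = ServerPreset.tinyllama2()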

View file

@@ -67,6 +67,8 @@ class ServerProcess:
    draft: int | None = None
    api_key: str | None = None
    response_format: str | None = None
    chat_template_file: str | None = None
    use_jinja: bool | None = None
    lora_files: List[str] | None = None
    disable_ctx_shift: int | None = False
    draft_min: int | None = None
@@ -148,6 +150,10 @@ class ServerProcess:
        if self.lora_files:
            for lora_file in self.lora_files:
                server_args.extend(["--lora", lora_file])
        if self.chat_template_file:
            server_args.extend(["--chat-template-file", self.chat_template_file])
        if self.use_jinja:
            server_args.append("--jinja")
        if self.disable_ctx_shift:
            server_args.extend(["--no-context-shift"])
        if self.api_key:

View file

@@ -0,0 +1,109 @@
{{- bos_token }}
{%- if custom_tools is defined %}
{%- set tools = custom_tools %}
{%- endif %}
{%- if not tools_in_user_message is defined %}
{%- set tools_in_user_message = true %}
{%- endif %}
{%- if not date_string is defined %}
{%- set date_string = "26 Jul 2024" %}
{%- endif %}
{%- if not tools is defined %}
{%- set tools = none %}
{%- endif %}
{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
{%- set system_message = messages[0]['content']|trim %}
{%- set messages = messages[1:] %}
{%- else %}
{%- set system_message = "" %}
{%- endif %}
{#- System message + builtin tools #}
{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
{%- if builtin_tools is defined or tools is not none %}
{{- "Environment: ipython\n" }}
{%- endif %}
{%- if builtin_tools is defined %}
{{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
{%- endif %}
{{- "Cutting Knowledge Date: December 2023\n" }}
{{- "Today Date: " + date_string + "\n\n" }}
{%- if tools is not none and not tools_in_user_message %}
{{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
{{- "Do not use variables.\n\n" }}
{%- for t in tools %}
{{- t | tojson(indent=4) }}
{{- "\n\n" }}
{%- endfor %}
{%- endif %}
{{- system_message }}
{{- "<|eot_id|>" }}
{#- Custom tools are passed in a user message with some extra guidance #}
{%- if tools_in_user_message and not tools is none %}
{#- Extract the first user message so we can plug it in here #}
{%- if messages | length != 0 %}
{%- set first_user_message = messages[0]['content']|trim %}
{%- set messages = messages[1:] %}
{%- else %}
{{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
{%- endif %}
{{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
{{- "Given the following functions, please respond with a JSON for a function call " }}
{{- "with its proper arguments that best answers the given prompt.\n\n" }}
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
{{- "Do not use variables.\n\n" }}
{%- for t in tools %}
{{- t | tojson(indent=4) }}
{{- "\n\n" }}
{%- endfor %}
{{- first_user_message + "<|eot_id|>"}}
{%- endif %}
{%- for message in messages %}
{%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
{%- elif 'tool_calls' in message %}
{%- if not message.tool_calls|length == 1 %}
{{- raise_exception("This model only supports single tool-calls at once!") }}
{%- endif %}
{%- set tool_call = message.tool_calls[0].function %}
{%- if builtin_tools is defined and tool_call.name in builtin_tools %}
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
{{- "<|python_tag|>" + tool_call.name + ".call(" }}
{%- for arg_name, arg_val in tool_call.arguments | items %}
{{- arg_name + '="' + arg_val + '"' }}
{%- if not loop.last %}
{{- ", " }}
{%- endif %}
{%- endfor %}
{{- ")" }}
{%- else %}
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
{{- '{"name": "' + tool_call.name + '", ' }}
{{- '"parameters": ' }}
{{- tool_call.arguments | tojson }}
{{- "}" }}
{%- endif %}
{%- if builtin_tools is defined %}
{#- This means we're in ipython mode #}
{{- "<|eom_id|>" }}
{%- else %}
{{- "<|eot_id|>" }}
{%- endif %}
{%- elif message.role == "tool" or message.role == "ipython" %}
{{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
{%- if message.content is mapping or message.content is iterable %}
{{- message.content | tojson }}
{%- else %}
{{- message.content }}
{%- endif %}
{{- "<|eot_id|>" }}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
{%- endif %}
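
This new file appears to be the meta-llama-Llama-3.3-70B-Instruct.jinja chat template referenced by the C++ template test below; the server exercises it via --jinja and --chat-template-file. For reference, a minimal sketch of rendering it standalone with jinja2 (the file path, BOS token value, and the raise_exception shim are assumptions):

import jinja2

def raise_exception(message: str):
    # The template calls raise_exception(...) on invalid input; plain jinja2 has no such global.
    raise ValueError(message)

env = jinja2.Environment()
env.globals["raise_exception"] = raise_exception

with open("tests/chat/templates/meta-llama-Llama-3.3-70B-Instruct.jinja") as f:
    template = env.from_string(f.read())

prompt = template.render(
    bos_token="<|begin_of_text|>",
    add_generation_prompt=True,
    messages=[{"role": "user", "content": "say hello world with python"}],
    tools=[{"type": "function", "function": {
        "name": "python",
        "description": "Runs code in a Python interpreter.",
        "parameters": {"type": "object", "properties": {"code": {"type": "string"}}},
    }}],
)
print(prompt)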

View file

@@ -414,6 +414,7 @@ static void test_grammars() {
test_template("tests/chat/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja", "<s>", "</s>", { "<|im_end|>" }, tool_call_message, tools);
test_template("tests/chat/templates/meta-llama-Meta-Llama-3.1-8B-Instruct.jinja", "<s>", "</s>", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
test_template("tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", "<s>", "</s>", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
test_template("tests/chat/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", "<s>", "</s>", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
test_template("tests/chat/templates/meetkai-functionary-medium-v3.1.jinja", "<s>", "</s>", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
test_template("tests/chat/templates/meetkai-functionary-medium-v3.2.jinja", "<s>", "</s>", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
test_template("tests/chat/templates/fireworks-ai-llama-3-firefunction-v2.jinja", "<s>", "</s>", { "<|eot_id|>" }, tool_call_message, tools);