tool-call: slow tool call integration tests
parent ec9f3b101b
commit 9a86ea79a2
4 changed files with 82 additions and 12 deletions
@@ -864,7 +864,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
        [](common_params & params) {
            params.warmup = false;
        }
    ).set_examples({LLAMA_EXAMPLE_MAIN}));
    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
    add_opt(common_arg(
        {"--spm-infill"},
        string_format(
@@ -20,7 +20,7 @@ import aiohttp
import numpy as np
import openai
from openai.types.chat import ChatCompletionChunk
from behave import step # pyright: ignore[reportAttributeAccessIssue]
from behave import register_type, step # pyright: ignore[reportAttributeAccessIssue]
from behave.api.async_step import async_run_until_complete
from prometheus_client import parser
@@ -28,6 +28,13 @@ from prometheus_client import parser

DEFAULT_TIMEOUT_SECONDS = aiohttp.ClientTimeout(total=600)

@parse.with_pattern(r".*")
def parse_maybe_empty_string(text):
    return text.strip()

register_type(MaybeEmptyString=parse_maybe_empty_string)


@step("a server listening on {server_fqdn}:{server_port}")
def step_server_config(context, server_fqdn: str, server_port: str):
    context.server_fqdn = server_fqdn
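For context: the `MaybeEmptyString` type registered above relies on the `parse` package that behave uses for step matching. A plain `{name}` placeholder will not accept an empty value, so a scenario-outline column such as `<template_override>` could not be left blank without a custom `.*` pattern. A minimal sketch, not part of this commit, assuming the `parse` package is installed:

```python
import parse

@parse.with_pattern(r".*")
def parse_maybe_empty_string(text):
    return text.strip()

# The custom ".*" pattern lets the step parameter match an empty value.
result = parse.parse(
    "a test chat template file named {name:MaybeEmptyString}",
    "a test chat template file named ",
    {"MaybeEmptyString": parse_maybe_empty_string},
)
print(repr(result["name"]))  # '' -> the step can treat a blank override as "no template file"
```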
@@ -82,6 +89,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
    context.temperature = None
    context.lora_file = None
    context.disable_ctx_shift = False
    context.warmup = True
    context.use_jinja = False
    context.chat_template_file = None
@@ -98,7 +106,6 @@ def step_server_config(context, server_fqdn: str, server_port: str):
def step_download_hf_model(context, hf_file: str, hf_repo: str):
    context.model_hf_repo = hf_repo
    context.model_hf_file = hf_file
    context.model_file = os.path.basename(hf_file)

@step('a lora adapter file from {lora_file_url}')
def step_download_lora_file(context, lora_file_url: str):
@@ -172,11 +179,23 @@ def step_use_jinja(context):
    context.use_jinja = True


@step('no warmup')
def step_no_warmup(context):
    context.warmup = False


@step('a chat template file {file}')
def step_use_jinja(context, file):
def step_chat_template_file(context, file):
    context.chat_template_file = file


@step('a test chat template file named {name:MaybeEmptyString}')
def step_test_chat_template_file_named(context, name):
    name = name.strip()
    if name:
        context.chat_template_file = f'../../../tests/chat/templates/{name}.jinja'


@step('using slot id {id_slot:d}')
def step_id_slot(context, id_slot: int):
    context.id_slot = id_slot
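The two new steps give the slow scenarios control over startup cost and prompt formatting: `no warmup` disables the warmup pass, and the `MaybeEmptyString`-typed step lets an Examples column either name a template under `tests/chat/templates/` or stay empty. A sketch of the resolution logic, illustrative only, mirroring `step_test_chat_template_file_named` above:

```python
def resolve_template_override(name: str):
    """Map a <template_override> column to a chat template file path, or None."""
    name = name.strip()
    if not name:
        # Blank column: leave chat_template_file unset so the server falls back
        # to the chat template bundled inside the GGUF model.
        return None
    return f'../../../tests/chat/templates/{name}.jinja'

assert resolve_template_override('') is None
assert resolve_template_override('mistralai-Mistral-Nemo-Instruct-2407').endswith(
    'mistralai-Mistral-Nemo-Instruct-2407.jinja')
```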
@@ -390,6 +409,29 @@ def step_response_format(context, response_format):
def step_tools(context, tools):
    context.tools = json.loads(tools)


@step('python tool')
def step_python_tool(context):
    if not context.tools:
        context.tools = []
    context.tools.append({
        "type": "function",
        "function": {
            "name": "ipython",
            "description": "",
            "parameters": {
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": ""
                    }
                },
                "required": ["code"]
            }
        }
    })

@step('a tool choice {tool_choice}')
def step_tool_choice(context, tool_choice):
    context.tool_choice = tool_choice
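The `python tool` step appends a minimal `ipython` function schema to the request body. For reference, this is roughly what the resulting OpenAI-compatible request looks like when issued through the `openai` client these tests already import; the base URL, API key, and model alias below are placeholders for a locally started llama-server, not values taken from this commit:

```python
import openai

client = openai.OpenAI(base_url="http://localhost:8080/v1", api_key="sk-no-key-required")

ipython_tool = {
    "type": "function",
    "function": {
        "name": "ipython",
        "description": "",
        "parameters": {
            "type": "object",
            "properties": {"code": {"type": "string", "description": ""}},
            "required": ["code"],
        },
    },
}

response = client.chat.completions.create(
    model="test",                # model alias served locally
    messages=[{"role": "user", "content": "write a hello world in python"}],
    tools=[ipython_tool],
    tool_choice="required",      # as in the "required tool_choice" scenarios
    max_tokens=256,
)
print(response.choices[0].message.tool_calls)
```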
@@ -1552,6 +1594,8 @@ def start_server_background(context):
        server_args.extend(['--lora', context.lora_file])
    if context.disable_ctx_shift:
        server_args.extend(['--no-context-shift'])
    if not context.warmup:
        server_args.extend(['--no-warmup'])

    args = [str(arg) for arg in [context.server_path, *server_args]]
    print(f"bench: starting server with: {' '.join(args)}")
@@ -4,20 +4,18 @@ Feature: llama.cpp server

  Background: Server startup
    Given a server listening on localhost:8080
    And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
    And a model file test-model.gguf
    And a model alias tinyllama-2
    And BOS token is 1
    And 42 as server seed
    And 8192 KV cache size
    And 32 as batch size
    And 2 slots
    And 1 slots
    And prometheus compatible metrics exposed
    And jinja templates are enabled


  Scenario Outline: OAI Compatibility w/ tools and required tool_choice (<template_name> template, <tool_name> tool)
    Given a chat template file ../../../tests/chat/templates/<template_name>.jinja
    Given a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
    And a test chat template file named <template_name>
    And the server is starting
    And the server is healthy
    And a model test
@@ -44,7 +42,8 @@ Feature: llama.cpp server


  Scenario Outline: OAI Compatibility w/ tools and auto tool_choice (<template_name> template)
    Given a chat template file ../../../tests/chat/templates/<template_name>.jinja
    Given a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
    And a test chat template file named <template_name>
    And the server is starting
    And the server is healthy
    And a model test
@@ -62,7 +61,8 @@ Feature: llama.cpp server


  Scenario: OAI Compatibility w/ no tool
    Given a chat template file ../../../tests/chat/templates/meta-llama-Meta-Llama-3.1-8B-Instruct.jinja
    Given a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
    And a chat template file ../../../tests/chat/templates/meta-llama-Meta-Llama-3.1-8B-Instruct.jinja
    And the server is starting
    And the server is healthy
    And a model test
@@ -73,3 +73,29 @@ Feature: llama.cpp server
    And an OAI compatible chat completions request with no api error
    Then no tool is called


  @slow
  Scenario Outline: OAI Compatibility w/ tools (<hf_repo> / <hf_file> with <template_override> template)
    Given a model file <hf_file> from HF repo <hf_repo>
    And a test chat template file named <template_override>
    And no warmup
    And the server is starting
    And the server is healthy
    And a model test
    And 256 max tokens to predict
    And a user prompt write a hello world in python (use single quotes for strings)
    And python tool
    And parallel tool calls is disabled
    And an OAI compatible chat completions request with no api error
    Then tool <tool_name> is called with arguments <tool_arguments>

    Examples: Prompts
      | tool_name | tool_arguments | hf_repo | hf_file | template_override |
      | ipython | {"code": "print('Hello, world!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q8_0.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use |
      | ipython | {"code": "print('Hello, World!')\n"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q8_0.gguf | mistralai-Mistral-Nemo-Instruct-2407 |
      | ipython | {"code": "print('Hello, World!'}"} | lmstudio-community/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
      | ipython | {"code": "print("} | lmstudio-community/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q6_K.gguf | meta-llama-Llama-3.2-3B-Instruct |
      | ipython | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF | Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf | |
      | ipython | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF | Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf | |
      # | ipython | {"code": "print('Hello, World!')"} | meetkai/functionary-small-v3.2-GGUF | functionary-small-v3.2.Q4_0.gguf | meetkai-functionary-medium-v3.2 |
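Each row pins the exact tool call a given model is expected to emit, including the truncated `print(` outputs expected from the smaller quantized Llama 3.x builds. The assertion behind `Then tool <tool_name> is called with arguments <tool_arguments>` is not part of this diff; conceptually it reduces to something like the sketch below, assuming a parsed OpenAI-style chat completion response:

```python
import json

def assert_tool_called(response: dict, expected_name: str, expected_arguments: str) -> None:
    """Check that the chat completion contains exactly one matching tool call."""
    message = response["choices"][0]["message"]
    tool_calls = message.get("tool_calls") or []
    assert len(tool_calls) == 1, f"expected one tool call, got: {tool_calls}"
    function = tool_calls[0]["function"]
    assert function["name"] == expected_name
    # Arguments arrive as a JSON-encoded string; compare parsed values, not raw text.
    assert json.loads(function["arguments"]) == json.loads(expected_arguments)
```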
@@ -5,7 +5,7 @@ set -eu
if [ $# -lt 1 ]
then
  # Start @llama.cpp scenario
  behave --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
  behave --summary --stop --no-capture --exclude 'issues|wrong_usages' --tags llama.cpp,-slow
else
  behave "$@"
fi