json: add server tests for OAI JSON response_format

commit edbd2e9862
parent 3e1bf44e5e

4 changed files with 53 additions and 3 deletions
@@ -37,6 +37,22 @@ Feature: Security
       | llama.cpp | no |
       | hackme | raised |
 
+  Scenario Outline: OAI Compatibility (invalid response formats)
+    Given a system prompt test
+    And a user prompt test
+    And a response format <response_format>
+    And a model test
+    And 2 max tokens to predict
+    And streaming is disabled
+    Given an OAI compatible chat completions request with raised api error
+
+    Examples: Prompts
+      | response_format |
+      | {"type": "sound"} |
+      | {"type": "json_object", "schema": 123} |
+      | {"type": "json_object", "schema": {"type": 123}} |
+      | {"type": "json_object", "schema": {"type": "hiccup"}} |
+
 
   Scenario Outline: CORS Options
     Given a user api key llama.cpp
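Outside the behave harness, this security scenario boils down to sending an unsupported response_format and expecting the request to be rejected. A minimal standalone sketch of that check, assuming a llama.cpp server is already listening on http://localhost:8080 (host and port are an assumption here, not part of the commit):

# Illustrative only: reproduce the "raised api error" expectation with a plain HTTP call.
import requests

resp = requests.post(
    "http://localhost:8080/v1/chat/completions",   # assumed local test server
    json={
        "model": "test",
        "messages": [
            {"role": "system", "content": "test"},
            {"role": "user", "content": "test"},
        ],
        "max_tokens": 2,
        "stream": False,
        "response_format": {"type": "sound"},      # unsupported type from the Examples table
    },
)
assert resp.status_code != 200                      # the request must not succeed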
@@ -69,6 +69,22 @@ Feature: llama.cpp server
       | codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 128 | (thanks\|happy\|bird)+ | -1 | 64 | enabled | |
 
 
+  Scenario Outline: OAI Compatibility w/ response format
+    Given a model test
+    And a system prompt test
+    And a user prompt test
+    And a response format <response_format>
+    And 10 max tokens to predict
+    Given an OAI compatible chat completions request with no api error
+    Then <n_predicted> tokens are predicted matching <re_content>
+
+    Examples: Prompts
+      | response_format | n_predicted | re_content |
+      | {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" |
+      | {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] |
+      | {"type": "json_object"} | 10 | \{ " said the other an |
+
+
   Scenario: Tokenize / Detokenize
     When tokenizing:
       """
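The happy-path scenario pairs each response_format with a regex the completion must match; with a schema of {"const": "42"} the constrained sampling effectively pins the output to the string "42". The same request issued directly, again as an illustrative sketch assuming a local server on http://localhost:8080 (not part of the test suite):

# Illustrative only: a schema-constrained chat completion against an assumed local server.
import requests

resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "test",
        "messages": [
            {"role": "system", "content": "test"},
            {"role": "user", "content": "test"},
        ],
        "max_tokens": 10,
        "stream": False,
        "response_format": {"type": "json_object", "schema": {"const": "42"}},
    },
)
content = resp.json()["choices"][0]["message"]["content"]
print(content)   # the scenario expects this to match the regex "42"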
@@ -52,6 +52,7 @@ def step_server_config(context, server_fqdn, server_port):
     context.seed = None
     context.server_seed = None
     context.user_api_key = None
+    context.response_format = None
 
     context.tasks_result = []
     context.concurrent_tasks = []
@@ -248,6 +249,11 @@ def step_max_tokens(context, max_tokens):
     context.n_predict = max_tokens
 
 
+@step('a response format {response_format}')
+def step_response_format(context, response_format):
+    context.response_format = json.loads(response_format)
+
+
 @step('streaming is {enable_streaming}')
 def step_streaming(context, enable_streaming):
     context.enable_streaming = enable_streaming == 'enabled'
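The new step takes whatever text follows "a response format" in the feature file and parses it with json.loads, so every cell in the Examples tables must be valid JSON. A quick standalone illustration of that parsing (not part of the suite):

import json

# What behave hands the step for: And a response format {"type": "json_object", "schema": {"const": "42"}}
raw = '{"type": "json_object", "schema": {"const": "42"}}'
response_format = json.loads(raw)
assert response_format == {"type": "json_object", "schema": {"const": "42"}}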
@@ -363,6 +369,9 @@ async def step_oai_chat_completions(context, api_error):
                                              enable_streaming=context.enable_streaming
                                              if hasattr(context, 'enable_streaming') else None,
 
+                                              response_format=context.response_format
+                                              if hasattr(context, 'response_format') else None,
+
                                              seed=await completions_seed(context),
 
                                              user_api_key=context.user_api_key
@@ -422,6 +431,8 @@ async def step_oai_chat_completions(context):
                                 if hasattr(context, 'n_predict') else None,
                                 enable_streaming=context.enable_streaming
                                 if hasattr(context, 'enable_streaming') else None,
+                                response_format=context.response_format
+                                if hasattr(context, 'response_format') else None,
                                 seed=await completions_seed(context),
                                 user_api_key=context.user_api_key
                                 if hasattr(context, 'user_api_key') else None)
@@ -442,6 +453,8 @@ async def step_oai_chat_completions(context):
                               if hasattr(context, 'n_predict') else None,
                               enable_streaming=context.enable_streaming
                               if hasattr(context, 'enable_streaming') else None,
+                              response_format=context.response_format
+                              if hasattr(context, 'response_format') else None,
                               seed=context.seed
                               if hasattr(context, 'seed') else
                               context.server_seed
@@ -724,6 +737,7 @@ async def oai_chat_completions(user_prompt,
                               model=None,
                               n_predict=None,
                               enable_streaming=None,
+                              response_format=None,
                               seed=None,
                               user_api_key=None,
                               expect_api_error=None):
@@ -749,6 +763,8 @@ async def oai_chat_completions(user_prompt,
         "stream": enable_streaming,
         "seed": seed
     }
+    if response_format is not None:
+        payload['response_format'] = response_format
     completion_response = {
         'content': '',
         'timings': {
@@ -809,6 +825,7 @@ async def oai_chat_completions(user_prompt,
                 model=model,
                 max_tokens=n_predict,
                 stream=enable_streaming,
+                response_format=payload.get('response_format'),
                 seed=seed
             )
         except openai.error.AuthenticationError as e:
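Taken together, the step-definition changes thread response_format from the Gherkin step into the request: it is stored on the behave context, added to the raw JSON payload only when a scenario set it, and forwarded as a keyword argument on the OpenAI-client path. A condensed sketch of that flow with illustrative names (not the suite's own helpers):

import json


def build_chat_payload(user_prompt, n_predict=None, response_format=None, seed=None):
    # Mirrors the guard the commit adds to the request helper: the field only
    # appears in the payload when a response format was given in the scenario.
    payload = {
        "messages": [{"role": "user", "content": user_prompt}],
        "max_tokens": n_predict,
        "stream": False,
        "seed": seed,
    }
    if response_format is not None:
        payload["response_format"] = response_format
    return payload


# e.g. after the step: And a response format {"type": "json_object"}
fmt = json.loads('{"type": "json_object"}')
print(build_chat_payload("test", n_predict=10, response_format=fmt, seed=42))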
@@ -376,11 +376,12 @@ static json oaicompat_completion_params_parse(
         llama_params["grammar"] = json_value(body, "grammar", json::object());
     }
 
-    if (body.contains("response_format")) {
+    if (!body["response_format"].is_null()) {
         auto response_format = json_value(body, "response_format", json::object());
-        if (response_format.contains("schema") && response_format["type"] == "json_object") {
+        if (response_format["type"] == "json_object") {
             llama_params["json_schema"] = json_value(response_format, "schema", json::object());
-            std::cerr << "GOT " << llama_params["json_schema"] << std::endl;
+        } else {
+            throw std::runtime_error("response_format type not supported: " + response_format["type"].dump());
         }
     }
 
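The server-side hunk translates the OpenAI-style response_format field into the internal json_schema parameter and now rejects unsupported types instead of silently ignoring them (the stray debug print is dropped). A Python stand-in for that logic, for illustration only; the server implements it in C++:

def apply_response_format(body: dict, llama_params: dict) -> dict:
    # Stand-in for the C++ branch: only "json_object" is accepted; its (possibly
    # absent) schema becomes the json_schema parameter, anything else is an error.
    response_format = body.get("response_format")
    if response_format is not None:
        if response_format.get("type") == "json_object":
            llama_params["json_schema"] = response_format.get("schema", {})
        else:
            raise ValueError(f"response_format type not supported: {response_format.get('type')!r}")
    return llama_params


print(apply_response_format({"response_format": {"type": "json_object", "schema": {"const": "42"}}}, {}))
# -> {'json_schema': {'const': '42'}}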