server: tests: add OAI stream test, fix file end of line, fast fail behave

parent 56583bee41
commit 9b7ea97979

5 changed files with 35 additions and 14 deletions

.github/workflows/server-test.yml

@@ -47,5 +47,3 @@ jobs:
       run: |
         cd examples/server/tests
         ./tests.sh ../../../stories260K.gguf
-
-

@@ -23,13 +23,14 @@ Feature: llama.cpp server
     And a user prompt <user_prompt>
     And a model <model>
     And <max_tokens> max tokens to predict
+    And streaming is <enable_streaming>
     Given an OAI compatible chat completions request
     Then <predicted_n> tokens are predicted

     Examples: Prompts
-      | model        | system_prompt               | user_prompt                          | max_tokens | predicted_n |
-      | llama-2      | You are ChatGPT.            | Say hello.                           | 64         | 64          |
-      | codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 512        | 512         |
+      | model        | system_prompt               | user_prompt                          | max_tokens | enable_streaming | predicted_n |
+      | llama-2      | You are ChatGPT.            | Say hello.                           | 64         | false            | 64          |
+      | codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 512        | true             | 512         |

   Scenario: Multi users
     Given a prompt:

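Each row of the Examples table runs the scenario outline once with the placeholders substituted, so the new enable_streaming column feeds directly into the OAI request built by the step definitions below. As a rough sketch (not part of the commit, and assuming the step pairs the system and user prompts in the usual OAI chat format), the codellama70b row corresponds to request arguments along these lines:

# Illustrative sketch only: approximate arguments for the codellama70b row.
# The messages layout is assumed; model, max_tokens and stream come from the table.
request_kwargs = {
    'messages': [
        {'role': 'system', 'content': 'You are a coding assistant.'},
        {'role': 'user', 'content': 'Write the fibonacci function in c++.'},
    ],
    'model': 'codellama70b',
    'max_tokens': 512,
    'stream': True,  # enable_streaming column: "true"
}
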
@@ -74,6 +74,11 @@ def step_max_tokens(context, max_tokens):
     context.max_tokens = int(max_tokens)


+@step(u'streaming is {enable_streaming}')
+def step_streaming(context, enable_streaming):
+    context.enable_streaming = bool(enable_streaming)
+
+
 @step(u'an OAI compatible chat completions request')
 def step_oai_chat_completions(context):
     chat_completion = openai.Completion.create(

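A caveat worth noting on the new step: bool(enable_streaming) is True for any non-empty string, including the literal "false" used in the Examples table, so both example rows would request streaming. A minimal sketch of a stricter parse (hypothetical helper, not part of this commit):

# Hypothetical helper, not in the commit: map the Gherkin string to a real bool,
# since bool("false") evaluates to True in Python.
def parse_bool(value):
    return value.strip().lower() in ('true', '1', 'yes')

assert parse_bool('false') is False
assert parse_bool('true') is True
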
@@ -88,14 +93,31 @@ def step_oai_chat_completions(context):
             }
         ],
         model=context.model,
-        max_tokens=context.max_tokens
+        max_tokens=context.max_tokens,
+        stream=context.enable_streaming
     )
-    context.completions.append({
-        'content': chat_completion.choices[0].message,
-        'timings': {
-            'predicted_n': chat_completion.usage.completion_tokens
+    if context.enable_streaming:
+        completion_response = {
+            'content': '',
+            'timings': {
+                'predicted_n': 0
+            }
         }
-    })
+        for chunk in chat_completion:
+            assert len(chunk.choices) == 1
+            delta = chunk.choices[0].delta
+            if 'content' in delta:
+                completion_response['content'] += delta['content']
+                completion_response['timings']['predicted_n'] += 1
+        context.completions.append(completion_response)
+    else:
+        assert len(chat_completion.choices) == 1
+        context.completions.append({
+            'content': chat_completion.choices[0].message,
+            'timings': {
+                'predicted_n': chat_completion.usage.completion_tokens
+            }
+        })


 @step(u'a prompt')

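The streaming branch aggregates the response chunk by chunk: every delta that carries a 'content' field is appended to the accumulated content and counted once toward predicted_n. A standalone sketch of that aggregation logic, using plain dicts in place of the openai client objects (illustrative only):

# Standalone sketch of the aggregation above; plain dicts stand in for the
# openai client objects.
def aggregate_stream(chunks):
    response = {'content': '', 'timings': {'predicted_n': 0}}
    for chunk in chunks:
        assert len(chunk['choices']) == 1
        delta = chunk['choices'][0]['delta']
        if 'content' in delta:
            response['content'] += delta['content']
            response['timings']['predicted_n'] += 1
    return response

# Two content-bearing chunks followed by an empty final delta.
demo = [
    {'choices': [{'delta': {'content': 'Hello'}}]},
    {'choices': [{'delta': {'content': ' world'}}]},
    {'choices': [{'delta': {}}]},
]
assert aggregate_stream(demo) == {'content': 'Hello world', 'timings': {'predicted_n': 2}}

Counting one predicted token per content chunk mirrors the diff above and assumes the server emits one token per streamed event.
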
@@ -32,4 +32,4 @@ set -eu
 "$@" &

 # Start tests
-behave
+behave --summary --stop