From 9b7ea97979a087a8ffbcba5368fa81385d6580bf Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Tue, 20 Feb 2024 21:34:35 +0100
Subject: [PATCH] server: tests: add OAI stream test, fix file end of line,
 fast fail behave

---
 .github/workflows/server-test.yml             |  2 --
 examples/server/tests/features/server.feature |  9 ++---
 examples/server/tests/features/steps/steps.py | 34 +++++++++++++++----
 examples/server/tests/requirements.txt        |  2 +-
 examples/server/tests/tests.sh                |  2 +-
 5 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/server-test.yml b/.github/workflows/server-test.yml
index d05230fbd..b70006e04 100644
--- a/.github/workflows/server-test.yml
+++ b/.github/workflows/server-test.yml
@@ -47,5 +47,3 @@ jobs:
         run: |
           cd examples/server/tests
           ./tests.sh ../../../stories260K.gguf
-
-
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index d2e691f12..a14d1459a 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -23,13 +23,14 @@ Feature: llama.cpp server
     And a user prompt
     And a model
     And max tokens to predict
+    And streaming is <enable_streaming>
     Given an OAI compatible chat completions request
     Then tokens are predicted

     Examples: Prompts
-      | model        | system_prompt               | user_prompt                          | max_tokens | predicted_n |
-      | llama-2      | You are ChatGPT.            | Say hello.                           | 64         | 64          |
-      | codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 512        | 512         |
+      | model        | system_prompt               | user_prompt                          | max_tokens | enable_streaming | predicted_n |
+      | llama-2      | You are ChatGPT.            | Say hello.                           | 64         | false            | 64          |
+      | codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 512        | true             | 512         |

   Scenario: Multi users
     Given a prompt:
@@ -55,4 +56,4 @@ Feature: llama.cpp server
     Given concurrent completion requests
     Then the server is busy
     Then the server is idle
-    Then all prompts are predicted
\ No newline at end of file
+    Then all prompts are predicted
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index c6fbff84d..f9823b51f 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -74,6 +74,11 @@ def step_max_tokens(context, max_tokens):
     context.max_tokens = int(max_tokens)


+@step(u'streaming is {enable_streaming}')
+def step_streaming(context, enable_streaming):
+    context.enable_streaming = enable_streaming == 'true'
+
+
 @step(u'an OAI compatible chat completions request')
 def step_oai_chat_completions(context):
     chat_completion = openai.Completion.create(
@@ -88,14 +93,31 @@ def step_oai_chat_completions(context):
             }
         ],
         model=context.model,
-        max_tokens=context.max_tokens
+        max_tokens=context.max_tokens,
+        stream=context.enable_streaming
     )
-    context.completions.append({
-        'content': chat_completion.choices[0].message,
-        'timings': {
-            'predicted_n': chat_completion.usage.completion_tokens
+    if context.enable_streaming:
+        completion_response = {
+            'content': '',
+            'timings': {
+                'predicted_n': 0
+            }
         }
-    })
+        for chunk in chat_completion:
+            assert len(chunk.choices) == 1
+            delta = chunk.choices[0].delta
+            if 'content' in delta:
+                completion_response['content'] += delta['content']
+                completion_response['timings']['predicted_n'] += 1
+        context.completions.append(completion_response)
+    else:
+        assert len(chat_completion.choices) == 1
+        context.completions.append({
+            'content': chat_completion.choices[0].message,
+            'timings': {
+                'predicted_n': chat_completion.usage.completion_tokens
+            }
+        })


 @step(u'a prompt')
diff --git a/examples/server/tests/requirements.txt b/examples/server/tests/requirements.txt
index f5c6f2e4a..b64fbc6ba 100644
--- a/examples/server/tests/requirements.txt
+++ b/examples/server/tests/requirements.txt
@@ -1,2 +1,2 @@
 behave~=1.2.6
-openai~=0.25.0
\ No newline at end of file
+openai~=0.25.0
diff --git a/examples/server/tests/tests.sh b/examples/server/tests/tests.sh
index 01b2f5d4d..230ee45ad 100755
--- a/examples/server/tests/tests.sh
+++ b/examples/server/tests/tests.sh
@@ -32,4 +32,4 @@ set -eu
 "$@" &

 # Start tests
-behave
\ No newline at end of file
+behave --summary --stop
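
Note on step_streaming: behave hands the captured {enable_streaming} value over as a raw
string, and any non-empty string is truthy in Python, so bool('false') would silently turn
streaming on for the non-streaming example row; hence the explicit comparison against the
literal 'true' in the step definition above. A quick plain-Python illustration of the trap:

    def parse_flag(value):
        # Compare against the literal string, as step_streaming does.
        return value == 'true'

    assert bool('false') is True           # the trap: non-empty strings are truthy
    assert parse_flag('false') is False    # the explicit comparison parses correctly
    assert parse_flag('true') is True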
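
For anyone wanting to exercise the new streaming path by hand, the aggregation loop in
step_oai_chat_completions can be replayed outside behave with the same 0.25-era openai
client. A minimal sketch, assuming a llama.cpp server is already listening locally; the
endpoint address and model alias below are placeholders, not part of this patch:

    import openai

    # Placeholder endpoint: point the legacy client at a local llama.cpp server.
    openai.api_key = 'no-key-required'
    openai.api_base = 'http://localhost:8080/v1/chat'

    chat_completion = openai.Completion.create(
        messages=[{'role': 'user', 'content': 'Say hello.'}],
        model='llama-2',  # placeholder model alias
        max_tokens=64,
        stream=True
    )

    # Same aggregation as the test step: concatenate the content deltas and
    # count one predicted token per chunk that carries content.
    content = ''
    predicted_n = 0
    for chunk in chat_completion:
        delta = chunk.choices[0].delta
        if 'content' in delta:
            content += delta['content']
            predicted_n += 1

    print(f'{predicted_n} tokens: {content}')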