From 9b63d7057a5e0b3e6ad6fbb681d754182b3bc762 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Mon, 19 Feb 2024 21:50:56 +0100
Subject: [PATCH] server: tests: reduce number of files, all in one tests shell script

---
 .github/workflows/server-test.yml              | 23 +---------
 examples/server/tests/README.md                |  4 +-
 .../server/tests/features/completions.feature  | 11 -----
 .../features/{oai.feature => server.feature}   | 14 +++++-
 .../server/tests/features/steps/completion.py  | 24 ----------
 .../tests/features/steps/{oai.py => steps.py}  | 24 +++++++++-
 examples/server/tests/tests.sh                 | 45 +++++++++++++++++++
 7 files changed, 85 insertions(+), 60 deletions(-)
 delete mode 100644 examples/server/tests/features/completions.feature
 rename examples/server/tests/features/{oai.feature => server.feature} (57%)
 delete mode 100644 examples/server/tests/features/steps/completion.py
 rename examples/server/tests/features/steps/{oai.py => steps.py} (66%)
 create mode 100755 examples/server/tests/tests.sh

diff --git a/.github/workflows/server-test.yml b/.github/workflows/server-test.yml
index d47c593f3..efd1bfcf3 100644
--- a/.github/workflows/server-test.yml
+++ b/.github/workflows/server-test.yml
@@ -45,26 +45,7 @@ jobs:
       - name: Server Integration Tests
         id: server_integration_test
         run: |
-          ./build/bin/server \
-            -m tinyllama-2-1b-miniguanaco.Q2_K.gguf \
-            --ctx-size 512 \
-            --parallel 4 \
-            --n-predict 512 \
-            --batch-size 128 \
-            --threads 4 \
-            --threads-batch 128 \
-            --alias phi-2 \
-            --embedding \
-            --cont-batching &
-          sh -c '\
-            max_attempts=30; \
-            attempts=${max_attempts}; \
-            echo "waiting for server to be ready..."; \
-            until curl --silent --show-error --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do \
-              attempts=$(( attempts - 1)); \
-              [ "${attempts}" -eq 0 ] && { echo "Server did not startup" >&2; exit 1; }; \
-              sleep $(( (max_attempts - attempts) * 2 )); \
-            done;'
           cd examples/server/tests
-          behave
+          ./tests.sh
+
diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
index 3e0e2d8b1..975fee848 100644
--- a/examples/server/tests/README.md
+++ b/examples/server/tests/README.md
@@ -6,4 +6,6 @@ Functional server tests suite.
 `pip install -r requirements.txt`
 
 ### Run tests
-`python -m behave`
+1. Build the server
+2. Download a GGUF model: `../../../scripts/hf.sh --repo TheBloke/Tinyllama-2-1b-miniguanaco-GGUF --file tinyllama-2-1b-miniguanaco.Q2_K.gguf`
+3. Run the tests: `./tests.sh tinyllama-2-1b-miniguanaco.Q2_K.gguf -ngl 23 --log-disable`
diff --git a/examples/server/tests/features/completions.feature b/examples/server/tests/features/completions.feature
deleted file mode 100644
index 4dc8786f6..000000000
--- a/examples/server/tests/features/completions.feature
+++ /dev/null
@@ -1,11 +0,0 @@
-Feature: Completion request
-
-  Scenario Outline: run a completion request
-    Given a prompt <prompt>
-    When we request a completion
-    Then tokens are predicted
-
-  Examples: Prompts
-    | prompt |
-    | I believe the meaning of life is |
-    | Write a detailed analogy between mathematics and a lighthouse. |
\ No newline at end of file
diff --git a/examples/server/tests/features/oai.feature b/examples/server/tests/features/server.feature
similarity index 57%
rename from examples/server/tests/features/oai.feature
rename to examples/server/tests/features/server.feature
index d56aa8404..60d8de954 100644
--- a/examples/server/tests/features/oai.feature
+++ b/examples/server/tests/features/server.feature
@@ -1,4 +1,14 @@
-Feature: OpenAI compatible completions request
+Feature: llama.cpp server
+
+  Scenario Outline: run a completion request
+    Given a prompt <prompt>
+    When we request a completion
+    Then tokens are predicted
+
+  Examples: Prompts
+    | prompt |
+    | I believe |
+    | Write a joke |
 
   Scenario Outline: run a completion on the OAI endpoint
     Given a system prompt <system_prompt>
@@ -9,5 +19,5 @@
 
   Examples: Prompts
     | model | system_prompt | user_prompt |
-    | tinyllama-2 | You are ChatGPT. | I believe the meaning of life is |
+    | tinyllama-2 | You are ChatGPT. | Say hello |
     | tinyllama-2 | You are a coding assistant. | Write the fibonacci function in c++ |
\ No newline at end of file
diff --git a/examples/server/tests/features/steps/completion.py b/examples/server/tests/features/steps/completion.py
deleted file mode 100644
index fda9a68e6..000000000
--- a/examples/server/tests/features/steps/completion.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from behave import *
-import requests
-
-
-@given(u'a prompt {prompt}')
-def step_prompt(context, prompt):
-    context.prompt = prompt
-
-
-@when(u'we request a completion')
-def step_request_completion(context):
-    response = requests.post('http://localhost:8080/completion', json={
-        "prompt": context.prompt
-    })
-    status_code = response.status_code
-    assert status_code == 200
-    context.response_data = response.json()
-
-
-@then(u'tokens are predicted')
-def step_request_completion(context):
-    assert len(context.response_data['content']) > 0
-    assert context.response_data['timings']['predicted_n'] > 0
-
diff --git a/examples/server/tests/features/steps/oai.py b/examples/server/tests/features/steps/steps.py
similarity index 66%
rename from examples/server/tests/features/steps/oai.py
rename to examples/server/tests/features/steps/steps.py
index 0ed4ebd64..f2721097b 100644
--- a/examples/server/tests/features/steps/oai.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1,10 +1,32 @@
-from behave import *
 import openai
+import requests
+from behave import *
 
 openai.api_key = 'llama.cpp'
 openai.api_base = "http://localhost:8080/v1/chat"
 
 
+@given(u'a prompt {prompt}')
+def step_prompt(context, prompt):
+    context.prompt = prompt
+
+
+@when(u'we request a completion')
+def step_request_completion(context):
+    response = requests.post('http://localhost:8080/completion', json={
+        "prompt": context.prompt
+    })
+    status_code = response.status_code
+    assert status_code == 200
+    context.response_data = response.json()
+
+
+@then(u'tokens are predicted')
+def step_assert_predicted_tokens(context):
+    assert len(context.response_data['content']) > 0
+    assert context.response_data['timings']['predicted_n'] > 0
+
+
 @given(u'a user prompt {user_prompt}')
 def step_user_prompt(context, user_prompt):
     context.user_prompt = user_prompt
diff --git a/examples/server/tests/tests.sh b/examples/server/tests/tests.sh
new file mode 100755
index 000000000..d3d414cd3
--- /dev/null
+++ b/examples/server/tests/tests.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+if [ $# -lt 1 ]
+then
+  >&2 echo "Usage: $0 model_path [server_args...]"
+  exit 1
+fi
+
+cleanup() {
+  pkill -P $$
+}
+trap cleanup EXIT
+
+model_path="$1"
+shift 1
+
+set -eu
+
+# Start the server in the background
+../../../build/bin/server \
+  --model "$model_path" \
+  --alias tinyllama-2 \
+  --ctx-size 64 \
+  --parallel 2 \
+  --n-predict 32 \
+  --batch-size 32 \
+  --threads 4 \
+  --threads-batch 4 \
+  --embedding \
+  --cont-batching \
+  "$@" &
+
+# Wait for the server to start
+max_attempts=30
+attempts=${max_attempts}
+until curl --silent --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do
+  attempts=$(( attempts - 1 ))
+  [ "${attempts}" -eq 0 ] && { echo "Server did not start up" >&2; exit 1; }
+  sleep_time=$(( (max_attempts - attempts) * 2 ))
+  echo "waiting ${sleep_time}s for the server to be ready..."
+  sleep ${sleep_time}
+done
+
+# Run the tests
+behave
\ No newline at end of file
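
Note (reviewer sketch, not part of the patch): tests.sh starts the server with --embedding, but no scenario exercises embeddings yet. A minimal follow-up in the style of steps.py could cover it, assuming the server exposes an /embedding route that accepts {"content": ...} and returns {"embedding": [...]}; the route, payload shape, and step wording below are illustrative assumptions, not something this patch defines.

import requests
from behave import *


@given(u'a text {text}')
def step_text(context, text):
    context.text = text


@when(u'we request an embedding')
def step_request_embedding(context):
    # Assumed endpoint: POST /embedding with {"content": ...}
    # returning {"embedding": [...]} (verify against the server docs).
    response = requests.post('http://localhost:8080/embedding', json={
        "content": context.text
    })
    assert response.status_code == 200
    context.embedding = response.json()['embedding']


@then(u'an embedding vector is returned')
def step_assert_embedding(context):
    # Without model-specific expectations, a non-empty vector is the
    # strongest assertion that stays deterministic across models.
    assert len(context.embedding) > 0

A matching scenario outline would then read: Given a text <text> / When we request an embedding / Then an embedding vector is returned.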