From 157bcf2286a4004ebf89e107bbe5ad124ae3714c Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Sun, 18 Feb 2024 17:13:04 +0100
Subject: [PATCH] server: init functional test

---
 .github/workflows/server-test.yml              | 70 +++++++++++++++++++
 examples/server/tests/README.md                |  9 +++
 .../server/tests/features/completions.feature  | 11 +++
 examples/server/tests/features/oai.feature     | 13 ++++
 .../server/tests/features/steps/completion.py  | 24 +++++++
 examples/server/tests/features/steps/oai.py    | 44 ++++++++++++
 examples/server/tests/requirements.txt         |  2 +
 7 files changed, 173 insertions(+)
 create mode 100644 .github/workflows/server-test.yml
 create mode 100644 examples/server/tests/README.md
 create mode 100644 examples/server/tests/features/completions.feature
 create mode 100644 examples/server/tests/features/oai.feature
 create mode 100644 examples/server/tests/features/steps/completion.py
 create mode 100644 examples/server/tests/features/steps/oai.py
 create mode 100644 examples/server/tests/requirements.txt

diff --git a/.github/workflows/server-test.yml b/.github/workflows/server-test.yml
new file mode 100644
index 000000000..d47c593f3
--- /dev/null
+++ b/.github/workflows/server-test.yml
@@ -0,0 +1,70 @@
+# Server test scenario
+name: Server Integration Tests
+
+# FIXME put only necessary triggers
+on:
+  push:
+    branches:
+      - master
+      - test/server-add-ci-test # FIXME remove
+    paths: ['.github/workflows/server-test.yml', '**/CMakeLists.txt', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', 'examples/server/**.*']
+
+jobs:
+  ubuntu-latest-cmake:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DCMAKE_BUILD_TYPE=Release
+          cmake --build . --config Release -j $(nproc)
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Download test model
+        id: download_model
+        run: |
+          ./scripts/hf.sh --repo TheBloke/Tinyllama-2-1b-miniguanaco-GGUF --file tinyllama-2-1b-miniguanaco.Q2_K.gguf
+
+      - name: Server Integration Tests
+        id: server_integration_test
+        run: |
+          ./build/bin/server \
+            -m tinyllama-2-1b-miniguanaco.Q2_K.gguf \
+            --ctx-size 512 \
+            --parallel 4 \
+            --n-predict 512 \
+            --batch-size 128 \
+            --threads 4 \
+            --threads-batch 128 \
+            --alias tinyllama-2 \
+            --embedding \
+            --cont-batching &
+          sh -c '\
+            max_attempts=30; \
+            attempts=${max_attempts}; \
+            echo "waiting for server to be ready..."; \
+            until curl --silent --show-error --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do \
+              attempts=$(( attempts - 1)); \
+              [ "${attempts}" -eq 0 ] && { echo "Server did not start up" >&2; exit 1; }; \
+              sleep $(( (max_attempts - attempts) * 2 )); \
+            done;'
+          cd examples/server/tests
+          behave
+
diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
new file mode 100644
index 000000000..3e0e2d8b1
--- /dev/null
+++ b/examples/server/tests/README.md
@@ -0,0 +1,9 @@
+# Server Integration Test
+
+Functional server test suite.
+
+### Install dependencies
+`pip install -r requirements.txt`
+
+### Run tests
+`python -m behave`
diff --git a/examples/server/tests/features/completions.feature b/examples/server/tests/features/completions.feature
new file mode 100644
index 000000000..4dc8786f6
--- /dev/null
+++ b/examples/server/tests/features/completions.feature
@@ -0,0 +1,11 @@
+Feature: Completion request
+
+  Scenario Outline: run a completion request
+    Given a prompt <prompt>
+    When we request a completion
+    Then tokens are predicted
+
+    Examples: Prompts
+      | prompt                                                          |
+      | I believe the meaning of life is                                |
+      | Write a detailed analogy between mathematics and a lighthouse. |
\ No newline at end of file
diff --git a/examples/server/tests/features/oai.feature b/examples/server/tests/features/oai.feature
new file mode 100644
index 000000000..d56aa8404
--- /dev/null
+++ b/examples/server/tests/features/oai.feature
@@ -0,0 +1,13 @@
+Feature: OpenAI compatible completions request
+
+  Scenario Outline: run a completion on the OAI endpoint
+    Given a system prompt <system_prompt>
+    And a user prompt <user_prompt>
+    And a model <model>
+    When we request the oai completions endpoint
+    Then the oai response contains completion tokens
+
+    Examples: Prompts
+      | model       | system_prompt               | user_prompt                         |
+      | tinyllama-2 | You are ChatGPT.            | I believe the meaning of life is    |
+      | tinyllama-2 | You are a coding assistant. | Write the fibonacci function in c++ |
\ No newline at end of file
diff --git a/examples/server/tests/features/steps/completion.py b/examples/server/tests/features/steps/completion.py
new file mode 100644
index 000000000..fda9a68e6
--- /dev/null
+++ b/examples/server/tests/features/steps/completion.py
@@ -0,0 +1,24 @@
+from behave import *
+import requests
+
+
+@given(u'a prompt {prompt}')
+def step_prompt(context, prompt):
+    context.prompt = prompt
+
+
+@when(u'we request a completion')
+def step_request_completion(context):
+    response = requests.post('http://localhost:8080/completion', json={
+        "prompt": context.prompt
+    })
+    status_code = response.status_code
+    assert status_code == 200
+    context.response_data = response.json()
+
+
+@then(u'tokens are predicted')
+def step_tokens_predicted(context):
+    assert len(context.response_data['content']) > 0
+    assert context.response_data['timings']['predicted_n'] > 0
+
diff --git a/examples/server/tests/features/steps/oai.py b/examples/server/tests/features/steps/oai.py
new file mode 100644
index 000000000..0ed4ebd64
--- /dev/null
+++ b/examples/server/tests/features/steps/oai.py
@@ -0,0 +1,44 @@
+from behave import *
+import openai
+
+openai.api_key = 'llama.cpp'
+openai.api_base = "http://localhost:8080/v1/chat"
+
+
+@given(u'a user prompt {user_prompt}')
+def step_user_prompt(context, user_prompt):
+    context.user_prompt = user_prompt
+
+
+@given(u'a system prompt {system_prompt}')
+def step_system_prompt(context, system_prompt):
+    context.system_prompt = system_prompt
+
+
+@given(u'a model {model}')
+def step_model(context, model):
+    context.model = model
+
+
+@when(u'we request the oai completions endpoint')
+def step_oai_completions(context):
+    context.chat_completion = openai.Completion.create(
+        messages=[
+            {
+                "role": "system",
+                "content": context.system_prompt,
+            },
+            {
+                "role": "user",
+                "content": context.user_prompt,
+            }
+        ],
+        model=context.model,
+    )
+
+
+@then(u'the oai response contains completion tokens')
+def step_oai_response_has_completion_tokens(context):
+    assert len(context.chat_completion.choices) == 1
+    assert len(context.chat_completion.choices[0].message) > 0
+    assert context.chat_completion.usage.completion_tokens > 0
diff --git a/examples/server/tests/requirements.txt b/examples/server/tests/requirements.txt
new file mode 100644
index 000000000..f5c6f2e4a
--- /dev/null
+++ b/examples/server/tests/requirements.txt
@@ -0,0 +1,2 @@
+behave~=1.2.6
+openai~=0.25.0
\ No newline at end of file
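
When debugging the suite locally it helps to confirm the server is actually up before invoking `behave`. The following standalone sketch is not part of the patch and its file name is illustrative; it mirrors the /health polling loop from the workflow and the /completion step from features/steps/completion.py, assuming a server started on http://localhost:8080 with the same flags as the CI job.

    # smoke_completion.py - hypothetical local helper, not included in the patch above.
    # Polls the llama.cpp server health endpoint, then issues one completion,
    # mirroring .github/workflows/server-test.yml and features/steps/completion.py.
    import time
    import requests

    BASE_URL = "http://localhost:8080"  # assumed server address, same as the CI job


    def wait_for_health(max_attempts=30):
        # Same idea as the curl/jq loop in the workflow: retry until status == "ok".
        for attempt in range(1, max_attempts + 1):
            try:
                if requests.get(f"{BASE_URL}/health").json().get("status") == "ok":
                    return
            except requests.exceptions.ConnectionError:
                pass
            time.sleep(attempt * 2)
        raise RuntimeError("Server did not start up")


    def check_completion(prompt):
        # Same request and assertions as the 'tokens are predicted' step.
        response = requests.post(f"{BASE_URL}/completion", json={"prompt": prompt})
        assert response.status_code == 200
        data = response.json()
        assert len(data["content"]) > 0
        assert data["timings"]["predicted_n"] > 0


    if __name__ == "__main__":
        wait_for_health()
        check_completion("I believe the meaning of life is")
        print("server is healthy and predicting tokens")

While iterating on a single scenario, `behave` also accepts a path argument, e.g. `behave features/completions.feature`, so only that feature file is run.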
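
The OAI steps reach the server through the pinned openai~=0.25.0 client, which predates the ChatCompletion helper; pointing api_base at /v1/chat and calling Completion.create makes that client post to the server's /v1/chat/completions route. The sketch below, again hypothetical and not part of the patch, exercises the same route with plain requests, which can be handy when the openai dependency is unwanted; the asserted fields match what the 'oai response contains completion tokens' step checks.

    # oai_smoke.py - hypothetical helper, not included in the patch above.
    # Hits the same OpenAI-compatible route that features/steps/oai.py exercises,
    # but with plain requests instead of the openai client.
    import requests

    BASE_URL = "http://localhost:8080"  # assumed, as in the CI workflow

    payload = {
        "model": "tinyllama-2",
        "messages": [
            {"role": "system", "content": "You are ChatGPT."},
            {"role": "user", "content": "I believe the meaning of life is"},
        ],
    }
    response = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload)
    response.raise_for_status()
    body = response.json()
    # Mirrors the assertions of the 'oai response contains completion tokens' step.
    assert len(body["choices"]) == 1
    assert body["usage"]["completion_tokens"] > 0
    print(body["choices"][0]["message"]["content"])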