server: init functional test

Pierrick HYMBERT 2024-02-18 17:13:04 +01:00
parent 4ed8e4fbef
commit 157bcf2286
7 changed files with 173 additions and 0 deletions

.github/workflows/server-test.yml

@@ -0,0 +1,70 @@
# Server test scenario
name: Server Integration Tests
# FIXME put only necessary triggers
on:
push:
branches:
- master
- test/server-add-ci-test # FIXME remove
paths: ['.github/workflows/server-test.yml', '**/CMakeLists.txt', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', 'examples/server/**.*']
jobs:
ubuntu-latest-cmake:
runs-on: ubuntu-latest
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential
- name: Build
id: cmake_build
run: |
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
cmake --build . --config Release -j $(nproc)
- name: Test dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt
- name: Download test model
id: download_model
run: |
./scripts/hf.sh --repo TheBloke/Tinyllama-2-1b-miniguanaco-GGUF --file tinyllama-2-1b-miniguanaco.Q2_K.gguf
- name: Server Integration Tests
id: server_integration_test
run: |
./build/bin/server \
-m tinyllama-2-1b-miniguanaco.Q2_K.gguf \
--ctx-size 512 \
--parallel 4 \
--n-predict 512 \
--batch-size 128 \
--threads 4 \
--threads-batch 128 \
--alias tinyllama-2 \
--embedding \
--cont-batching &
max_attempts=30
attempts=${max_attempts}
echo "waiting for server to be ready..."
until curl --silent --show-error --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do
  attempts=$(( attempts - 1 ))
  [ "${attempts}" -eq 0 ] && { echo "Server did not startup" >&2; exit 1; }
  sleep $(( (max_attempts - attempts) * 2 ))
done
cd examples/server/tests
behave
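For local debugging, the readiness loop above can also be sketched in Python. This is a minimal sketch, not part of the commit, assuming the server listens on localhost:8080 and `/health` reports `{"status": "ok"}` once the model is loaded:

```python
# wait_for_server.py - sketch of the workflow's readiness loop in Python
import sys
import time

import requests


def wait_for_server(base_url="http://localhost:8080", max_attempts=30):
    for attempt in range(1, max_attempts + 1):
        try:
            # /health is the endpoint the workflow polls with curl + jq
            if requests.get(f"{base_url}/health").json().get("status") == "ok":
                return
        except requests.exceptions.ConnectionError:
            pass  # server socket not open yet
        time.sleep(attempt * 2)  # growing back-off, like the shell loop
    sys.exit("Server did not startup")


if __name__ == "__main__":
    wait_for_server()
```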


@@ -0,0 +1,9 @@
# Server Integration Tests

Functional server test suite.

### Install dependencies

`pip install -r requirements.txt`

### Run tests

`python -m behave`
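The suite can also be driven from Python, which is handy for wiring it into other tooling. A hedged sketch, not part of the commit, assuming behave 1.2.x exposes `behave.__main__.main(args)`; the feature file path is illustrative:

```python
# run_tests.py - hypothetical helper, not part of the commit
from behave.__main__ import main as behave_main

# equivalent to `python -m behave features/completions.feature`
raise SystemExit(behave_main(["features/completions.feature"]))
```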


@@ -0,0 +1,11 @@
Feature: Completion request

  Scenario Outline: run a completion request
    Given a prompt <prompt>
    When we request a completion
    Then tokens are predicted

    Examples: Prompts
      | prompt |
      | I believe the meaning of life is |
      | Write a detailed analogy between mathematics and a lighthouse. |


@@ -0,0 +1,13 @@
Feature: OpenAI compatible completions request

  Scenario Outline: run a completion on the OAI endpoint
    Given a system prompt <system_prompt>
    And a user prompt <user_prompt>
    And a model <model>
    When we request the oai completions endpoint
    Then the oai response contains completion tokens

    Examples: Prompts
      | model | system_prompt | user_prompt |
      | tinyllama-2 | You are ChatGPT. | I believe the meaning of life is |
      | tinyllama-2 | You are a coding assistant. | Write the fibonacci function in c++ |


@@ -0,0 +1,24 @@
from behave import *
import requests


@given(u'a prompt {prompt}')
def step_prompt(context, prompt):
    context.prompt = prompt


@when(u'we request a completion')
def step_request_completion(context):
    response = requests.post('http://localhost:8080/completion', json={
        "prompt": context.prompt
    })
    status_code = response.status_code
    assert status_code == 200
    context.response_data = response.json()


@then(u'tokens are predicted')
def step_tokens_predicted(context):
    assert len(context.response_data['content']) > 0
    assert context.response_data['timings']['predicted_n'] > 0
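The `/completion` endpoint also accepts generation parameters in the request body. A hedged sketch of an additional step, hypothetical and not part of the commit, that caps generation via the `n_predict` field, mirroring the `--n-predict` server flag used in the workflow:

```python
# Hypothetical extra step for the same steps file; assumes the server
# honors an "n_predict" field in the /completion JSON body.
from behave import when
import requests


@when(u'we request a completion limited to {n_predict:d} tokens')
def step_request_completion_capped(context, n_predict):
    response = requests.post('http://localhost:8080/completion', json={
        "prompt": context.prompt,
        "n_predict": n_predict,
    })
    assert response.status_code == 200
    context.response_data = response.json()
    # the server reports how many tokens it actually generated
    assert context.response_data['timings']['predicted_n'] <= n_predict
```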


@@ -0,0 +1,44 @@
from behave import *
import openai

openai.api_key = 'llama.cpp'
# The base URL ends in /v1/chat so that openai.Completion.create, which
# appends "/completions", posts to the server's OpenAI-compatible
# /v1/chat/completions route.
openai.api_base = "http://localhost:8080/v1/chat"


@given(u'a user prompt {user_prompt}')
def step_user_prompt(context, user_prompt):
    context.user_prompt = user_prompt


@given(u'a system prompt {system_prompt}')
def step_system_prompt(context, system_prompt):
    context.system_prompt = system_prompt


@given(u'a model {model}')
def step_model(context, model):
    context.model = model


@when(u'we request the oai completions endpoint')
def step_oai_completions(context):
    context.chat_completion = openai.Completion.create(
        messages=[
            {
                "role": "system",
                "content": context.system_prompt,
            },
            {
                "role": "user",
                "content": context.user_prompt,
            }
        ],
        model=context.model,
    )


@then(u'the oai response contains completion tokens')
def step_oai_response_has_completion_tokens(context):
    assert len(context.chat_completion.choices) == 1
    assert len(context.chat_completion.choices[0].message) > 0
    assert context.chat_completion.usage.completion_tokens > 0
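Because of the `api_base` trick above, these steps ultimately POST to the server's `/v1/chat/completions` route. For debugging without the openai client, roughly the same exchange can be made with plain `requests`; a sketch, with the model and prompts taken from the Examples table above:

```python
# Raw equivalent of the request the openai client ends up sending.
import requests

response = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "tinyllama-2",
        "messages": [
            {"role": "system", "content": "You are ChatGPT."},
            {"role": "user", "content": "I believe the meaning of life is"},
        ],
    },
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```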


@@ -0,0 +1,2 @@
behave~=1.2.6
openai~=0.25.0