server: init functional test
parent 4ed8e4fbef
commit 157bcf2286
7 changed files with 173 additions and 0 deletions
70  .github/workflows/server-test.yml  vendored  Normal file

@@ -0,0 +1,70 @@
# Server test scenario
name: Server Integration Tests

# FIXME put only necessary triggers
on:
  push:
    branches:
      - master
      - test/server-add-ci-test # FIXME remove
    paths: ['.github/workflows/server-test.yml', '**/CMakeLists.txt', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', 'examples/server/**.*']

jobs:
  ubuntu-latest-cmake:
    runs-on: ubuntu-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential

      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. -DCMAKE_BUILD_TYPE=Release
          cmake --build . --config Release -j $(nproc)

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      - name: Download test model
        id: download_model
        run: |
          ./scripts/hf.sh --repo TheBloke/Tinyllama-2-1b-miniguanaco-GGUF --file tinyllama-2-1b-miniguanaco.Q2_K.gguf

      - name: Server Integration Tests
        id: server_integration_test
        run: |
          ./build/bin/server \
            -m tinyllama-2-1b-miniguanaco.Q2_K.gguf \
            --ctx-size 512 \
            --parallel 4 \
            --n-predict 512 \
            --batch-size 128 \
            --threads 4 \
            --threads-batch 128 \
            --alias phi-2 \
            --embedding \
            --cont-batching &
          sh -c '\
            max_attempts=30; \
            attempts=${max_attempts}; \
            echo "waiting for server to be ready..."; \
            until curl --silent --show-error --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do \
              attempts=$(( attempts - 1)); \
              [ "${attempts}" -eq 0 ] && { echo "Server did not startup" >&2; exit 1; }; \
              sleep $(( (max_attempts - attempts) * 2 )); \
            done;'
          cd examples/server/tests
          behave
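The last step depends on the server's /health endpoint: the shell loop polls it with curl and jq until the reported status is ok, waiting a little longer after each failed attempt. Below is a minimal Python sketch of that same readiness check; the function name and base URL variable are illustrative and not part of this commit.

```python
import time
import requests

def wait_for_server_ready(base_url="http://localhost:8080", max_attempts=30):
    """Poll /health until the server reports {"status": "ok"}."""
    for attempt in range(1, max_attempts + 1):
        try:
            if requests.get(f"{base_url}/health").json().get("status") == "ok":
                return
        except requests.exceptions.ConnectionError:
            pass  # server socket not open yet
        time.sleep(attempt * 2)  # growing delay, like the shell loop above
    raise RuntimeError("Server did not startup")
```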
9  examples/server/tests/README.md  Normal file

@@ -0,0 +1,9 @@
# Server Integration Test

Functional test suite for the server example.

### Install dependencies
`pip install -r requirements.txt`

### Run tests
`python -m behave`
11  examples/server/tests/features/completions.feature  Normal file

@@ -0,0 +1,11 @@
Feature: Completion request

  Scenario Outline: run a completion request
    Given a prompt <prompt>
    When we request a completion
    Then tokens are predicted

    Examples: Prompts
      | prompt                                                          |
      | I believe the meaning of life is                                |
      | Write a detailed analogy between mathematics and a lighthouse.  |
13  examples/server/tests/features/oai.feature  Normal file

@@ -0,0 +1,13 @@
Feature: OpenAI compatible completions request

  Scenario Outline: run a completion on the OAI endpoint
    Given a system prompt <system_prompt>
    And a user prompt <user_prompt>
    And a model <model>
    When we request the oai completions endpoint
    Then the oai response contains completion tokens

    Examples: Prompts
      | model       | system_prompt               | user_prompt                          |
      | tinyllama-2 | You are ChatGPT.            | I believe the meaning of life is     |
      | tinyllama-2 | You are a coding assistant. | Write the fibonacci function in c++  |
24  examples/server/tests/features/steps/completion.py  Normal file

@@ -0,0 +1,24 @@
from behave import *
import requests


@given(u'a prompt {prompt}')
def step_prompt(context, prompt):
    context.prompt = prompt


@when(u'we request a completion')
def step_request_completion(context):
    response = requests.post('http://localhost:8080/completion', json={
        "prompt": context.prompt
    })
    status_code = response.status_code
    assert status_code == 200
    context.response_data = response.json()


@then(u'tokens are predicted')
def step_tokens_predicted(context):
    assert len(context.response_data['content']) > 0
    assert context.response_data['timings']['predicted_n'] > 0
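Outside of behave, the same /completion call the steps above make can be issued directly. A minimal sketch, assuming a server already running on localhost:8080 as in the workflow; the prompt is one of the examples from completions.feature:

```python
import requests

# POST a prompt to the running server and inspect the fields the steps assert on.
response = requests.post('http://localhost:8080/completion',
                         json={"prompt": "I believe the meaning of life is"})
assert response.status_code == 200
data = response.json()
print(data['content'])                 # generated text
print(data['timings']['predicted_n'])  # number of tokens actually predicted
```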
44  examples/server/tests/features/steps/oai.py  Normal file

@@ -0,0 +1,44 @@
from behave import *
import openai

openai.api_key = 'llama.cpp'
openai.api_base = "http://localhost:8080/v1/chat"


@given(u'a user prompt {user_prompt}')
def step_user_prompt(context, user_prompt):
    context.user_prompt = user_prompt


@given(u'a system prompt {system_prompt}')
def step_system_prompt(context, system_prompt):
    context.system_prompt = system_prompt


@given(u'a model {model}')
def step_model(context, model):
    context.model = model


@when(u'we request the oai completions endpoint')
def step_oai_completions(context):
    context.chat_completion = openai.Completion.create(
        messages=[
            {
                "role": "system",
                "content": context.system_prompt,
            },
            {
                "role": "user",
                "content": context.user_prompt,
            }
        ],
        model=context.model,
    )


@then(u'the oai response contains completion tokens')
def step_oai_response_has_completion_tokens(context):
    assert len(context.chat_completion.choices) == 1
    assert len(context.chat_completion.choices[0].message) > 0
    assert context.chat_completion.usage.completion_tokens > 0
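Since api_base ends with /v1/chat, the legacy openai client's Completion.create call above ends up posting to the server's OpenAI-compatible /v1/chat/completions route. A minimal sketch of the equivalent raw request with requests; the payload fields and assertions mirror the feature examples and step definitions above:

```python
import requests

payload = {
    "model": "tinyllama-2",
    "messages": [
        {"role": "system", "content": "You are ChatGPT."},
        {"role": "user", "content": "I believe the meaning of life is"},
    ],
}
r = requests.post("http://localhost:8080/v1/chat/completions", json=payload)
r.raise_for_status()
completion = r.json()
assert len(completion["choices"]) == 1
assert completion["usage"]["completion_tokens"] > 0
print(completion["choices"][0]["message"]["content"])
```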
2  examples/server/tests/requirements.txt  Normal file

@@ -0,0 +1,2 @@
behave~=1.2.6
openai~=0.25.0