server: tests: reduce number of files, all in one tests shell script
commit 9b63d7057a (parent 157bcf2286)
7 changed files with 85 additions and 60 deletions
.github/workflows/server-test.yml (vendored): 23 lines changed

@@ -45,26 +45,7 @@ jobs:
       - name: Server Integration Tests
         id: server_integration_test
         run: |
-          ./build/bin/server \
-            -m tinyllama-2-1b-miniguanaco.Q2_K.gguf \
-            --ctx-size 512 \
-            --parallel 4 \
-            --n-predict 512 \
-            --batch-size 128 \
-            --threads 4 \
-            --threads-batch 128 \
-            --alias phi-2 \
-            --embedding \
-            --cont-batching &
-          sh -c '\
-            max_attempts=30; \
-            attempts=${max_attempts}; \
-            echo "waiting for server to be ready..."; \
-            until curl --silent --show-error --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do \
-              attempts=$(( attempts - 1)); \
-              [ "${attempts}" -eq 0 ] && { echo "Server did not startup" >&2; exit 1; }; \
-              sleep $(( (max_attempts - attempts) * 2 )); \
-            done;'
           cd examples/server/tests
-          behave
+          ./tests.sh
@@ -6,4 +6,6 @@ Functional server tests suite.
 `pip install -r requirements.txt`
 
 ### Run tests
-`python -m behave`
+1. Build the server
+2. download a GGUF model: `../../../scripts/hf.sh --repo TheBloke/Tinyllama-2-1b-miniguanaco-GGUF --file tinyllama-2-1b-miniguanaco.Q2_K.gguf`
+3. Start the test: `./tests.sh tinyllama-2-1b-miniguanaco.Q2_K.gguf -ngl 23 --log-disable`
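For reference, those three README steps correspond to roughly the following shell session. The build command is an assumption (use whatever make/cmake invocation you normally build `server` with); the download and test steps run from `examples/server/tests` so the GGUF file ends up next to `tests.sh`:

```sh
# 1. Build the server binary (assumed cmake invocation; adjust to your setup)
cmake -B build && cmake --build build --target server

# 2. Download a small GGUF model next to the test suite
cd examples/server/tests
../../../scripts/hf.sh --repo TheBloke/Tinyllama-2-1b-miniguanaco-GGUF \
    --file tinyllama-2-1b-miniguanaco.Q2_K.gguf

# 3. Run the suite; everything after the model path is passed to the server
./tests.sh tinyllama-2-1b-miniguanaco.Q2_K.gguf -ngl 23 --log-disable
```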
@ -1,11 +0,0 @@
|
||||||
Feature: Completion request
|
|
||||||
|
|
||||||
Scenario Outline: run a completion request
|
|
||||||
Given a prompt <prompt>
|
|
||||||
When we request a completion
|
|
||||||
Then tokens are predicted
|
|
||||||
|
|
||||||
Examples: Prompts
|
|
||||||
| prompt |
|
|
||||||
| I believe the meaning of life is |
|
|
||||||
| Write a detailed analogy between mathematics and a lighthouse. |
|
|
|
@@ -1,4 +1,14 @@
-Feature: OpenAI compatible completions request
+Feature: llama.cpp server
 
+  Scenario Outline: run a completion request
+    Given a prompt <prompt>
+    When we request a completion
+    Then tokens are predicted
+
+    Examples: Prompts
+      | prompt |
+      | I believe |
+      | Write a joke |
+
   Scenario Outline: run a completion on the OAI endpoint
     Given a system prompt <system_prompt>
@@ -9,5 +19,5 @@ Feature: OpenAI compatible completions request
 
     Examples: Prompts
       | model | system_prompt | user_prompt |
-      | tinyllama-2 | You are ChatGPT. | I believe the meaning of life is |
+      | tinyllama-2 | You are ChatGPT. | Say hello |
       | tinyllama-2 | You are a coding assistant. | Write the fibonacci function in c++ |
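Since both scenarios now live in one feature file, a single scenario can still be run in isolation with behave's `--name` filter (a usage sketch; it assumes the server is already up and the working directory is `examples/server/tests`):

```sh
# Run only the plain completion scenario
behave --name "run a completion request"

# Run only the OpenAI-compatible scenario
behave --name "run a completion on the OAI endpoint"
```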
@@ -1,24 +0,0 @@
-from behave import *
-import requests
-
-
-@given(u'a prompt {prompt}')
-def step_prompt(context, prompt):
-    context.prompt = prompt
-
-
-@when(u'we request a completion')
-def step_request_completion(context):
-    response = requests.post('http://localhost:8080/completion', json={
-        "prompt": context.prompt
-    })
-    status_code = response.status_code
-    assert status_code == 200
-    context.response_data = response.json()
-
-
-@then(u'tokens are predicted')
-def step_request_completion(context):
-    assert len(context.response_data['content']) > 0
-    assert context.response_data['timings']['predicted_n'] > 0
-
@@ -1,10 +1,32 @@
-from behave import *
 import openai
+import requests
+from behave import *
 
 openai.api_key = 'llama.cpp'
 openai.api_base = "http://localhost:8080/v1/chat"
 
 
+@given(u'a prompt {prompt}')
+def step_prompt(context, prompt):
+    context.prompt = prompt
+
+
+@when(u'we request a completion')
+def step_request_completion(context):
+    response = requests.post('http://localhost:8080/completion', json={
+        "prompt": context.prompt
+    })
+    status_code = response.status_code
+    assert status_code == 200
+    context.response_data = response.json()
+
+
+@then(u'tokens are predicted')
+def step_request_completion(context):
+    assert len(context.response_data['content']) > 0
+    assert context.response_data['timings']['predicted_n'] > 0
+
+
 @given(u'a user prompt {user_prompt}')
 def step_user_prompt(context, user_prompt):
     context.user_prompt = user_prompt
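For debugging outside behave, the two kinds of requests the steps issue can be reproduced with curl. This is a sketch: the first call mirrors the `requests.post` in the completion step, the second hits the server's OpenAI-compatible chat route directly (rather than going through the `openai` client), with a payload mirroring the Examples table above:

```sh
# Plain completion, as posted by the "we request a completion" step
curl -s http://localhost:8080/completion \
    -H "Content-Type: application/json" \
    -d '{"prompt": "I believe"}' | jq

# Chat completion via the server's OpenAI-compatible endpoint
curl -s http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "tinyllama-2",
         "messages": [{"role": "system", "content": "You are ChatGPT."},
                      {"role": "user", "content": "Say hello"}]}' | jq
```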
examples/server/tests/tests.sh (new executable file): 45 lines added

@@ -0,0 +1,45 @@
+#!/bin/bash
+
+if [ $# -lt 1 ]
+then
+  >&2 echo "Usage: $0 model_path [server_args...]"
+  exit 1
+fi
+
+cleanup() {
+  pkill -P $$
+}
+trap cleanup EXIT
+
+model_path="$1"
+shift 1
+
+set -eu
+
+# Start the server in background
+../../../build/bin/server \
+  --model "$model_path" \
+  --alias tinyllama-2 \
+  --ctx-size 64 \
+  --parallel 2 \
+  --n-predict 32 \
+  --batch-size 32 \
+  --threads 4 \
+  --threads-batch 4 \
+  --embedding \
+  --cont-batching \
+  "$@" &
+
+# Wait for the server to start
+max_attempts=30
+attempts=${max_attempts}
+until curl --silent --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do
+  attempts=$(( attempts - 1));
+  [ "${attempts}" -eq 0 ] && { echo "Server did not startup" >&2; exit 1; }
+  sleep_time=$(( (max_attempts - attempts) * 2 ))
+  echo "waiting for server to be ready ${sleep_time}s..."
+  sleep ${sleep_time}
+done
+
+# Start tests
+behave
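Usage note: everything after the model path is forwarded to the server through `"$@"`, so per-machine options can be appended without editing the script, e.g. (matching the README above):

```sh
cd examples/server/tests
./tests.sh tinyllama-2-1b-miniguanaco.Q2_K.gguf -ngl 23 --log-disable
```

The readiness loop backs off linearly: the sleep grows by 2 s per failed `/health` probe (2 s, 4 s, ... up to 58 s), roughly 870 s in total before the script reports "Server did not startup" and exits.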