server: ci: windows build and tests

Pierrick HYMBERT, 2024-03-09 23:00:48 +01:00
parent 621e86b331, commit 89c4bd5e97
4 changed files with 100 additions and 23 deletions

.github/workflows/server.yml

@@ -11,6 +11,7 @@ on:
   push:
     branches:
       - master
+      - hp/server/ci/windows/init
     paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
   pull_request:
     types: [opened, synchronize, reopened]
@@ -90,3 +91,46 @@ jobs:
         run: |
           cd examples/server/tests
           PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
+
+  server-windows:
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
+          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Tests
+        id: server_integration_tests
+        run: |
+          cd examples/server/tests
+          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
+        run: |
+          cd examples/server/tests
+          behave.exe --stop --no-skipped --no-capture --tags slow

examples/server/tests/features/environment.py

@@ -3,7 +3,7 @@ import socket
 import subprocess
 import time
 from contextlib import closing
-from signal import SIGKILL
+import signal
 
 
 def before_scenario(context, scenario):
@@ -29,31 +29,43 @@ def after_scenario(context, scenario):
                 for line in f:
                     print(line)
         if not is_server_listening(context.server_fqdn, context.server_port):
-            print("\x1b[33;101mERROR: Server stopped listening\x1b[0m")
+            print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")
 
     if not pid_exists(context.server_process.pid):
         assert False, f"Server not running pid={context.server_process.pid} ..."
 
-    print(f"stopping server pid={context.server_process.pid} ...")
-    context.server_process.kill()
+    kill_server(context)
 
     # Wait few for socket to free up
     time.sleep(0.05)
 
     attempts = 0
     while is_server_listening(context.server_fqdn, context.server_port):
-        print(f"stopping server pid={context.server_process.pid} ...")
-        os.kill(context.server_process.pid, SIGKILL)
+        context.server_process.kill()
         time.sleep(0.1)
         attempts += 1
         if attempts > 5:
-            print(f"Server dangling exits, killing all {context.server_path} ...")
-            process = subprocess.run(['killall', '-9', context.server_path],
-                                     stderr=subprocess.PIPE,
-                                     universal_newlines=True)
+            if os.name == 'nt':
+                print(f"Server dangling exits, task killing force {context.server_process.pid} ...\n")
+                process = subprocess.run(['taskkill', '/F', '/pid', str(context.server_process.pid)],
+                                         stderr=subprocess.PIPE)
+            else:
+                print(f"Server dangling exits, killing all {context.server_path} ...\n")
+                process = subprocess.run(['killall', '-9', context.server_path],
+                                         stderr=subprocess.PIPE)
             print(process)
 
 
+def kill_server(context):
+    print(f"stopping server pid={context.server_process.pid} ...\n")
+    if os.name == 'nt':
+        os.kill(context.server_process.pid, signal.CTRL_C_EVENT)
+    else:
+        os.kill(context.server_process.pid, signal.SIGKILL)
+
+
 def is_server_listening(server_fqdn, server_port):
+    print(f"is server listening on {server_fqdn}:{server_port}...\n")
     with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
         result = sock.connect_ex((server_fqdn, server_port))
         return result == 0
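Note: a minimal standalone sketch of the cross-platform shutdown pattern used above, for context only; the helper names are illustrative and not part of the change. Windows has no SIGKILL, and os.kill() there only supports the console control events (CTRL_C_EVENT, CTRL_BREAK_EVENT) or an unconditional TerminateProcess, which is why taskkill /F stays around as the last-resort kill for a dangling server.

    import os
    import signal
    import subprocess

    def stop(proc: subprocess.Popen) -> None:
        # Ask the server to stop: console control event on Windows, SIGKILL elsewhere.
        if os.name == 'nt':
            os.kill(proc.pid, signal.CTRL_C_EVENT)
        else:
            os.kill(proc.pid, signal.SIGKILL)

    def force_stop(proc: subprocess.Popen) -> None:
        # Last resort if the server is still listening after stop().
        if os.name == 'nt':
            subprocess.run(['taskkill', '/F', '/pid', str(proc.pid)], stderr=subprocess.PIPE)
        else:
            proc.kill()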

examples/server/tests/features/server.feature

@@ -47,7 +47,7 @@ Feature: llama.cpp server
       Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
       """
     And a completion request with no api error
-    Then 64 tokens are predicted matching fun|Annaks|popcorns
+    Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry
     And the completion is truncated
     And 109 prompt tokens are processed

examples/server/tests/features/steps/steps.py

@@ -285,17 +285,17 @@ def step_seed(context, seed):
 @step(u'a prefix prompt')
 def step_prompt_prefix(context):
-    context.prompt_prefix = context.text
+    context.prompt_prefix = context_text(context)
 
 
 @step(u'a junk suffix prompt')
 def step_prompt_junk_suffix(context):
-    context.prompt_junk_suffix = context.text
+    context.prompt_junk_suffix = context_text(context)
 
 
 @step(u'a suffix prompt')
 def step_prompt_suffix(context):
-    context.prompt_suffix = context.text
+    context.prompt_suffix = context_text(context)
 
 
 @step(u'{n_ga:d} group attention factor'
@@ -311,7 +311,7 @@ def step_impl(context, n_ga_w):
 @step(u'a passkey prompt template')
 def step_prompt_passkey(context):
-    context.prompt_passkey = context.text
+    context.prompt_passkey = context_text(context)
 
 
 @step(u'{n_prompts:d} fixed prompts')
@@ -371,7 +371,7 @@ async def step_oai_chat_completions(context, api_error):
 @step(u'a prompt')
 def step_a_prompt(context):
-    context.prompts.append(context.text)
+    context.prompts.append(context_text(context))
     context.n_prompts = len(context.prompts)
@@ -464,7 +464,7 @@ async def all_prompts_are_predicted(context, expected_predicted_n=None):
 @async_run_until_complete
 async def step_compute_embedding(context):
     context.n_prompts = 1
-    context.embeddings = await request_embedding(context.text, base_url=context.base_url)
+    context.embeddings = await request_embedding(context_text(context), base_url=context.base_url)
 
 
 @step(u'all embeddings are the same')
@@ -491,6 +491,7 @@ async def step_all_embeddings_are_the_same(context):
                 print(f"{msg}\n")
             assert np.isclose(similarity, 1.0, rtol=1e-05, atol=1e-08, equal_nan=False), msg
 
+
 @step(u'embeddings are generated')
 def step_assert_embeddings(context):
     assert context.n_prompts == len(context.embeddings), (f"unexpected response:\n"
@@ -504,7 +505,7 @@ def step_assert_embeddings(context):
 @async_run_until_complete
 async def step_oai_compute_embeddings(context):
     context.n_prompts = 1
-    context.embeddings = await request_oai_embeddings(context.text,
+    context.embeddings = await request_oai_embeddings(context_text(context),
                                                        base_url=context.base_url,
                                                        user_api_key=context.user_api_key,
                                                        model=context.model)
@@ -552,7 +553,7 @@ async def all_embeddings_are_generated(context):
 @step(u'tokenizing')
 @async_run_until_complete
 async def step_tokenize(context):
-    context.tokenized_text = context.text
+    context.tokenized_text = context_text(context)
     async with aiohttp.ClientSession() as session:
         async with session.post(f'{context.base_url}/tokenize',
                                 json={
@@ -1007,12 +1008,22 @@ async def completions_seed(context):
         else context.server_seed if hasattr(context, 'server_seed') else None
 
 
+def context_text(context):
+    return context.text.replace('\r', '')
+
+
 def start_server_background(context):
-    context.server_path = '../../../build/bin/server'
+    if os.name == 'nt':
+        context.server_path = '../../../build/bin/Release/server.exe'
+    else:
+        context.server_path = '../../../build/bin/server'
     if 'LLAMA_SERVER_BIN_PATH' in os.environ:
         context.server_path = os.environ['LLAMA_SERVER_BIN_PATH']
+    server_listen_addr = context.server_fqdn
+    if os.name == 'nt':
+        server_listen_addr = '0.0.0.0'
     server_args = [
-        '--host', context.server_fqdn,
+        '--host', server_listen_addr,
         '--port', context.server_port,
         '--model', context.model_file
     ]
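Note: context_text() strips carriage returns, presumably because feature-file docstrings come through with CRLF line endings when the repository is checked out on Windows, and the stray '\r' would otherwise end up inside the prompts sent to the server. A tiny illustration (the input string is made up):

    # Same normalization as context_text(); only the example text is invented.
    raw = "Lorem ipsum\r\ndolor sit amet\r\n"
    clean = raw.replace('\r', '')
    assert clean == "Lorem ipsum\ndolor sit amet\n"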
@@ -1045,7 +1056,17 @@ def start_server_background(context):
     if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
         server_args.extend(['--log-format', "text"])
     print(f"starting server with: {context.server_path} {server_args}\n")
+    flags = 0
+    if 'nt' == os.name:
+        flags |= 0x00000008  # DETACHED_PROCESS
+        flags |= 0x00000200  # CREATE_NEW_PROCESS_GROUP
+        flags |= 0x08000000  # CREATE_NO_WINDOW
+    pkwargs = {
+        'close_fds': True,  # close stdin/stdout/stderr on child
+        'creationflags': flags,
+    }
     context.server_process = subprocess.Popen(
         [str(arg) for arg in [context.server_path, *server_args]],
-        close_fds=True)
-    print(f"server pid={context.server_process.pid}")
+        **pkwargs)
+    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
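Note: the hard-coded hex values correspond to the Windows-only constants that subprocess exposes (Python 3.7+); spelled out with the named constants, the same flags look like the sketch below. Detaching the child and giving it its own process group keeps the server from sharing behave's console, so it can be signalled and killed independently of the test runner.

    import os
    import subprocess

    flags = 0
    if os.name == 'nt':
        # These attributes only exist in the Windows build of CPython.
        flags |= subprocess.DETACHED_PROCESS          # 0x00000008
        flags |= subprocess.CREATE_NEW_PROCESS_GROUP  # 0x00000200
        flags |= subprocess.CREATE_NO_WINDOW          # 0x08000000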