server: ci: windows build and tests

commit 89c4bd5e97 (parent 621e86b331)
4 changed files with 100 additions and 23 deletions

.github/workflows/server.yml (vendored): 44 changes
@@ -11,6 +11,7 @@ on:
   push:
     branches:
       - master
+      - hp/server/ci/windows/init
     paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
   pull_request:
     types: [opened, synchronize, reopened]
@@ -90,3 +91,46 @@ jobs:
         run: |
           cd examples/server/tests
           PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
+
+  server-windows:
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
+          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Tests
+        id: server_integration_tests
+        run: |
+          cd examples/server/tests
+          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
+        run: |
+          cd examples/server/tests
+          behave.exe --stop --no-skipped --no-capture --tags slow
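
Note: the new server-windows job mirrors the Linux job but calls behave.exe directly, since tests.sh is a bash-only wrapper. A hypothetical cross-platform launcher (sketch only; run_integration_tests is an invented name, not part of this commit):

    import os
    import subprocess

    def run_integration_tests(tags='llama.cpp'):
        # pip installs a behave.exe entry point on Windows; the tests.sh
        # wrapper is a bash script and only usable on the POSIX runners
        runner = 'behave.exe' if os.name == 'nt' else './tests.sh'
        cmd = [runner, '--summary', '--stop', '--no-capture', '--tags', tags]
        return subprocess.run(cmd, cwd='examples/server/tests').returncode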

examples/server/tests/features/environment.py
@@ -3,7 +3,7 @@ import socket
 import subprocess
 import time
 from contextlib import closing
-from signal import SIGKILL
+import signal
 
 
 def before_scenario(context, scenario):
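
Why the import changed: signal.SIGKILL is not defined on Windows, so the from-import fails at module load time there, while import signal defers the attribute lookup to the POSIX-only branch. A quick check (sketch):

    import signal
    print(hasattr(signal, 'SIGKILL'))       # True on POSIX, False on Windows
    print(hasattr(signal, 'CTRL_C_EVENT'))  # True on Windows only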

@@ -29,31 +29,43 @@ def after_scenario(context, scenario):
                     for line in f:
                         print(line)
             if not is_server_listening(context.server_fqdn, context.server_port):
-                print("\x1b[33;101mERROR: Server stopped listening\x1b[0m")
+                print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")
 
         if not pid_exists(context.server_process.pid):
             assert False, f"Server not running pid={context.server_process.pid} ..."
 
-        print(f"stopping server pid={context.server_process.pid} ...")
-        context.server_process.kill()
+        kill_server(context)
+
         # Wait few for socket to free up
         time.sleep(0.05)
 
         attempts = 0
         while is_server_listening(context.server_fqdn, context.server_port):
-            print(f"stopping server pid={context.server_process.pid} ...")
-            os.kill(context.server_process.pid, SIGKILL)
+            context.server_process.kill()
             time.sleep(0.1)
             attempts += 1
             if attempts > 5:
-                print(f"Server dangling exits, killing all {context.server_path} ...")
-                process = subprocess.run(['killall', '-9', context.server_path],
-                                         stderr=subprocess.PIPE,
-                                         universal_newlines=True)
+                if os.name == 'nt':
+                    print(f"Server dangling exits, task killing force {context.server_process.pid} ...\n")
+                    process = subprocess.run(['taskkill', '/F', '/pid', str(context.server_process.pid)],
+                                             stderr=subprocess.PIPE)
+                else:
+                    print(f"Server dangling exits, killing all {context.server_path} ...\n")
+                    process = subprocess.run(['killall', '-9', context.server_path],
+                                             stderr=subprocess.PIPE)
                 print(process)
 
 
+def kill_server(context):
+    print(f"stopping server pid={context.server_process.pid} ...\n")
+    if os.name == 'nt':
+        os.kill(context.server_process.pid, signal.CTRL_C_EVENT)
+    else:
+        os.kill(context.server_process.pid, signal.SIGKILL)
+
+
 def is_server_listening(server_fqdn, server_port):
+    print(f"is server listening on {server_fqdn}:{server_port}...\n")
     with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
         result = sock.connect_ex((server_fqdn, server_port))
         return result == 0
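
Note on kill_server: on Windows, os.kill with signal.CTRL_C_EVENT delivers a console Ctrl-C to a process group, so it relies on the server having been started in its own group (start_server_background sets CREATE_NEW_PROCESS_GROUP later in this diff). A minimal sketch of the pairing, with placeholder binary paths:

    import os
    import signal
    import subprocess

    if os.name == 'nt':
        # own process group, so the Ctrl-C event does not hit the test runner
        proc = subprocess.Popen(['server.exe'],  # placeholder path
                                creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
        os.kill(proc.pid, signal.CTRL_C_EVENT)
    else:
        proc = subprocess.Popen(['./server'])    # placeholder path
        os.kill(proc.pid, signal.SIGKILL)        # hard kill on POSIX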

examples/server/tests/features/server.feature
@@ -47,7 +47,7 @@ Feature: llama.cpp server
       Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
       """
     And a completion request with no api error
-    Then 64 tokens are predicted matching fun|Annaks|popcorns
+    Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry
     And the completion is truncated
     And 109 prompt tokens are processed
 
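The widened assertion tolerates one more stem (pictry), presumably because the Windows build predicts slightly different tokens for this scenario; the step treats the expression as a plain regex alternation (illustration with an invented sample string):

    import re
    assert re.search('fun|Annaks|popcorns|pictry', 'we saw a pictry of fun')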

examples/server/tests/features/steps/steps.py
@@ -285,17 +285,17 @@ def step_seed(context, seed):
 
 @step(u'a prefix prompt')
 def step_prompt_prefix(context):
-    context.prompt_prefix = context.text
+    context.prompt_prefix = context_text(context)
 
 
 @step(u'a junk suffix prompt')
 def step_prompt_junk_suffix(context):
-    context.prompt_junk_suffix = context.text
+    context.prompt_junk_suffix = context_text(context)
 
 
 @step(u'a suffix prompt')
 def step_prompt_suffix(context):
-    context.prompt_suffix = context.text
+    context.prompt_suffix = context_text(context)
 
 
 @step(u'{n_ga:d} group attention factor'
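
The repeated context.text to context_text(context) swap in this file guards against CRLF: on a Windows checkout, behave scenario docstrings can arrive with \r\n line endings, which would silently change prompts and therefore token counts. The helper (added near the end of this diff) just strips carriage returns; a quick illustration:

    raw = "Lorem ipsum\r\ndolor sit amet"        # docstring as read on Windows
    assert raw.replace('\r', '') == "Lorem ipsum\ndolor sit amet"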

@@ -311,7 +311,7 @@ def step_impl(context, n_ga_w):
 
 @step(u'a passkey prompt template')
 def step_prompt_passkey(context):
-    context.prompt_passkey = context.text
+    context.prompt_passkey = context_text(context)
 
 
 @step(u'{n_prompts:d} fixed prompts')

@@ -371,7 +371,7 @@ async def step_oai_chat_completions(context, api_error):
 
 @step(u'a prompt')
 def step_a_prompt(context):
-    context.prompts.append(context.text)
+    context.prompts.append(context_text(context))
     context.n_prompts = len(context.prompts)
 
 

@@ -464,7 +464,7 @@ async def all_prompts_are_predicted(context, expected_predicted_n=None):
 @async_run_until_complete
 async def step_compute_embedding(context):
     context.n_prompts = 1
-    context.embeddings = await request_embedding(context.text, base_url=context.base_url)
+    context.embeddings = await request_embedding(context_text(context), base_url=context.base_url)
 
 
 @step(u'all embeddings are the same')

@@ -491,6 +491,7 @@ async def step_all_embeddings_are_the_same(context):
         print(f"{msg}\n")
     assert np.isclose(similarity, 1.0, rtol=1e-05, atol=1e-08, equal_nan=False), msg
 
+
 @step(u'embeddings are generated')
 def step_assert_embeddings(context):
     assert context.n_prompts == len(context.embeddings), (f"unexpected response:\n"

@@ -504,7 +505,7 @@ def step_assert_embeddings(context):
 @async_run_until_complete
 async def step_oai_compute_embeddings(context):
     context.n_prompts = 1
-    context.embeddings = await request_oai_embeddings(context.text,
+    context.embeddings = await request_oai_embeddings(context_text(context),
                                                       base_url=context.base_url,
                                                       user_api_key=context.user_api_key,
                                                       model=context.model)

@@ -552,7 +553,7 @@ async def all_embeddings_are_generated(context):
 @step(u'tokenizing')
 @async_run_until_complete
 async def step_tokenize(context):
-    context.tokenized_text = context.text
+    context.tokenized_text = context_text(context)
     async with aiohttp.ClientSession() as session:
         async with session.post(f'{context.base_url}/tokenize',
                                 json={

@@ -1007,12 +1008,22 @@ async def completions_seed(context):
         else context.server_seed if hasattr(context, 'server_seed') else None
 
 
+def context_text(context):
+    return context.text.replace('\r', '')
+
+
 def start_server_background(context):
-    context.server_path = '../../../build/bin/server'
+    if os.name == 'nt':
+        context.server_path = '../../../build/bin/Release/server.exe'
+    else:
+        context.server_path = '../../../build/bin/server'
     if 'LLAMA_SERVER_BIN_PATH' in os.environ:
         context.server_path = os.environ['LLAMA_SERVER_BIN_PATH']
+    server_listen_addr = context.server_fqdn
+    if os.name == 'nt':
+        server_listen_addr = '0.0.0.0'
     server_args = [
-        '--host', context.server_fqdn,
+        '--host', server_listen_addr,
         '--port', context.server_port,
         '--model', context.model_file
     ]
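
Two platform accommodations here: MSVC's multi-config generator puts the binary under build/bin/Release/, and on Windows the server binds 0.0.0.0, presumably so the tests can reach it regardless of how the runner resolves its hostname. A hypothetical fail-fast check on the resolved path (not in the commit):

    import os

    server_path = ('../../../build/bin/Release/server.exe'
                   if os.name == 'nt' else '../../../build/bin/server')
    server_path = os.environ.get('LLAMA_SERVER_BIN_PATH', server_path)
    assert os.path.exists(server_path), f"server binary not found: {server_path}"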

@@ -1045,7 +1056,17 @@ def start_server_background(context):
     if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
         server_args.extend(['--log-format', "text"])
     print(f"starting server with: {context.server_path} {server_args}\n")
+    flags = 0
+    if 'nt' == os.name:
+        flags |= 0x00000008  # DETACHED_PROCESS
+        flags |= 0x00000200  # CREATE_NEW_PROCESS_GROUP
+        flags |= 0x08000000  # CREATE_NO_WINDOW
+
+    pkwargs = {
+        'close_fds': True,  # close stdin/stdout/stderr on child
+        'creationflags': flags,
+    }
     context.server_process = subprocess.Popen(
         [str(arg) for arg in [context.server_path, *server_args]],
-        close_fds=True)
-    print(f"server pid={context.server_process.pid}")
+        **pkwargs)
+    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
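
The hex creationflags correspond to the constants the subprocess module exposes on Windows: DETACHED_PROCESS and CREATE_NO_WINDOW keep the server off the runner's console, and CREATE_NEW_PROCESS_GROUP is what lets kill_server in environment.py address a CTRL_C_EVENT to the server alone. A sketch (these names exist only on Windows builds of Python):

    import os
    import subprocess

    if os.name == 'nt':
        # values match the Win32 process creation flags used above
        assert subprocess.DETACHED_PROCESS == 0x00000008
        assert subprocess.CREATE_NEW_PROCESS_GROUP == 0x00000200
        assert subprocess.CREATE_NO_WINDOW == 0x08000000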