From 89c4bd5e97cc7a1eead1cdd0a0019b824b1696eb Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Sat, 9 Mar 2024 23:00:48 +0100
Subject: [PATCH] server: ci: windows build and tests

---
 .github/workflows/server.yml                  | 44 ++++++++++++++++++
 examples/server/tests/features/environment.py | 32 ++++++++-----
 examples/server/tests/features/server.feature |  2 +-
 examples/server/tests/features/steps/steps.py | 45 ++++++++++++++-----
 4 files changed, 100 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index f9aeefaa8..3b6eb9113 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -11,6 +11,7 @@ on:
   push:
     branches:
       - master
+      - hp/server/ci/windows/init
     paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
   pull_request:
     types: [opened, synchronize, reopened]
@@ -90,3 +91,46 @@ jobs:
         run: |
           cd examples/server/tests
           PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
+
+
+  server-windows:
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
+          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Tests
+        id: server_integration_tests
+        run: |
+          cd examples/server/tests
+          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
+        run: |
+          cd examples/server/tests
+          behave.exe --stop --no-skipped --no-capture --tags slow
\ No newline at end of file
diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py
index 9fd330db6..1b7f7c61f 100644
--- a/examples/server/tests/features/environment.py
+++ b/examples/server/tests/features/environment.py
@@ -3,7 +3,7 @@ import socket
 import subprocess
 import time
 from contextlib import closing
-from signal import SIGKILL
+import signal


 def before_scenario(context, scenario):
@@ -29,31 +29,43 @@ def after_scenario(context, scenario):
                     for line in f:
                         print(line)
         if not is_server_listening(context.server_fqdn, context.server_port):
-            print("\x1b[33;101mERROR: Server stopped listening\x1b[0m")
+            print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")

     if not pid_exists(context.server_process.pid):
         assert False, f"Server not running pid={context.server_process.pid} ..."

- print(f"stopping server pid={context.server_process.pid} ...") - context.server_process.kill() + kill_server(context) + # Wait few for socket to free up time.sleep(0.05) attempts = 0 while is_server_listening(context.server_fqdn, context.server_port): - print(f"stopping server pid={context.server_process.pid} ...") - os.kill(context.server_process.pid, SIGKILL) + context.server_process.kill() time.sleep(0.1) attempts += 1 if attempts > 5: - print(f"Server dangling exits, killing all {context.server_path} ...") - process = subprocess.run(['killall', '-9', context.server_path], - stderr=subprocess.PIPE, - universal_newlines=True) + if os.name == 'nt': + print(f"Server dangling exits, task killing force {context.server_process.pid} ...\n") + process = subprocess.run(['taskkill', '/F', '/pid', str(context.server_process.pid)], + stderr=subprocess.PIPE) + else: + print(f"Server dangling exits, killing all {context.server_path} ...\n") + process = subprocess.run(['killall', '-9', context.server_path], + stderr=subprocess.PIPE) print(process) +def kill_server(context): + print(f"stopping server pid={context.server_process.pid} ...\n") + if os.name == 'nt': + os.kill(context.server_process.pid, signal.CTRL_C_EVENT) + else: + os.kill(context.server_process.pid, signal.SIGKILL) + + def is_server_listening(server_fqdn, server_port): + print(f"is server listening on {server_fqdn}:{server_port}...\n") with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: result = sock.connect_ex((server_fqdn, server_port)) return result == 0 diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature index aa132fa34..5014f326d 100644 --- a/examples/server/tests/features/server.feature +++ b/examples/server/tests/features/server.feature @@ -47,7 +47,7 @@ Feature: llama.cpp server Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
""" And a completion request with no api error - Then 64 tokens are predicted matching fun|Annaks|popcorns + Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry And the completion is truncated And 109 prompt tokens are processed diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 142048509..62e0b1155 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -285,17 +285,17 @@ def step_seed(context, seed): @step(u'a prefix prompt') def step_prompt_prefix(context): - context.prompt_prefix = context.text + context.prompt_prefix = context_text(context) @step(u'a junk suffix prompt') def step_prompt_junk_suffix(context): - context.prompt_junk_suffix = context.text + context.prompt_junk_suffix = context_text(context) @step(u'a suffix prompt') def step_prompt_suffix(context): - context.prompt_suffix = context.text + context.prompt_suffix = context_text(context) @step(u'{n_ga:d} group attention factor' @@ -311,7 +311,7 @@ def step_impl(context, n_ga_w): @step(u'a passkey prompt template') def step_prompt_passkey(context): - context.prompt_passkey = context.text + context.prompt_passkey = context_text(context) @step(u'{n_prompts:d} fixed prompts') @@ -371,7 +371,7 @@ async def step_oai_chat_completions(context, api_error): @step(u'a prompt') def step_a_prompt(context): - context.prompts.append(context.text) + context.prompts.append(context_text(context)) context.n_prompts = len(context.prompts) @@ -464,7 +464,7 @@ async def all_prompts_are_predicted(context, expected_predicted_n=None): @async_run_until_complete async def step_compute_embedding(context): context.n_prompts = 1 - context.embeddings = await request_embedding(context.text, base_url=context.base_url) + context.embeddings = await request_embedding(context_text(context), base_url=context.base_url) @step(u'all embeddings are the same') @@ -491,6 +491,7 @@ async def step_all_embeddings_are_the_same(context): print(f"{msg}\n") assert np.isclose(similarity, 1.0, rtol=1e-05, atol=1e-08, equal_nan=False), msg + @step(u'embeddings are generated') def step_assert_embeddings(context): assert context.n_prompts == len(context.embeddings), (f"unexpected response:\n" @@ -504,7 +505,7 @@ def step_assert_embeddings(context): @async_run_until_complete async def step_oai_compute_embeddings(context): context.n_prompts = 1 - context.embeddings = await request_oai_embeddings(context.text, + context.embeddings = await request_oai_embeddings(context_text(context), base_url=context.base_url, user_api_key=context.user_api_key, model=context.model) @@ -552,7 +553,7 @@ async def all_embeddings_are_generated(context): @step(u'tokenizing') @async_run_until_complete async def step_tokenize(context): - context.tokenized_text = context.text + context.tokenized_text = context_text(context) async with aiohttp.ClientSession() as session: async with session.post(f'{context.base_url}/tokenize', json={ @@ -1007,12 +1008,22 @@ async def completions_seed(context): else context.server_seed if hasattr(context, 'server_seed') else None +def context_text(context): + return context.text.replace('\r', '') + + def start_server_background(context): - context.server_path = '../../../build/bin/server' + if os.name == 'nt': + context.server_path = '../../../build/bin/Release/server.exe' + else: + context.server_path = '../../../build/bin/server' if 'LLAMA_SERVER_BIN_PATH' in os.environ: context.server_path = os.environ['LLAMA_SERVER_BIN_PATH'] + 
+    server_listen_addr = context.server_fqdn
+    if os.name == 'nt':
+        server_listen_addr = '0.0.0.0'
     server_args = [
-        '--host', context.server_fqdn,
+        '--host', server_listen_addr,
         '--port', context.server_port,
         '--model', context.model_file
     ]
@@ -1045,7 +1056,17 @@ def start_server_background(context):
     if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
         server_args.extend(['--log-format', "text"])
     print(f"starting server with: {context.server_path} {server_args}\n")
+    flags = 0
+    if 'nt' == os.name:
+        flags |= 0x00000008  # DETACHED_PROCESS
+        flags |= 0x00000200  # CREATE_NEW_PROCESS_GROUP
+        flags |= 0x08000000  # CREATE_NO_WINDOW
+
+    pkwargs = {
+        'close_fds': True,  # close inherited fds other than stdin/stdout/stderr in the child
+        'creationflags': flags,
+    }
     context.server_process = subprocess.Popen(
         [str(arg) for arg in [context.server_path, *server_args]],
-        close_fds=True)
-    print(f"server pid={context.server_process.pid}")
+        **pkwargs)
+    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
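
Note on the Windows process handling above. The hex creationflags hard-coded
in the last hunk have named equivalents in the subprocess module, and the
CTRL_C_EVENT / taskkill pair in environment.py mirrors the POSIX
SIGKILL / killall path. Below is a minimal standalone sketch of that pattern,
not taken from the patch: the spawn/stop helper names and the 5-second
timeout are illustrative only.

import os
import signal
import subprocess


def spawn(cmd):
    # Windows-only CreateProcess flags; the named constants correspond to
    # the hex values used in the patch (DETACHED_PROCESS = 0x00000008,
    # CREATE_NEW_PROCESS_GROUP = 0x00000200, CREATE_NO_WINDOW = 0x08000000).
    flags = 0
    if os.name == 'nt':
        flags |= subprocess.DETACHED_PROCESS
        flags |= subprocess.CREATE_NEW_PROCESS_GROUP
        flags |= subprocess.CREATE_NO_WINDOW
    # creationflags must be 0 on POSIX, so the default is safe off Windows.
    return subprocess.Popen(cmd, close_fds=True, creationflags=flags)


def stop(proc):
    if os.name == 'nt':
        # CTRL_C_EVENT is the closest Windows analogue of SIGINT; per the
        # os.kill docs it only reaches console processes, hence the
        # taskkill fallback below.
        os.kill(proc.pid, signal.CTRL_C_EVENT)
    else:
        os.kill(proc.pid, signal.SIGKILL)
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # Force termination if the process survived the signal.
        if os.name == 'nt':
            subprocess.run(['taskkill', '/F', '/pid', str(proc.pid)],
                           stderr=subprocess.PIPE)
        else:
            proc.kill()

Gating the force-kill behind a timeout matches the attempts loop in
after_scenario: signal first, escalate only if the server keeps listening,
so a hung server.exe cannot leave the CI port occupied for the next scenario.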