From 9b63d7057a5e0b3e6ad6fbb681d754182b3bc762 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Mon, 19 Feb 2024 21:50:56 +0100
Subject: [PATCH] server: tests: reduce number of files, all in one tests shell script

---
 .github/workflows/server-test.yml              | 23 +---------
 examples/server/tests/README.md                |  4 +-
 .../server/tests/features/completions.feature  | 11 -----
 .../features/{oai.feature => server.feature}   | 14 +++++-
 .../server/tests/features/steps/completion.py  | 24 ----------
 .../tests/features/steps/{oai.py => steps.py}  | 24 +++++++++-
 examples/server/tests/tests.sh                 | 45 +++++++++++++++++++
 7 files changed, 85 insertions(+), 60 deletions(-)
 delete mode 100644 examples/server/tests/features/completions.feature
 rename examples/server/tests/features/{oai.feature => server.feature} (57%)
 delete mode 100644 examples/server/tests/features/steps/completion.py
 rename examples/server/tests/features/steps/{oai.py => steps.py} (66%)
 create mode 100755 examples/server/tests/tests.sh

diff --git a/.github/workflows/server-test.yml b/.github/workflows/server-test.yml
index d47c593f3..efd1bfcf3 100644
--- a/.github/workflows/server-test.yml
+++ b/.github/workflows/server-test.yml
@@ -45,26 +45,7 @@ jobs:
       - name: Server Integration Tests
         id: server_integration_test
         run: |
-          ./build/bin/server \
-            -m tinyllama-2-1b-miniguanaco.Q2_K.gguf \
-            --ctx-size 512 \
-            --parallel 4 \
-            --n-predict 512 \
-            --batch-size 128 \
-            --threads 4 \
-            --threads-batch 128 \
-            --alias phi-2 \
-            --embedding \
-            --cont-batching &
-          sh -c '\
-            max_attempts=30; \
-            attempts=${max_attempts}; \
-            echo "waiting for server to be ready..."; \
-            until curl --silent --show-error --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do \
-              attempts=$(( attempts - 1)); \
-              [ "${attempts}" -eq 0 ] && { echo "Server did not startup" >&2; exit 1; }; \
-              sleep $(( (max_attempts - attempts) * 2 )); \
-            done;'
           cd examples/server/tests
-          behave
+          ./tests.sh
+
diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
index 3e0e2d8b1..975fee848 100644
--- a/examples/server/tests/README.md
+++ b/examples/server/tests/README.md
@@ -6,4 +6,6 @@ Functional server tests suite.
 `pip install -r requirements.txt`
 
 ### Run tests
-`python -m behave`
+1. Build the server
+2. Download a GGUF model: `../../../scripts/hf.sh --repo TheBloke/Tinyllama-2-1b-miniguanaco-GGUF --file tinyllama-2-1b-miniguanaco.Q2_K.gguf`
+3. Run the tests: `./tests.sh tinyllama-2-1b-miniguanaco.Q2_K.gguf -ngl 23 --log-disable`
diff --git a/examples/server/tests/features/completions.feature b/examples/server/tests/features/completions.feature
deleted file mode 100644
index 4dc8786f6..000000000
--- a/examples/server/tests/features/completions.feature
+++ /dev/null
@@ -1,11 +0,0 @@
-Feature: Completion request
-
-  Scenario Outline: run a completion request
-    Given a prompt <prompt>
-    When we request a completion
-    Then tokens are predicted
-
-  Examples: Prompts
-    | prompt |
-    | I believe the meaning of life is |
-    | Write a detailed analogy between mathematics and a lighthouse. |
\ No newline at end of file
diff --git a/examples/server/tests/features/oai.feature b/examples/server/tests/features/server.feature
similarity index 57%
rename from examples/server/tests/features/oai.feature
rename to examples/server/tests/features/server.feature
index d56aa8404..60d8de954 100644
--- a/examples/server/tests/features/oai.feature
+++ b/examples/server/tests/features/server.feature
@@ -1,4 +1,14 @@
-Feature: OpenAI compatible completions request
+Feature: llama.cpp server
+
+  Scenario Outline: run a completion request
+    Given a prompt <prompt>
+    When we request a completion
+    Then tokens are predicted
+
+  Examples: Prompts
+    | prompt |
+    | I believe |
+    | Write a joke |
 
   Scenario Outline: run a completion on the OAI endpoint
     Given a system prompt <system_prompt>
@@ -9,5 +19,5 @@
 
   Examples: Prompts
     | model | system_prompt | user_prompt |
-    | tinyllama-2 | You are ChatGPT. | I believe the meaning of life is |
+    | tinyllama-2 | You are ChatGPT. | Say hello |
     | tinyllama-2 | You are a coding assistant. | Write the fibonacci function in c++ |
\ No newline at end of file
diff --git a/examples/server/tests/features/steps/completion.py b/examples/server/tests/features/steps/completion.py
deleted file mode 100644
index fda9a68e6..000000000
--- a/examples/server/tests/features/steps/completion.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from behave import *
-import requests
-
-
-@given(u'a prompt {prompt}')
-def step_prompt(context, prompt):
-    context.prompt = prompt
-
-
-@when(u'we request a completion')
-def step_request_completion(context):
-    response = requests.post('http://localhost:8080/completion', json={
-        "prompt": context.prompt
-    })
-    status_code = response.status_code
-    assert status_code == 200
-    context.response_data = response.json()
-
-
-@then(u'tokens are predicted')
-def step_request_completion(context):
-    assert len(context.response_data['content']) > 0
-    assert context.response_data['timings']['predicted_n'] > 0
-
diff --git a/examples/server/tests/features/steps/oai.py b/examples/server/tests/features/steps/steps.py
similarity index 66%
rename from examples/server/tests/features/steps/oai.py
rename to examples/server/tests/features/steps/steps.py
index 0ed4ebd64..f2721097b 100644
--- a/examples/server/tests/features/steps/oai.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1,10 +1,32 @@
-from behave import *
 import openai
+import requests
+from behave import *
 
 openai.api_key = 'llama.cpp'
 openai.api_base = "http://localhost:8080/v1/chat"
 
 
+@given(u'a prompt {prompt}')
+def step_prompt(context, prompt):
+    context.prompt = prompt
+
+
+@when(u'we request a completion')
+def step_request_completion(context):
+    response = requests.post('http://localhost:8080/completion', json={
+        "prompt": context.prompt
+    })
+    status_code = response.status_code
+    assert status_code == 200
+    context.response_data = response.json()
+
+
+@then(u'tokens are predicted')
+def step_assert_predicted_tokens(context):
+    assert len(context.response_data['content']) > 0
+    assert context.response_data['timings']['predicted_n'] > 0
+
+
 @given(u'a user prompt {user_prompt}')
 def step_user_prompt(context, user_prompt):
     context.user_prompt = user_prompt
diff --git a/examples/server/tests/tests.sh b/examples/server/tests/tests.sh
new file mode 100755
index 000000000..d3d414cd3
--- /dev/null
+++ b/examples/server/tests/tests.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+if [ $# -lt 1 ]
+then
+  >&2 echo "Usage: $0 model_path [server_args...]"
+  exit 1
+fi
+
+cleanup() {
+  pkill -P $$
+}
+trap cleanup EXIT
+
+model_path="$1"
+shift 1
+
+set -eu
+
+# Start the server in the background
+../../../build/bin/server \
+  --model "$model_path" \
+  --alias tinyllama-2 \
+  --ctx-size 64 \
+  --parallel 2 \
+  --n-predict 32 \
+  --batch-size 32 \
+  --threads 4 \
+  --threads-batch 4 \
+  --embedding \
+  --cont-batching \
+  "$@" &
+
+# Wait for the server to start
+max_attempts=30
+attempts=${max_attempts}
+until curl --silent --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do
+  attempts=$(( attempts - 1 ))
+  [ "${attempts}" -eq 0 ] && { echo "Server did not start up" >&2; exit 1; }
+  sleep_time=$(( (max_attempts - attempts) * 2 ))
+  echo "waiting ${sleep_time}s for the server to be ready..."
+  sleep ${sleep_time}
+done
+
+# Run the tests
+behave
\ No newline at end of file
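
Note (reviewer sketch, not part of the patch): tests.sh starts the server with --embedding, but no scenario exercises embeddings yet. A minimal follow-up in the style of steps.py could cover it, assuming the server exposes an /embedding route that accepts {"content": ...} and returns {"embedding": [...]}; the route, payload shape, and step wording below are illustrative assumptions, not something this patch defines.

import requests
from behave import *


@given(u'a text {text}')
def step_text(context, text):
    context.text = text


@when(u'we request an embedding')
def step_request_embedding(context):
    # Assumed endpoint: POST /embedding with {"content": ...}
    # returning {"embedding": [...]} (verify against the server docs).
    response = requests.post('http://localhost:8080/embedding', json={
        "content": context.text
    })
    assert response.status_code == 200
    context.embedding = response.json()['embedding']


@then(u'an embedding vector is returned')
def step_assert_embedding(context):
    # Without model-specific expectations, a non-empty vector is the
    # strongest assertion that stays deterministic across models.
    assert len(context.embedding) > 0

A matching scenario outline would then read: Given a text <text> / When we request an embedding / Then an embedding vector is returned.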