From 157bcf2286a4004ebf89e107bbe5ad124ae3714c Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Sun, 18 Feb 2024 17:13:04 +0100
Subject: [PATCH] server: init functional test

---
 .github/workflows/server-test.yml              | 70 +++++++++++++++++++
 examples/server/tests/README.md                |  9 +++
 .../server/tests/features/completions.feature  | 11 +++
 examples/server/tests/features/oai.feature     | 13 ++++
 .../server/tests/features/steps/completion.py  | 24 +++++++
 examples/server/tests/features/steps/oai.py    | 44 ++++++++++++
 examples/server/tests/requirements.txt         |  2 +
 7 files changed, 173 insertions(+)
 create mode 100644 .github/workflows/server-test.yml
 create mode 100644 examples/server/tests/README.md
 create mode 100644 examples/server/tests/features/completions.feature
 create mode 100644 examples/server/tests/features/oai.feature
 create mode 100644 examples/server/tests/features/steps/completion.py
 create mode 100644 examples/server/tests/features/steps/oai.py
 create mode 100644 examples/server/tests/requirements.txt

diff --git a/.github/workflows/server-test.yml b/.github/workflows/server-test.yml
new file mode 100644
index 000000000..d47c593f3
--- /dev/null
+++ b/.github/workflows/server-test.yml
@@ -0,0 +1,70 @@
+# Server test scenario
+name: Server Integration Tests
+
+# FIXME put only necessary triggers
+on:
+  push:
+    branches:
+      - master
+      - test/server-add-ci-test # FIXME remove
+    paths: ['.github/workflows/server-test.yml', '**/CMakeLists.txt', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', 'examples/server/**.*']
+
+jobs:
+  ubuntu-latest-cmake:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DCMAKE_BUILD_TYPE=Release
+          cmake --build . --config Release -j $(nproc)
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Download test model
+        id: download_model
+        run: |
+          ./scripts/hf.sh --repo TheBloke/Tinyllama-2-1b-miniguanaco-GGUF --file tinyllama-2-1b-miniguanaco.Q2_K.gguf
+
+      - name: Server Integration Tests
+        id: server_integration_test
+        run: |
+          ./build/bin/server \
+            -m tinyllama-2-1b-miniguanaco.Q2_K.gguf \
+            --ctx-size 512 \
+            --parallel 4 \
+            --n-predict 512 \
+            --batch-size 128 \
+            --threads 4 \
+            --threads-batch 128 \
+            --alias tinyllama-2 \
+            --embedding \
+            --cont-batching &
+          sh -c '\
+            max_attempts=30; \
+            attempts=${max_attempts}; \
+            echo "waiting for server to be ready..."; \
+            until curl --silent --show-error --fail "http://localhost:8080/health" | jq -r '.status' | grep ok; do \
+              attempts=$(( attempts - 1)); \
+              [ "${attempts}" -eq 0 ] && { echo "Server did not start up" >&2; exit 1; }; \
+              sleep $(( (max_attempts - attempts) * 2 )); \
+            done;'
+          cd examples/server/tests
+          behave
+
diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
new file mode 100644
index 000000000..3e0e2d8b1
--- /dev/null
+++ b/examples/server/tests/README.md
@@ -0,0 +1,9 @@
+# Server Integration Test
+
+Functional server test suite.
+
+### Install dependencies
+`pip install -r requirements.txt`
+
+### Run tests
+`python -m behave`
diff --git a/examples/server/tests/features/completions.feature b/examples/server/tests/features/completions.feature
new file mode 100644
index 000000000..4dc8786f6
--- /dev/null
+++ b/examples/server/tests/features/completions.feature
@@ -0,0 +1,11 @@
+Feature: Completion request
+
+  Scenario Outline: run a completion request
+    Given a prompt <prompt>
+    When we request a completion
+    Then tokens are predicted
+
+    Examples: Prompts
+      | prompt                                                          |
+      | I believe the meaning of life is                                |
+      | Write a detailed analogy between mathematics and a lighthouse. |
\ No newline at end of file
diff --git a/examples/server/tests/features/oai.feature b/examples/server/tests/features/oai.feature
new file mode 100644
index 000000000..d56aa8404
--- /dev/null
+++ b/examples/server/tests/features/oai.feature
@@ -0,0 +1,13 @@
+Feature: OpenAI compatible completions request
+
+  Scenario Outline: run a completion on the OAI endpoint
+    Given a system prompt <system_prompt>
+    And a user prompt <user_prompt>
+    And a model <model>
+    When we request the oai completions endpoint
+    Then the oai response contains completion tokens
+
+    Examples: Prompts
+      | model       | system_prompt               | user_prompt                         |
+      | tinyllama-2 | You are ChatGPT.            | I believe the meaning of life is    |
+      | tinyllama-2 | You are a coding assistant. | Write the fibonacci function in c++ |
\ No newline at end of file
diff --git a/examples/server/tests/features/steps/completion.py b/examples/server/tests/features/steps/completion.py
new file mode 100644
index 000000000..fda9a68e6
--- /dev/null
+++ b/examples/server/tests/features/steps/completion.py
@@ -0,0 +1,24 @@
+from behave import *
+import requests
+
+
+@given(u'a prompt {prompt}')
+def step_prompt(context, prompt):
+    context.prompt = prompt
+
+
+@when(u'we request a completion')
+def step_request_completion(context):
+    response = requests.post('http://localhost:8080/completion', json={
+        "prompt": context.prompt
+    })
+    status_code = response.status_code
+    assert status_code == 200
+    context.response_data = response.json()
+
+
+@then(u'tokens are predicted')
+def step_tokens_predicted(context):
+    assert len(context.response_data['content']) > 0
+    assert context.response_data['timings']['predicted_n'] > 0
+
diff --git a/examples/server/tests/features/steps/oai.py b/examples/server/tests/features/steps/oai.py
new file mode 100644
index 000000000..0ed4ebd64
--- /dev/null
+++ b/examples/server/tests/features/steps/oai.py
@@ -0,0 +1,44 @@
+from behave import *
+import openai
+
+openai.api_key = 'llama.cpp'
+openai.api_base = "http://localhost:8080/v1/chat"
+
+
+@given(u'a user prompt {user_prompt}')
+def step_user_prompt(context, user_prompt):
+    context.user_prompt = user_prompt
+
+
+@given(u'a system prompt {system_prompt}')
+def step_system_prompt(context, system_prompt):
+    context.system_prompt = system_prompt
+
+
+@given(u'a model {model}')
+def step_model(context, model):
+    context.model = model
+
+
+@when(u'we request the oai completions endpoint')
+def step_oai_completions(context):
+    context.chat_completion = openai.Completion.create(
+        messages=[
+            {
+                "role": "system",
+                "content": context.system_prompt,
+            },
+            {
+                "role": "user",
+                "content": context.user_prompt,
+            }
+        ],
+        model=context.model,
+    )
+
+
+@then(u'the oai response contains completion tokens')
+def step_oai_response_has_completion_tokens(context):
+    assert len(context.chat_completion.choices) == 1
+    assert len(context.chat_completion.choices[0].message) > 0
+    assert context.chat_completion.usage.completion_tokens > 0
diff --git a/examples/server/tests/requirements.txt b/examples/server/tests/requirements.txt
new file mode 100644
index 000000000..f5c6f2e4a
--- /dev/null
+++ b/examples/server/tests/requirements.txt
@@ -0,0 +1,2 @@
+behave~=1.2.6
+openai~=0.25.0
\ No newline at end of file
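
When debugging the suite locally it helps to confirm the server is actually up before invoking `behave`. The following standalone sketch is not part of the patch and its file name is illustrative; it mirrors the /health polling loop from the workflow and the /completion step from features/steps/completion.py, assuming a server started on http://localhost:8080 with the same flags as the CI job.

    # smoke_completion.py - hypothetical local helper, not included in the patch above.
    # Polls the llama.cpp server health endpoint, then issues one completion,
    # mirroring .github/workflows/server-test.yml and features/steps/completion.py.
    import time
    import requests

    BASE_URL = "http://localhost:8080"  # assumed server address, same as the CI job


    def wait_for_health(max_attempts=30):
        # Same idea as the curl/jq loop in the workflow: retry until status == "ok".
        for attempt in range(1, max_attempts + 1):
            try:
                if requests.get(f"{BASE_URL}/health").json().get("status") == "ok":
                    return
            except requests.exceptions.ConnectionError:
                pass
            time.sleep(attempt * 2)
        raise RuntimeError("Server did not start up")


    def check_completion(prompt):
        # Same request and assertions as the 'tokens are predicted' step.
        response = requests.post(f"{BASE_URL}/completion", json={"prompt": prompt})
        assert response.status_code == 200
        data = response.json()
        assert len(data["content"]) > 0
        assert data["timings"]["predicted_n"] > 0


    if __name__ == "__main__":
        wait_for_health()
        check_completion("I believe the meaning of life is")
        print("server is healthy and predicting tokens")

While iterating on a single scenario, `behave` also accepts a path argument, e.g. `behave features/completions.feature`, so only that feature file is run.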
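
The OAI steps reach the server through the pinned openai~=0.25.0 client, which predates the ChatCompletion helper; pointing api_base at /v1/chat and calling Completion.create makes that client post to the server's /v1/chat/completions route. The sketch below, again hypothetical and not part of the patch, exercises the same route with plain requests, which can be handy when the openai dependency is unwanted; the asserted fields match what the 'oai response contains completion tokens' step checks.

    # oai_smoke.py - hypothetical helper, not included in the patch above.
    # Hits the same OpenAI-compatible route that features/steps/oai.py exercises,
    # but with plain requests instead of the openai client.
    import requests

    BASE_URL = "http://localhost:8080"  # assumed, as in the CI workflow

    payload = {
        "model": "tinyllama-2",
        "messages": [
            {"role": "system", "content": "You are ChatGPT."},
            {"role": "user", "content": "I believe the meaning of life is"},
        ],
    }
    response = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload)
    response.raise_for_status()
    body = response.json()
    # Mirrors the assertions of the 'oai response contains completion tokens' step.
    assert len(body["choices"]) == 1
    assert body["usage"]["completion_tokens"] > 0
    print(body["choices"][0]["message"]["content"])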