From 4fadb072e93ed724c93353eeddd6e207eb245991 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT <pierrick.hymbert@gmail.com>
Date: Sat, 16 Mar 2024 18:15:20 +0100
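Subject: [PATCH] server: tests: add `--model-url` tests

Add `a model file {model_file}` and `a model url {model_url}` steps and
pass them to the server as `--model` / `--model-url` only when they are
set. The embeddings feature now uses a model URL together with a local
model file path instead of the HF repo step. The README example for
`@bug` scenarios also gains `--stop`.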
---
 examples/server/tests/README.md                 |  2 +-
 .../server/tests/features/embeddings.feature    |  3 ++-
 examples/server/tests/features/steps/steps.py   | 17 ++++++++++++++++-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
index 95a0353b6..feb2b1d6c 100644
--- a/examples/server/tests/README.md
+++ b/examples/server/tests/README.md
@@ -57,7 +57,7 @@ Feature or Scenario must be annotated with `@llama.cpp` to be included in the de
 To run a scenario annotated with `@bug`, start:
 
 ```shell
-DEBUG=ON ./tests.sh --no-skipped --tags bug
+DEBUG=ON ./tests.sh --no-skipped --tags bug --stop
 ```
 
 After changing logic in `steps.py`, ensure that `@bug` and `@wrong_usage` scenario are updated.
diff --git a/examples/server/tests/features/embeddings.feature b/examples/server/tests/features/embeddings.feature
index 57359b267..fb821f802 100644
--- a/examples/server/tests/features/embeddings.feature
+++ b/examples/server/tests/features/embeddings.feature
@@ -4,7 +4,8 @@ Feature: llama.cpp server
 
   Background: Server startup
     Given a server listening on localhost:8080
-    And   a model file bert-bge-small/ggml-model-f16.gguf from HF repo ggml-org/models
+    And   a model url https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16.gguf
+    And   a model file /tmp/ggml-model-f16.gguf
     And   a model alias bert-bge-small
     And   42 as server seed
     And   2 slots
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index a59a52d21..19d064dfd 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -32,6 +32,8 @@ def step_server_config(context, server_fqdn, server_port):
     context.base_url = f'http://{context.server_fqdn}:{context.server_port}'
 
     context.model_alias = None
+    context.model_file = None
+    context.model_url = None
     context.n_batch = None
     context.n_ubatch = None
     context.n_ctx = None
@@ -65,6 +67,16 @@ def step_download_hf_model(context, hf_file, hf_repo):
         print(f"model file: {context.model_file}\n")
 
 
+@step('a model file {model_file}')
+def step_model_file(context, model_file):
+    context.model_file = model_file  # passed as `--model` by start_server_background when set
+
+
+@step('a model url {model_url}')
+def step_model_url(context, model_url):
+    context.model_url = model_url  # passed as `--model-url` by start_server_background when set
+
+
 @step('a model alias {model_alias}')
 def step_model_alias(context, model_alias):
     context.model_alias = model_alias
@@ -1038,8 +1050,11 @@ def start_server_background(context):
     server_args = [
         '--host', server_listen_addr,
         '--port', context.server_port,
-        '--model', context.model_file
     ]
+    if context.model_file:
+        server_args.extend(['--model', context.model_file])
+    if context.model_url:
+        server_args.extend(['--model-url', context.model_url])
     if context.n_batch:
         server_args.extend(['--batch-size', context.n_batch])
     if context.n_ubatch: