From 1c1fd405764f3cea8a8053f248d7c30762a7925d Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Fri, 23 Feb 2024 17:12:16 +0100
Subject: [PATCH] server: tests: allow to pass argument to the test file

add wrong_usage.feature to demonstrate a user issue which will not be fixed.
---
 examples/server/tests/README.md                | 11 ++++++----
 .../server/tests/features/wrong_usage.feature  | 22 +++++++++++++++++++
 examples/server/tests/tests.sh                 | 10 +++++++--
 3 files changed, 37 insertions(+), 6 deletions(-)
 create mode 100644 examples/server/tests/features/wrong_usage.feature

diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
index 1892bafc9..6ebff8674 100644
--- a/examples/server/tests/README.md
+++ b/examples/server/tests/README.md
@@ -15,9 +15,12 @@ It's possible to override some scenario steps values with environment variables:
 - `$PORT` -> `context.server_port` to set the listening port of the server during scenario, default: `8080`
 - `$LLAMA_SERVER_BIN_PATH` -> to change the server binary path, default: `../../../build/bin/server`
 
-To change the server path, use `LLAMA_SERVER_BIN_PATH` environment variable.
+### Run @bug, @wip or @wrong_usage annotated scenarios
 
-### Skipped scenario
+Feature or Scenario must be annotated with `@llama.cpp` to be included in the default scope.
+- `@bug` links a scenario to a GitHub issue.
+- `@wrong_usage` marks scenarios that demonstrate a user issue which is actually expected behavior.
+- `@wip` marks a scenario as a work in progress, to focus on it.
 
-Feature or Scenario must be annotated with `@llama.cpp` to be included in the scope.
-`@bug` annotation aims to link a scenario with a GitHub issue.
+To run only the scenarios annotated with `@bug`, run:
+`./tests.sh --tags bug`
\ No newline at end of file
diff --git a/examples/server/tests/features/wrong_usage.feature b/examples/server/tests/features/wrong_usage.feature
new file mode 100644
index 000000000..59098a290
--- /dev/null
+++ b/examples/server/tests/features/wrong_usage.feature
@@ -0,0 +1,22 @@
+# run with: ./tests.sh --tags wrong_usage
+@wrong_usage
+Feature: Wrong usage of llama.cpp server
+
+  # Issue #3969: the user must always set the --n-predict option
+  # to cap the number of tokens any completion request can generate,
+  # or pass n_predict or max_tokens in the request.
+  Scenario: Infinite loop
+    Given a server listening on localhost:8080
+    And a model file stories260K.gguf
+    And 1 slots
+    And 32 KV cache size
+    # Uncomment the line below to fix the issue
+    #And 64 server max tokens to predict
+    Then the server is starting
+    Given a prompt:
+    """
+    Go to: infinite loop
+    """
+    Given concurrent completion requests
+
+    Then all prompts are predicted
\ No newline at end of file
diff --git a/examples/server/tests/tests.sh b/examples/server/tests/tests.sh
index 3b101ad3d..a2e29748d 100755
--- a/examples/server/tests/tests.sh
+++ b/examples/server/tests/tests.sh
@@ -2,5 +2,11 @@
 
 set -eu
 
-# Start @llama.cpp scenario
-behave --summary --stop --no-capture --tags llama.cpp
+if [ $# -lt 1 ]
+then
+  # No arguments: start the default @llama.cpp scenarios
+  behave --summary --stop --no-capture --tags llama.cpp
+else
+  behave "$@"
+fi
+
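
For reference, a minimal sketch of how the updated `tests.sh` might be invoked once this patch is applied. It assumes you are in `examples/server/tests/`, that the Python requirements for `behave` are installed, and that the server binary is built at the default `../../../build/bin/server` path mentioned in the README; the tag-filtering flags are standard `behave` options, not something introduced by this patch.

```sh
#!/bin/bash
cd examples/server/tests

# With no arguments, tests.sh keeps the previous behavior and runs
# the scenarios tagged @llama.cpp.
./tests.sh

# Any arguments are now forwarded verbatim to behave, e.g. run only
# the new wrong-usage scenario:
./tests.sh --tags wrong_usage

# Or target the new feature file directly (behave accepts feature
# paths as positional arguments):
./tests.sh features/wrong_usage.feature
```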