From 852f6548bb6b41b9c4fee72e2d54087853c1fa79 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Tue, 3 Sep 2024 13:14:52 +0200
Subject: [PATCH] server : add parallel test with more prompts than slots

---
 .../server/tests/features/parallel.feature    | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)
diff --git a/examples/server/tests/features/parallel.feature b/examples/server/tests/features/parallel.feature
index 6cd306a2b..423d0f1d4 100644
--- a/examples/server/tests/features/parallel.feature
+++ b/examples/server/tests/features/parallel.feature
@@ -77,6 +77,35 @@ Feature: Parallel
       | disabled  | 128       |
       | enabled   | 64        |
 
+  Scenario Outline: Multi users with number of prompts exceeding number of slots
+    Given a system prompt You are a writer.
+    And   a model tinyllama-2
+    Given a prompt:
+      """
+      Write a very long book.
+      """
+    And a prompt:
+      """
+      Write another a poem.
+      """
+    And a prompt:
+      """
+      What is LLM?
+      """
+    And a prompt:
+      """
+      The sky is blue and I love it.
+      """
+    And <n_predict> max tokens to predict
+    And streaming is <streaming>
+    Given concurrent OAI completions requests
+    Then the server is busy
+    Then the server is idle
+    Then all prompts are predicted with <n_predict> tokens
+    Examples:
+      | streaming | n_predict |
+      | disabled  | 128       |
+      | enabled   | 64        |
 
   Scenario:  Multi users with total number of tokens to predict exceeds the KV Cache size #3969
     Given a prompt: