diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index 968e288d0..6e54395b6 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -36,24 +36,13 @@ Feature: llama.cpp server
   Scenario: Multi users
     Given a prompt:
       """
-      Write a formal complaint email to Air France about my delayed
-      baggage from my flight on Tuesday, January 17th, from Paris to Toulouse. Be verbose.
+      Write a very long story about AI.
       """
     And a prompt:
       """
-      Translate the following War & Peace chapter into Russian: WELL, PRINCE,
-      Genoa and Lucca are now no more than private estates of the Bonaparte
-      family. No, I warn you, that if you do not tell me we are at war,
-      if you again allow yourself to palliate all the infamies and atrocities
-      of this Antichrist (upon my word, I believe he is), I don’t know you
-      in future, you are no longer my friend, no longer my faithful slave,
-      as you say. There, how do you do, how do you do? I see I’m scaring you,
-      sit down and talk to me.” These words were uttered in July 1805 by
-      Anna Pavlovna Scherer, a distinguished lady of the court,
-      and confidential maid-of-honour to the Empress Marya Fyodorovna.
-      It was her greeting to Prince Vassily, a man high in rank
-      and office, who was the first to arrive at her soirée.
+      Write another very long music lyrics.
       """
+    And 512 max tokens to predict
     Given concurrent completion requests
     Then the server is busy
     And all slots are busy
@@ -65,8 +54,6 @@ Feature: llama.cpp server
   Scenario: Multi users OAI Compatibility
     Given a system prompt "You are an AI assistant."
     And a model tinyllama-2
-    And 1024 max tokens to predict
-    And streaming is enabled
     Given a prompt:
       """
       Write a very long story about AI.
@@ -75,6 +62,8 @@ Feature: llama.cpp server
       """
       Write another very long music lyrics.
       """
+    And 512 max tokens to predict
+    And streaming is enabled
     Given concurrent OAI completions requests
     Then the server is busy
     And all slots are busy
@@ -82,3 +71,25 @@ Feature: llama.cpp server
     And all slots are idle
     Then all prompts are predicted
 
+  # FIXME: infinite loop on the CI, not locally, if n_prompt * n_predict > kv_size
+  Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size
+    Given a prompt:
+      """
+      Write a very long story about AI.
+      """
+    And a prompt:
+      """
+      Write another very long music lyrics.
+      """
+    And a prompt:
+      """
+      Write a very long poem.
+      """
+    And 1024 max tokens to predict
+    Given concurrent completion requests
+    Then the server is busy
+    And all slots are busy
+    Then the server is idle
+    And all slots are idle
+    Then all prompts are predicted
+
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 400b3c126..896d8e32d 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -105,7 +105,7 @@ def step_model(context, model):
 
 @step(u'{max_tokens} max tokens to predict')
 def step_max_tokens(context, max_tokens):
-    context.max_tokens = int(max_tokens)
+    context.n_predict = int(max_tokens)
 
 
 @step(u'streaming is {enable_streaming}')
@@ -154,7 +154,7 @@ def concurrent_requests(context, f_completion):
 def request_completion(context, prompt, n_predict=None):
     response = requests.post(f'{context.base_url}/completion', json={
         "prompt": prompt,
-        "n_predict": int(n_predict) if n_predict is not None else 4096,
+        "n_predict": int(n_predict) if n_predict is not None else context.n_predict,
         "seed": context.seed
     })
     assert response.status_code == 200
@@ -174,7 +174,7 @@ def oai_chat_completions(context, user_prompt):
             }
         ],
         model=context.model,
-        max_tokens=context.max_tokens,
+        max_tokens=context.n_predict,
         stream=context.enable_streaming,
         seed = context.seed
     )