server: tests: fix embeddings randomly failing with build type Debug (#5911)

* server: tests: embeddings, use a different KV cache size

* server: tests: embeddings, ensure fixed prompts do not exceed n_batch, increase the embedding timeout, reduce the number of concurrent embeddings
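The prompt-size fix follows from how embedding prompts are evaluated: an embedding prompt has to fit in a single batch, so a fixed test prompt longer than n_batch can fail nondeterministically. A minimal sketch of the constraint the tests now enforce, mirroring the step implementation changed below (the helper name is illustrative, not part of the test suite):

from typing import Optional

# Sketch: build fixed test prompts that never exceed the configured batch size,
# falling back to 512 characters when the scenario sets no batch size.
def build_fixed_prompts(n_prompts: int, n_batch: Optional[int]) -> list:
    prompt_len = n_batch if n_batch is not None else 512
    return ["0" * prompt_len for _ in range(n_prompts)]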
Author: Pierrick Hymbert, 2024-03-06 21:25:55 +01:00, committed by GitHub
parent 36e12f8fd3
commit 59850f18e5
2 changed files with 6 additions and 5 deletions


@@ -9,7 +9,7 @@ Feature: llama.cpp server
     And 42 as server seed
     And 2 slots
     And 1024 as batch size
-    And 1024 KV cache size
+    And 2048 KV cache size
     And embeddings extraction
     Then the server is starting
     Then the server is healthy
@@ -87,9 +87,8 @@ Feature: llama.cpp server
     Then the server is idle
     Then all embeddings are generated
 
-  @wip
   Scenario: All embeddings should be the same
-    Given 20 fixed prompts
+    Given 10 fixed prompts
     And a model bert-bge-small
     Given concurrent OAI embedding requests
     Then the server is busy
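One way to read the KV cache bump: the server splits its KV cache evenly across slots, so with 2 slots a 1024-token cache leaves each slot only 512 tokens of context, less than the 1024-token batch used for the embedding prompts; 2048 restores a full 1024 per slot. A quick sanity check of that arithmetic (the even-split-per-slot rule is assumed here, matching how llama.cpp's server allocates slot contexts):

# Assumption: per-slot context = KV cache size / number of slots.
n_kv, n_slots, n_batch = 2048, 2, 1024
n_ctx_per_slot = n_kv // n_slots          # 1024 tokens per slot
assert n_ctx_per_slot >= n_batch, "an embedding prompt must fit in one slot's context"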


@@ -292,9 +292,10 @@ def step_impl(context, n_ga_w):
 def step_prompt_passkey(context):
     context.prompt_passkey = context.text
 
 @step(u'{n_prompts:d} fixed prompts')
 def step_fixed_prompts(context, n_prompts):
-    context.prompts.extend([str(0)*1024 for i in range(n_prompts)])
+    context.prompts.extend([str(0)*(context.n_batch if context.n_batch is not None else 512) for i in range(n_prompts)])
     context.n_prompts = n_prompts
@@ -818,7 +819,8 @@ async def request_oai_embeddings(input,
                                     "input": input,
                                     "model": model,
                                 },
-                                headers=headers) as response:
+                                headers=headers,
+                                timeout=3600) as response:
             assert response.status == 200, f"received status code not expected: {response.status}"
             assert response.headers['Access-Control-Allow-Origin'] == origin
             assert response.headers['Content-Type'] == "application/json; charset=utf-8"
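Context for the timeout bump: with only 2 slots, concurrently issued embedding requests queue on the server, and a queued request can easily outlive aiohttp's default total timeout before it is even scheduled. A minimal, self-contained sketch of the same idea using an explicit aiohttp.ClientTimeout (base_url, prompt, and model are placeholders; the /v1/embeddings route is the OAI-compatible endpoint the tests exercise):

import aiohttp

async def fetch_embeddings(base_url, prompt, model):
    # Generous total timeout: a queued request may wait far longer than the
    # default before a busy two-slot server starts processing it.
    timeout = aiohttp.ClientTimeout(total=3600)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.post(f"{base_url}/v1/embeddings",
                                json={"input": prompt, "model": model}) as response:
            assert response.status == 200
            return await response.json()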