server: tests: fix embeddings randomly failing with build type Debug (#5911)

* server: tests: embeddings, use a different KV cache size

* server: tests: embeddings, ensure fixed prompts do not exceed n_batch, increase the embedding timeout, reduce the number of concurrent embeddings
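The prompt-size fix follows from how embedding prompts are evaluated: an embedding prompt has to fit in a single batch, so a fixed test prompt longer than n_batch can fail nondeterministically. A minimal sketch of the constraint the tests now enforce, mirroring the step implementation changed below (the helper name is illustrative, not part of the test suite):

from typing import Optional

# Sketch: build fixed test prompts that never exceed the configured batch size,
# falling back to 512 characters when the scenario sets no batch size.
def build_fixed_prompts(n_prompts: int, n_batch: Optional[int]) -> list:
    prompt_len = n_batch if n_batch is not None else 512
    return ["0" * prompt_len for _ in range(n_prompts)]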
Author: Pierrick Hymbert, 2024-03-06 21:25:55 +01:00, committed by GitHub
parent 36e12f8fd3
commit 59850f18e5
2 changed files with 6 additions and 5 deletions


@@ -9,7 +9,7 @@ Feature: llama.cpp server
     And 42 as server seed
     And 2 slots
     And 1024 as batch size
-    And 1024 KV cache size
+    And 2048 KV cache size
     And embeddings extraction
     Then the server is starting
     Then the server is healthy
@@ -87,9 +87,8 @@ Feature: llama.cpp server
     Then the server is idle
     Then all embeddings are generated
 
-  @wip
   Scenario: All embeddings should be the same
-    Given 20 fixed prompts
+    Given 10 fixed prompts
     And a model bert-bge-small
     Given concurrent OAI embedding requests
     Then the server is busy
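One way to read the KV cache bump: the server splits its KV cache evenly across slots, so with 2 slots a 1024-token cache leaves each slot only 512 tokens of context, less than the 1024-token batch used for the embedding prompts; 2048 restores a full 1024 per slot. A quick sanity check of that arithmetic (the even-split-per-slot rule is assumed here, matching how llama.cpp's server allocates slot contexts):

# Assumption: per-slot context = KV cache size / number of slots.
n_kv, n_slots, n_batch = 2048, 2, 1024
n_ctx_per_slot = n_kv // n_slots          # 1024 tokens per slot
assert n_ctx_per_slot >= n_batch, "an embedding prompt must fit in one slot's context"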


@@ -292,9 +292,10 @@ def step_impl(context, n_ga_w):
 def step_prompt_passkey(context):
     context.prompt_passkey = context.text
 
 @step(u'{n_prompts:d} fixed prompts')
 def step_fixed_prompts(context, n_prompts):
-    context.prompts.extend([str(0)*1024 for i in range(n_prompts)])
+    context.prompts.extend([str(0)*(context.n_batch if context.n_batch is not None else 512) for i in range(n_prompts)])
     context.n_prompts = n_prompts
@@ -818,7 +819,8 @@ async def request_oai_embeddings(input,
                                     "input": input,
                                     "model": model,
                                 },
-                                headers=headers) as response:
+                                headers=headers,
+                                timeout=3600) as response:
             assert response.status == 200, f"received status code not expected: {response.status}"
             assert response.headers['Access-Control-Allow-Origin'] == origin
             assert response.headers['Content-Type'] == "application/json; charset=utf-8"
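Context for the timeout bump: with only 2 slots, concurrently issued embedding requests queue on the server, and a queued request can easily outlive aiohttp's default total timeout before it is even scheduled. A minimal, self-contained sketch of the same idea using an explicit aiohttp.ClientTimeout (base_url, prompt, and model are placeholders; the /v1/embeddings route is the OAI-compatible endpoint the tests exercise):

import aiohttp

async def fetch_embeddings(base_url, prompt, model):
    # Generous total timeout: a queued request may wait far longer than the
    # default before a busy two-slot server starts processing it.
    timeout = aiohttp.ClientTimeout(total=3600)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.post(f"{base_url}/v1/embeddings",
                                json={"input": prompt, "model": model}) as response:
            assert response.status == 200
            return await response.json()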