server: tests: embeddings, fixed prompt do not exceed n_batch, increase embedding timeout, reduce number of concurrent embeddings
parent ec6ba3bff1
commit 396106c532

2 changed files with 6 additions and 5 deletions
@@ -9,7 +9,7 @@ Feature: llama.cpp server
     And 42 as server seed
     And 2 slots
     And 1024 as batch size
-    And 4096 KV cache size
+    And 2048 KV cache size
     And embeddings extraction
     Then the server is starting
     Then the server is healthy
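The background above is the scenario-wide server configuration; this commit only shrinks the KV cache from 4096 to 2048. As a rough illustration (not the test harness's actual launch code, and the flag spellings are assumptions based on the llama.cpp server's command-line options), the background steps map onto a server invocation along these lines:

import subprocess

# Illustrative only: the behave fixtures build this command from the scenario
# context; flag names here are assumed from the llama.cpp server CLI.
server = subprocess.Popen([
    "./server",
    "--seed", "42",          # And 42 as server seed
    "--parallel", "2",       # And 2 slots
    "--batch-size", "1024",  # And 1024 as batch size
    "--ctx-size", "2048",    # And 2048 KV cache size
    "--embedding",           # And embeddings extraction
])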
@@ -87,9 +87,8 @@ Feature: llama.cpp server
     Then the server is idle
     Then all embeddings are generated

-  @wip
   Scenario: All embeddings should be the same
-    Given 20 fixed prompts
+    Given 10 fixed prompts
     And a model bert-bge-small
     Given concurrent OAI embedding requests
     Then the server is busy
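The scenario drives the fixed prompts as concurrent OAI-style embedding requests and then verifies that every returned vector is identical. A minimal sketch of that fan-out-and-compare flow, assuming a hypothetical fetch_embedding coroutine in place of the suite's real request helper:

import asyncio

# Sketch only: fetch_embedding stands in for the test suite's real
# request helper and is not the repository's actual function.
async def run_concurrent_embeddings(prompts, fetch_embedding):
    # Fire one embedding request per fixed prompt, all in flight at once.
    embeddings = await asyncio.gather(*(fetch_embedding(p) for p in prompts))
    # "All embeddings should be the same": the prompts are identical,
    # so every returned vector is expected to match the first one.
    assert all(e == embeddings[0] for e in embeddings)
    return embeddings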
@@ -292,9 +292,10 @@ def step_impl(context, n_ga_w):
 def step_prompt_passkey(context):
     context.prompt_passkey = context.text


 @step(u'{n_prompts:d} fixed prompts')
 def step_fixed_prompts(context, n_prompts):
-    context.prompts.extend([str(0)*1024 for i in range(n_prompts)])
+    context.prompts.extend([str(0)*(context.n_batch if context.n_batch is not None else 512) for i in range(n_prompts)])
     context.n_prompts = n_prompts


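After this change each fixed prompt is sized from context.n_batch (falling back to 512) instead of a hard-coded 1024 characters, so the prompt cannot exceed the batch size configured in the feature background. That relies on a companion step having stored the batch size on the context; a sketch of what such a step presumably looks like (not shown in this diff, name assumed):

from behave import step

# Assumed companion step for "And 1024 as batch size" (not part of this diff):
# it records the value that step_fixed_prompts later reads as context.n_batch.
@step(u'{n_batch:d} as batch size')
def step_server_n_batch(context, n_batch):
    context.n_batch = n_batch

With the background's 1024 batch size, each fixed prompt is then exactly 1024 '0' characters; scenarios that never set a batch size get the 512-character fallback.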
@@ -818,7 +819,8 @@ async def request_oai_embeddings(input,
                                     "input": input,
                                     "model": model,
                                 },
-                                headers=headers) as response:
+                                headers=headers,
+                                timeout=3600) as response:
             assert response.status == 200, f"received status code not expected: {response.status}"
             assert response.headers['Access-Control-Allow-Origin'] == origin
             assert response.headers['Content-Type'] == "application/json; charset=utf-8"
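This hunk raises the client-side timeout for the embeddings request to one hour, which keeps slow concurrent embedding runs from failing in aiohttp before the server answers. A reduced sketch of the call shape with the new timeout; the URL and payload handling of the real request_oai_embeddings are simplified to placeholders here:

import aiohttp

# Simplified stand-in for the suite's request helper: the real function builds
# the URL, payload and headers from the scenario context.
async def post_embeddings(base_url, input, model, headers=None):
    async with aiohttp.ClientSession() as session:
        async with session.post(f"{base_url}/v1/embeddings",
                                json={"input": input, "model": model},
                                headers=headers,
                                timeout=3600) as response:  # was aiohttp's default (5 minutes)
            assert response.status == 200, f"received status code not expected: {response.status}"
            return await response.json()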