fix passkey test
This commit is contained in:
parent
d3fedaa6d6
commit
fbebf65039
3 changed files with 29 additions and 18 deletions
|
@ -2690,7 +2690,7 @@ int main(int argc, char ** argv) {
|
|||
task.type = SERVER_TASK_TYPE_METRICS;
|
||||
|
||||
ctx_server.queue_results.add_waiting_task_id(task.id);
|
||||
ctx_server.queue_tasks.post(task);
|
||||
ctx_server.queue_tasks.post(task, true); // high-priority task
|
||||
|
||||
// get the result
|
||||
server_task_result result = ctx_server.queue_results.recv(task.id);
|
||||
|
@ -2722,7 +2722,7 @@ int main(int argc, char ** argv) {
|
|||
task.data.push_back({{"reset_bucket", true}});
|
||||
|
||||
ctx_server.queue_results.add_waiting_task_id(task.id);
|
||||
ctx_server.queue_tasks.post(task);
|
||||
ctx_server.queue_tasks.post(task, true); // high-priority task
|
||||
|
||||
// get the result
|
||||
server_task_result result = ctx_server.queue_results.recv(task.id);
|
||||
|
@ -2822,7 +2822,7 @@ int main(int argc, char ** argv) {
|
|||
task.data = {
|
||||
{ "id_slot", id_slot },
|
||||
{ "filename", filename },
|
||||
{ "filepath", filepath }
|
||||
{ "filepath", filepath },
|
||||
};
|
||||
|
||||
const int id_task = ctx_server.queue_tasks.post(task);
|
||||
|
@ -2852,7 +2852,7 @@ int main(int argc, char ** argv) {
|
|||
task.data = {
|
||||
{ "id_slot", id_slot },
|
||||
{ "filename", filename },
|
||||
{ "filepath", filepath }
|
||||
{ "filepath", filepath },
|
||||
};
|
||||
|
||||
const int id_task = ctx_server.queue_tasks.post(task);
|
||||
|
@ -2930,7 +2930,7 @@ int main(int argc, char ** argv) {
|
|||
{ "system_prompt", ctx_server.system_prompt.c_str() },
|
||||
{ "default_generation_settings", ctx_server.default_generation_settings_for_props },
|
||||
{ "total_slots", ctx_server.params.n_parallel },
|
||||
{ "chat_template", curr_tmpl.c_str() }
|
||||
{ "chat_template", curr_tmpl.c_str() },
|
||||
};
|
||||
|
||||
res_ok(res, data);
|
||||
|
|
|
@ -15,6 +15,7 @@ Feature: Passkey / Self-extend with context shift
|
|||
And <n_junk> as number of junk
|
||||
And <n_predicted> server max tokens to predict
|
||||
And 42 as seed
|
||||
And 0.0 temperature
|
||||
And <n_ctx> KV cache size
|
||||
And 1 slots
|
||||
And <n_ga> group attention factor to extend context size through self-extend
|
||||
|
@ -22,7 +23,8 @@ Feature: Passkey / Self-extend with context shift
|
|||
# Can be override with N_GPU_LAYERS
|
||||
And <ngl> GPU offloaded layers
|
||||
Then the server is starting
|
||||
Then the server is healthy
|
||||
# Higher timeout because the model may need to be downloaded from the internet
|
||||
Then the server is healthy with timeout 120 seconds
|
||||
Given available models
|
||||
Then model 0 is trained on <n_ctx_train> tokens context
|
||||
Given a prefix prompt:
|
||||
|
|
|
@ -200,17 +200,15 @@ def step_start_server(context):
|
|||
time.sleep(0.1)
|
||||
|
||||
|
||||
@step("the server is {expecting_status}")
|
||||
@async_run_until_complete
|
||||
async def step_wait_for_the_server_to_be_started(context, expecting_status: Literal['healthy', 'ready', 'idle', 'busy'] | str):
|
||||
async def wait_for_server_status_with_timeout(context, expecting_status: Literal['healthy', 'ready', 'idle', 'busy'] | str, timeout: int):
|
||||
match expecting_status:
|
||||
case 'healthy':
|
||||
await wait_for_slots_status(context, context.base_url, 200,
|
||||
timeout=30)
|
||||
timeout=timeout)
|
||||
|
||||
case 'ready' | 'idle':
|
||||
await wait_for_slots_status(context, context.base_url, 200,
|
||||
timeout=30,
|
||||
timeout=timeout,
|
||||
params={'fail_on_no_slot': 1},
|
||||
slots_idle=context.n_slots,
|
||||
slots_processing=0)
|
||||
|
@ -223,6 +221,17 @@ async def step_wait_for_the_server_to_be_started(context, expecting_status: Lite
|
|||
assert False, "unknown status"
|
||||
|
||||
|
||||
@step("the server is {expecting_status} with timeout {timeout:d} seconds")
|
||||
@async_run_until_complete
|
||||
async def step_wait_for_server_status_with_timeout(context, expecting_status: Literal['healthy', 'ready', 'idle', 'busy'] | str, timeout: int):
|
||||
await wait_for_server_status_with_timeout(context, expecting_status, timeout)
|
||||
|
||||
|
||||
@step("the server is {expecting_status}")
|
||||
async def step_wait_for_server_status(context, expecting_status: Literal['healthy', 'ready', 'idle', 'busy'] | str):
|
||||
await wait_for_server_status_with_timeout(context, expecting_status, 30)
|
||||
|
||||
|
||||
@step('all slots are {expected_slot_status_string}')
|
||||
@async_run_until_complete
|
||||
async def step_all_slots_status(context, expected_slot_status_string: Literal['idle', 'busy'] | str):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue