fix passkey test

Author: Xuan Son Nguyen 2024-09-03 11:54:41 +02:00
parent d3fedaa6d6
commit fbebf65039
3 changed files with 29 additions and 18 deletions


@@ -2690,7 +2690,7 @@ int main(int argc, char ** argv) {
task.type = SERVER_TASK_TYPE_METRICS;
ctx_server.queue_results.add_waiting_task_id(task.id);
-ctx_server.queue_tasks.post(task);
+ctx_server.queue_tasks.post(task, true); // high-priority task
// get the result
server_task_result result = ctx_server.queue_results.recv(task.id);
@@ -2722,7 +2722,7 @@ int main(int argc, char ** argv) {
task.data.push_back({{"reset_bucket", true}});
ctx_server.queue_results.add_waiting_task_id(task.id);
-ctx_server.queue_tasks.post(task);
+ctx_server.queue_tasks.post(task, true); // high-priority task
// get the result
server_task_result result = ctx_server.queue_results.recv(task.id);
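
The boolean second argument passed to queue_tasks.post() is what the diff comments call a high-priority task: presumably it lets control requests such as the metrics task jump ahead of completion work already sitting in the queue instead of waiting behind it. The server's actual server_queue is C++ and is not shown in this commit; the following is only a minimal Python sketch of that idea, with TaskQueue, post(), and the deque all being illustrative assumptions rather than the real implementation.

from collections import deque

class TaskQueue:
    """Illustrative only: high-priority tasks are served before queued ones."""
    def __init__(self):
        self._tasks = deque()

    def post(self, task, high_priority: bool = False) -> None:
        # high_priority=True mirrors post(task, true) in the hunks above:
        # the task goes to the front of the queue instead of the back.
        if high_priority:
            self._tasks.appendleft(task)
        else:
            self._tasks.append(task)

    def next_task(self):
        return self._tasks.popleft() if self._tasks else None

q = TaskQueue()
q.post("completion request")
q.post("metrics", high_priority=True)  # handled first, like the METRICS task above
assert q.next_task() == "metrics"
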
@@ -2822,7 +2822,7 @@ int main(int argc, char ** argv) {
task.data = {
{ "id_slot", id_slot },
{ "filename", filename },
-{ "filepath", filepath }
+{ "filepath", filepath },
};
const int id_task = ctx_server.queue_tasks.post(task);
@@ -2852,7 +2852,7 @@ int main(int argc, char ** argv) {
task.data = {
{ "id_slot", id_slot },
{ "filename", filename },
-{ "filepath", filepath }
+{ "filepath", filepath },
};
const int id_task = ctx_server.queue_tasks.post(task);
@@ -2930,7 +2930,7 @@ int main(int argc, char ** argv) {
{ "system_prompt", ctx_server.system_prompt.c_str() },
{ "default_generation_settings", ctx_server.default_generation_settings_for_props },
{ "total_slots", ctx_server.params.n_parallel },
-{ "chat_template", curr_tmpl.c_str() }
+{ "chat_template", curr_tmpl.c_str() },
};
res_ok(res, data);


@@ -15,6 +15,7 @@ Feature: Passkey / Self-extend with context shift
And <n_junk> as number of junk
And <n_predicted> server max tokens to predict
And 42 as seed
+And 0.0 temperature
And <n_ctx> KV cache size
And 1 slots
And <n_ga> group attention factor to extend context size through self-extend
@@ -22,7 +23,8 @@ Feature: Passkey / Self-extend with context shift
# Can be override with N_GPU_LAYERS
And <ngl> GPU offloaded layers
Then the server is starting
-Then the server is healthy
+# Higher timeout because the model may need to be downloaded from the internet
+Then the server is healthy with timeout 120 seconds
Given available models
Then model 0 is trained on <n_ctx_train> tokens context
Given a prefix prompt:
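
Two test-side changes make the scenario more reliable: temperature 0.0 forces greedy sampling so the passkey answer is reproducible across runs, and the new "with timeout 120 seconds" step gives the health check more time when the model first has to be downloaded. The new step text relies on behave's default parse-style matching, where {timeout:d} captures the digits and converts them to an int before the step function runs. A small standalone check of that binding, using the parse package behave builds on (running it needs pip install parse; it is a demonstration, not part of the test suite):

import parse

pattern = "the server is {expecting_status} with timeout {timeout:d} seconds"
step_text = "the server is healthy with timeout 120 seconds"

result = parse.parse(pattern, step_text)
assert result is not None
assert result["expecting_status"] == "healthy"
assert result["timeout"] == 120  # ':d' yields an int, not a string
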


@@ -200,17 +200,15 @@ def step_start_server(context):
time.sleep(0.1)
-@step("the server is {expecting_status}")
-@async_run_until_complete
-async def step_wait_for_the_server_to_be_started(context, expecting_status: Literal['healthy', 'ready', 'idle', 'busy'] | str):
+async def wait_for_server_status_with_timeout(context, expecting_status: Literal['healthy', 'ready', 'idle', 'busy'] | str, timeout: int):
match expecting_status:
case 'healthy':
await wait_for_slots_status(context, context.base_url, 200,
-timeout=30)
+timeout=timeout)
case 'ready' | 'idle':
await wait_for_slots_status(context, context.base_url, 200,
-timeout=30,
+timeout=timeout,
params={'fail_on_no_slot': 1},
slots_idle=context.n_slots,
slots_processing=0)
@@ -223,6 +221,17 @@ async def step_wait_for_the_server_to_be_started(context, expecting_status: Lite
assert False, "unknown status"
+@step("the server is {expecting_status} with timeout {timeout:d} seconds")
+@async_run_until_complete
+async def step_wait_for_server_status_with_timeout(context, expecting_status: Literal['healthy', 'ready', 'idle', 'busy'] | str, timeout: int):
+await wait_for_server_status_with_timeout(context, expecting_status, timeout)
+@step("the server is {expecting_status}")
+@async_run_until_complete
+async def step_wait_for_server_status(context, expecting_status: Literal['healthy', 'ready', 'idle', 'busy'] | str):
+await wait_for_server_status_with_timeout(context, expecting_status, 30)
@step('all slots are {expected_slot_status_string}')
@async_run_until_complete
async def step_all_slots_status(context, expected_slot_status_string: Literal['idle', 'busy'] | str):
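
The refactor keeps a single coroutine, wait_for_server_status_with_timeout, and exposes it through two behave steps: one with an explicit timeout parameter and one that keeps the previous 30-second default, so existing scenarios keep working unchanged. wait_for_slots_status itself is outside this diff; conceptually it polls the server until the expected state is reached or the timeout expires. The sketch below is only an assumption about that shape, not the test suite's actual helper; the /health path, the aiohttp usage, and the 0.5 s poll interval are illustrative choices.

import asyncio
import time

import aiohttp


async def wait_for_status(base_url: str, path: str = "/health",
                          expected_status: int = 200, timeout: int = 30) -> None:
    # Poll the endpoint until it answers with the expected HTTP status
    # or the timeout (in seconds) is exceeded.
    deadline = time.monotonic() + timeout
    async with aiohttp.ClientSession() as session:
        while True:
            try:
                async with session.get(base_url + path) as resp:
                    if resp.status == expected_status:
                        return
            except aiohttp.ClientConnectionError:
                pass  # server process not accepting connections yet
            if time.monotonic() > deadline:
                raise TimeoutError(f"server not ready after {timeout}s")
            await asyncio.sleep(0.5)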