From 4dcb3ea9439a36dcdf71db295d0c8b4fcbffc678 Mon Sep 17 00:00:00 2001
From: ochafik
Date: Sun, 29 Sep 2024 01:09:41 +0100
Subject: [PATCH] `tests`: allow artificial slowdown of sampling for tests

---
Notes (reviewer commentary; not part of the commit message):
  * Registers `--testing-sampler-delay-millis N` for LLAMA_EXAMPLE_SERVER and
    stores the value in `gpt_params::testing_sampler_delay_millis` (default 0).
  * In server.cpp the server sleeps for that many milliseconds immediately
    before each `gpt_sampler_sample` call, and only when the value is > 0.
  * The test steps gain "{N:d} milliseconds delay in sampler for testing";
    `start_server_background` forwards the flag only when the stored value is
    truthy (so neither None nor 0 adds the argument).

 common/arg.cpp                                | 7 +++++++
 common/common.h                               | 2 ++
 examples/server/server.cpp                    | 3 +++
 examples/server/tests/features/steps/steps.py | 6 ++++++
 4 files changed, 18 insertions(+)

diff --git a/common/arg.cpp b/common/arg.cpp
index 8266a16c2..1ae55b22c 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1879,6 +1879,13 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.slot_prompt_similarity = std::stof(value);
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    add_opt(llama_arg(
+        {"--testing-sampler-delay-millis"}, "N",
+        format("for tests: delay in milliseconds to add to each sampling (default: %d)", params.testing_sampler_delay_millis),
+        [](gpt_params & params, int value) {
+            params.testing_sampler_delay_millis = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(llama_arg(
         {"--lora-init-without-apply"},
         format("load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: %s)", params.lora_init_without_apply ? "enabled" : "disabled"),
diff --git a/common/common.h b/common/common.h
index 8b84cf9ad..154d59846 100644
--- a/common/common.h
+++ b/common/common.h
@@ -299,6 +299,8 @@ struct gpt_params {
 
     float slot_prompt_similarity = 0.5f;
 
+    int testing_sampler_delay_millis = 0;
+
     // batched-bench params
     bool is_pp_shared = false;
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 1ce4d7e26..c308e23ca 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2348,6 +2348,9 @@ struct server_context {
                 }
 
                 completion_token_output result;
+                if (params.testing_sampler_delay_millis > 0) {
+                    std::this_thread::sleep_for(std::chrono::milliseconds(params.testing_sampler_delay_millis));
+                }
                 const llama_token id = gpt_sampler_sample(slot.smpl, ctx, slot.i_batch - i);
 
                 gpt_sampler_accept(slot.smpl, id, true);
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 2611614ba..31bfb0b2b 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -78,6 +78,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
     context.response_format = None
     context.temperature = None
     context.lora_file = None
+    context.testing_sampler_delay_millis = None
     context.disable_ctx_shift = False
 
     context.tasks_result = []
@@ -455,6 +456,9 @@ def step_impl(context, n_ga):
 def step_impl(context, n_ga_w):
     context.n_ga_w = n_ga_w
 
+@step('{testing_sampler_delay_millis:d} milliseconds delay in sampler for testing')
+def step_testing_sampler_delay_millis(context, testing_sampler_delay_millis):
+    context.testing_sampler_delay_millis = testing_sampler_delay_millis
 
 @step('a passkey prompt template')
 def step_prompt_passkey(context):
@@ -1436,6 +1440,8 @@ def start_server_background(context):
         server_args.append('--verbose')
     if context.lora_file:
         server_args.extend(['--lora', context.lora_file])
+    if context.testing_sampler_delay_millis:
+        server_args.extend(['--testing-sampler-delay-millis', context.testing_sampler_delay_millis])
     if context.disable_ctx_shift:
         server_args.extend(['--no-context-shift'])
 