tests: allow artificial slowdown of sampling for tests
This commit is contained in:
parent 1da67a395c
commit 4dcb3ea943
4 changed files with 18 additions and 0 deletions
|
@ -1879,6 +1879,13 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
params.slot_prompt_similarity = std::stof(value);
|
params.slot_prompt_similarity = std::stof(value);
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||||
|
add_opt(llama_arg(
|
||||||
|
{"--testing-sampler-delay-millis"}, "N",
|
||||||
|
format("for tests: delay in milliseconds to add to each sampling (default: %d)", params.testing_sampler_delay_millis),
|
||||||
|
[](gpt_params & params, int value) {
|
||||||
|
params.testing_sampler_delay_millis = value;
|
||||||
|
}
|
||||||
|
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--lora-init-without-apply"},
|
{"--lora-init-without-apply"},
|
||||||
format("load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: %s)", params.lora_init_without_apply ? "enabled" : "disabled"),
|
format("load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: %s)", params.lora_init_without_apply ? "enabled" : "disabled"),
|
||||||
|
|
|
@ -299,6 +299,8 @@ struct gpt_params {
|
||||||
|
|
||||||
float slot_prompt_similarity = 0.5f;
|
float slot_prompt_similarity = 0.5f;
|
||||||
|
|
||||||
|
int testing_sampler_delay_millis = 0;
|
||||||
|
|
||||||
// batched-bench params
|
// batched-bench params
|
||||||
bool is_pp_shared = false;
|
bool is_pp_shared = false;
|
||||||
|
|
||||||
|
|
|
@ -2348,6 +2348,9 @@ struct server_context {
|
||||||
}
|
}
|
||||||
|
|
||||||
completion_token_output result;
|
completion_token_output result;
|
||||||
|
if (params.testing_sampler_delay_millis > 0) {
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(params.testing_sampler_delay_millis));
|
||||||
|
}
|
||||||
const llama_token id = gpt_sampler_sample(slot.smpl, ctx, slot.i_batch - i);
|
const llama_token id = gpt_sampler_sample(slot.smpl, ctx, slot.i_batch - i);
|
||||||
|
|
||||||
gpt_sampler_accept(slot.smpl, id, true);
|
gpt_sampler_accept(slot.smpl, id, true);
|
||||||
|
|
|
@ -78,6 +78,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
|
||||||
context.response_format = None
|
context.response_format = None
|
||||||
context.temperature = None
|
context.temperature = None
|
||||||
context.lora_file = None
|
context.lora_file = None
|
||||||
|
context.testing_sampler_delay_millis = None
|
||||||
context.disable_ctx_shift = False
|
context.disable_ctx_shift = False
|
||||||
|
|
||||||
context.tasks_result = []
|
context.tasks_result = []
|
||||||
|
@ -455,6 +456,9 @@ def step_impl(context, n_ga):
|
||||||
def step_impl(context, n_ga_w):
|
def step_impl(context, n_ga_w):
|
||||||
context.n_ga_w = n_ga_w
|
context.n_ga_w = n_ga_w
|
||||||
|
|
||||||
|
@step('{testing_sampler_delay_millis:d} milliseconds delay in sampler for testing')
|
||||||
|
def step_testing_sampler_delay_millis(context, testing_sampler_delay_millis):
|
||||||
|
context.testing_sampler_delay_millis = testing_sampler_delay_millis
|
||||||
|
|
||||||
@step('a passkey prompt template')
|
@step('a passkey prompt template')
|
||||||
def step_prompt_passkey(context):
|
def step_prompt_passkey(context):
|
||||||
|
@ -1436,6 +1440,8 @@ def start_server_background(context):
|
||||||
server_args.append('--verbose')
|
server_args.append('--verbose')
|
||||||
if context.lora_file:
|
if context.lora_file:
|
||||||
server_args.extend(['--lora', context.lora_file])
|
server_args.extend(['--lora', context.lora_file])
|
||||||
|
if context.testing_sampler_delay_millis:
|
||||||
|
server_args.extend(['--testing-sampler-delay-millis', context.testing_sampler_delay_millis])
|
||||||
if context.disable_ctx_shift:
|
if context.disable_ctx_shift:
|
||||||
server_args.extend(['--no-context-shift'])
|
server_args.extend(['--no-context-shift'])
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue