threadpool: make polling the default to match OpenMP behavior

All poll-related command-line arguments now take an explicit value, so polling can also be set to 0 (false).
This commit is contained in:
Max Krasnyansky 2024-08-06 18:35:53 -07:00 committed by fmz
parent 6fcc780b5f
commit 3b62f7c145
3 changed files with 32 additions and 16 deletions

View file

@ -518,11 +518,13 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
return true; return true;
} }
if (arg == "--cpu-strict") { if (arg == "--cpu-strict") {
params.cpuparams.strict_cpu = true; CHECK_ARG
params.cpuparams.strict_cpu = std::stoul(argv[i]);
return true; return true;
} }
if (arg == "--poll") { if (arg == "--poll") {
params.cpuparams.poll = true; CHECK_ARG
params.cpuparams.poll = std::stoul(argv[i]);
return true; return true;
} }
if (arg == "-tb" || arg == "--threads-batch") { if (arg == "-tb" || arg == "--threads-batch") {
@ -557,7 +559,8 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
return true; return true;
} }
if (arg == "--poll-batch") { if (arg == "--poll-batch") {
params.cpuparams_batch.poll = true; CHECK_ARG
params.cpuparams_batch.poll = std::stoul(argv[i]);
return true; return true;
} }
if (arg == "-td" || arg == "--threads-draft") { if (arg == "-td" || arg == "--threads-draft") {
@ -592,7 +595,8 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
return true; return true;
} }
if (arg == "--poll-draft") { if (arg == "--poll-draft") {
params.draft_cpuparams.poll = true; CHECK_ARG
params.draft_cpuparams.poll = std::stoul(argv[i]);
return true; return true;
} }
if (arg == "-tbd" || arg == "--threads-batch-draft") { if (arg == "-tbd" || arg == "--threads-batch-draft") {
@ -620,7 +624,8 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
return true; return true;
} }
if (arg == "--poll-batch-draft") { if (arg == "--poll-batch-draft") {
params.draft_cpuparams_batch.poll = true; CHECK_ARG
params.draft_cpuparams_batch.poll = std::stoul(argv[i]);
return true; return true;
} }
if (arg == "-p" || arg == "--prompt") { if (arg == "-p" || arg == "--prompt") {
@ -1710,34 +1715,37 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
options.push_back({ "*", "-t, --threads N", "number of threads to use during generation (default: %d)", params.cpuparams.n_threads }); options.push_back({ "*", "-t, --threads N", "number of threads to use during generation (default: %d)", params.cpuparams.n_threads });
options.push_back({ "*", "-C, --cpu-mask M", "CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")"}); options.push_back({ "*", "-C, --cpu-mask M", "CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")"});
options.push_back({ "*", "-Cr, --cpu-range lo-hi", "range of CPUs for affinity. Complements --cpu-mask"}); options.push_back({ "*", "-Cr, --cpu-range lo-hi", "range of CPUs for affinity. Complements --cpu-mask"});
options.push_back({ "*", " --cpu-strict", "use strict CPU placement (default: %u)\n", (unsigned) params.cpuparams.strict_cpu}); options.push_back({ "*", " --cpu-strict <0|1>", "use strict CPU placement (default: %u)\n", (unsigned) params.cpuparams.strict_cpu});
options.push_back({ "*", " --priority N", "set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority}); options.push_back({ "*", " --priority N", "set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority});
options.push_back({ "*", " --poll", "use polling to wait for work (default: %u)\n", (unsigned) params.cpuparams.poll}); options.push_back({ "*", " --poll <0|1>", "use polling to wait for work (default: %u)\n", (unsigned) params.cpuparams.poll});
options.push_back({ "*", "-tb, --threads-batch N", "number of threads to use during batch and prompt processing (default: same as --threads)" }); options.push_back({ "*", "-tb, --threads-batch N", "number of threads to use during batch and prompt processing (default: same as --threads)" });
options.push_back({ "*", "-Cb, --cpu-mask-batch M", "CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)"}); options.push_back({ "*", "-Cb, --cpu-mask-batch M", "CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)"});
options.push_back({ "*", "-Crb, --cpu-range-batch lo-hi", options.push_back({ "*", "-Crb, --cpu-range-batch lo-hi",
"ranges of CPUs for affinity. Complements --cpu-mask-batch"}); "ranges of CPUs for affinity. Complements --cpu-mask-batch"});
options.push_back({ "*", " --cpu-strict-batch", "use strict CPU placement (default: same as --cpu-strict)"}); options.push_back({ "*", " --cpu-strict-batch <0|1>",
"use strict CPU placement (default: same as --cpu-strict)"});
options.push_back({ "*", " --priority-batch N", "set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: --priority)"}); options.push_back({ "*", " --priority-batch N", "set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: --priority)"});
options.push_back({ "*", " --poll-batch", "use polling to wait for work (default: --poll)"}); options.push_back({ "*", " --poll-batch <0|1>", "use polling to wait for work (default: same as --poll"});
options.push_back({ "speculative", "-td, --threads-draft N", "number of threads to use during generation (default: same as --threads)" }); options.push_back({ "speculative", "-td, --threads-draft N", "number of threads to use during generation (default: same as --threads)" });
options.push_back({ "speculative", "-Cd, --cpu-mask-draft M", "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)"}); options.push_back({ "speculative", "-Cd, --cpu-mask-draft M", "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)"});
options.push_back({ "speculative", "-Crd, --cpu-range-draft lo-hi", options.push_back({ "speculative", "-Crd, --cpu-range-draft lo-hi",
"Ranges of CPUs for affinity. Complements --cpu-mask-draft"}); "Ranges of CPUs for affinity. Complements --cpu-mask-draft"});
options.push_back({ "speculative", " --cpu-strict-draft", "Use strict CPU placement for draft model (default: same as --cpu-strict)"}); options.push_back({ "speculative", " --cpu-strict-draft <0|1>",
"Use strict CPU placement for draft model (default: same as --cpu-strict)"});
options.push_back({ "speculative", " --priority-draft N", "Set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: same as --priority)"}); options.push_back({ "speculative", " --priority-draft N", "Set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: same as --priority)"});
options.push_back({ "speculative", " --poll-draft", "Use polling to wait for draft model work (default: same as --poll])"}); options.push_back({ "speculative", " --poll-draft <0|1>", "Use polling to wait for draft model work (default: same as --poll])"});
options.push_back({ "speculative", "-tbd, --threads-batch-draft N", options.push_back({ "speculative", "-tbd, --threads-batch-draft N",
"number of threads to use during batch and prompt processing (default: same as --threads-draft)" }); "number of threads to use during batch and prompt processing (default: same as --threads-draft)" });
options.push_back({ "speculative", "-Cbd, --cpu-mask-batch-draft M", options.push_back({ "speculative", "-Cbd, --cpu-mask-batch-draft M",
"Draft model CPU affinity mask. Complements cpu-range-draft-batch (default: same as --cpu-mask-draft)"}); "Draft model CPU affinity mask. Complements cpu-range-draft-batch (default: same as --cpu-mask-draft)"});
options.push_back({ "speculative", "-Crbd, --cpu-range-batch-draft lo-hi", options.push_back({ "speculative", "-Crbd, --cpu-range-batch-draft lo-hi",
"Ranges of CPUs for affinity. Complements --cpu-mask-draft-batch)"}); "Ranges of CPUs for affinity. Complements --cpu-mask-draft-batch)"});
options.push_back({ "speculative", " --cpu-strict-batch-draft", options.push_back({ "speculative", " --cpu-strict-batch-draft <0|1>",
"Use strict CPU placement for draft model (default: --cpu-strict-draft)"}); "Use strict CPU placement for draft model (default: --cpu-strict-draft)"});
options.push_back({ "speculative", " --priority-batch-draft N", options.push_back({ "speculative", " --priority-batch-draft N",
"Set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: --priority-draft)"}); "Set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: --priority-draft)"});
options.push_back({ "speculative", " --poll-batch-draft", "Use polling to wait for draft model work (default: --poll-draft)"}); options.push_back({ "speculative", " --poll-batch-draft <0|1>",
"Use polling to wait for draft model work (default: --poll-draft)"});
options.push_back({ "speculative", " --draft N", "number of tokens to draft for speculative decoding (default: %d)", params.n_draft }); options.push_back({ "speculative", " --draft N", "number of tokens to draft for speculative decoding (default: %d)", params.n_draft });
options.push_back({ "speculative", "-ps, --p-split N", "speculative decoding split probability (default: %.1f)", (double)params.p_split }); options.push_back({ "speculative", "-ps, --p-split N", "speculative decoding split probability (default: %.1f)", (double)params.p_split });

View file

@ -73,7 +73,7 @@ struct cpu_params {
bool mask_valid = false; // Default: any CPU bool mask_valid = false; // Default: any CPU
int32_t priority = 0; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime) int32_t priority = 0; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
bool strict_cpu = false; // Use strict CPU placement bool strict_cpu = false; // Use strict CPU placement
bool poll = false; // Use polling (busywait) to wait for work bool poll = true; // Use polling (busywait) to wait for work (default matches OpenMP)
}; };
struct gpt_params { struct gpt_params {

View file

@ -513,9 +513,17 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
} }
params.cpuparams.priority = std::stoul(argv[i]); params.cpuparams.priority = std::stoul(argv[i]);
} else if (arg == "--cpu-strict") { } else if (arg == "--cpu-strict") {
params.cpuparams.strict_cpu = true; if (++i >= argc) {
invalid_param = true;
break;
}
params.cpuparams.strict_cpu = std::stoul(argv[i]);
} else if (arg == "--poll") { } else if (arg == "--poll") {
params.cpuparams.poll = true; if (++i >= argc) {
invalid_param = true;
break;
}
params.cpuparams.poll = std::stoul(argv[i]);
} else if (arg == "-fa" || arg == "--flash-attn") { } else if (arg == "-fa" || arg == "--flash-attn") {
if (++i >= argc) { if (++i >= argc) {
invalid_param = true; invalid_param = true;