bench: create fresh threadpool for each test
For benchmarking, it is better to start a fresh threadpool for each test, sized to the exact number of threads that test needs. A larger pool is suboptimal (it causes extra load, etc.).
This commit is contained in:
parent
96d6603dc7
commit
2953441563
1 changed file with 18 additions and 24 deletions
|
@ -291,7 +291,6 @@ static void print_usage(int /* argc */, char ** argv) {
|
||||||
printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
|
printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
|
||||||
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
|
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
|
||||||
printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
|
printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
|
||||||
printf(" -mt, --max-threads <n> (default: %d)\n", cmd_params_defaults.cpuparams.n_threads);
|
|
||||||
printf(" -C, --cpu-mask <hex> (default: 0x0)\n");
|
printf(" -C, --cpu-mask <hex> (default: 0x0)\n");
|
||||||
printf(" --cpu-strict <0|1> (default: %d)\n", cmd_params_defaults.cpuparams.strict_cpu);
|
printf(" --cpu-strict <0|1> (default: %d)\n", cmd_params_defaults.cpuparams.strict_cpu);
|
||||||
printf(" --priority <0|1|2|3> (default: %d)\n", cmd_params_defaults.cpuparams.priority);
|
printf(" --priority <0|1|2|3> (default: %d)\n", cmd_params_defaults.cpuparams.priority);
|
||||||
|
@ -499,12 +498,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||||
else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
|
else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
|
||||||
else { invalid_param = true; break; }
|
else { invalid_param = true; break; }
|
||||||
}
|
}
|
||||||
} else if (arg == "-mt" || arg == "--max-threads") {
|
|
||||||
if (++i >= argc) {
|
|
||||||
invalid_param = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
params.cpuparams.n_threads = std::stoi(argv[i]);
|
|
||||||
} else if (arg == "-C" || arg == "--cpu-mask") {
|
} else if (arg == "-C" || arg == "--cpu-mask") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
|
@ -1435,21 +1428,6 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
postprocess_cpu_params(params.cpuparams);
|
postprocess_cpu_params(params.cpuparams);
|
||||||
|
|
||||||
struct ggml_threadpool_params tpp;
|
|
||||||
tpp.n_threads = params.cpuparams.n_threads;
|
|
||||||
tpp.mask_specified = params.cpuparams.mask_valid;
|
|
||||||
tpp.strict_cpu = params.cpuparams.strict_cpu;
|
|
||||||
tpp.prio = params.cpuparams.priority;
|
|
||||||
tpp.poll = params.cpuparams.poll;
|
|
||||||
|
|
||||||
std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
|
|
||||||
|
|
||||||
struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
|
|
||||||
if (!threadpool) {
|
|
||||||
LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const auto & inst : params_instances) {
|
for (const auto & inst : params_instances) {
|
||||||
// keep the same model between tests when possible
|
// keep the same model between tests when possible
|
||||||
if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
|
if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
|
||||||
|
@ -1475,6 +1453,22 @@ int main(int argc, char ** argv) {
|
||||||
test t(inst, lmodel, ctx);
|
test t(inst, lmodel, ctx);
|
||||||
|
|
||||||
llama_kv_cache_clear(ctx);
|
llama_kv_cache_clear(ctx);
|
||||||
|
|
||||||
|
struct ggml_threadpool_params tpp;
|
||||||
|
tpp.n_threads = t.n_threads;
|
||||||
|
tpp.mask_specified = params.cpuparams.mask_valid;
|
||||||
|
tpp.strict_cpu = params.cpuparams.strict_cpu;
|
||||||
|
tpp.prio = params.cpuparams.priority;
|
||||||
|
tpp.poll = params.cpuparams.poll;
|
||||||
|
|
||||||
|
std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
|
||||||
|
|
||||||
|
struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
|
||||||
|
if (!threadpool) {
|
||||||
|
LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
llama_attach_threadpool(ctx, threadpool);
|
llama_attach_threadpool(ctx, threadpool);
|
||||||
|
|
||||||
// warmup run
|
// warmup run
|
||||||
|
@ -1515,9 +1509,9 @@ int main(int argc, char ** argv) {
|
||||||
llama_print_timings(ctx);
|
llama_print_timings(ctx);
|
||||||
|
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
}
|
|
||||||
|
|
||||||
ggml_release_threadpool(threadpool);
|
ggml_release_threadpool(threadpool);
|
||||||
|
}
|
||||||
|
|
||||||
llama_free_model(lmodel);
|
llama_free_model(lmodel);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue