threadpool: move process priority setting into the apps (bench and cli)
This avoids changing the overall process priority on Windows for the apps that use ggml/llama.cpp directly.
This commit is contained in:
parent
3bcc4dee9a
commit
5d4c0a1327
6 changed files with 108 additions and 64 deletions
|
@ -251,6 +251,57 @@ int32_t cpu_get_num_math() {
|
|||
return cpu_get_num_physical_cores();
|
||||
}
|
||||
|
||||
// Helper for setting process priority
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
bool set_process_priority(enum ggml_sched_priority prio) {
|
||||
if (prio == GGML_SCHED_PRIO_NORMAL) {
|
||||
return true;
|
||||
}
|
||||
|
||||
DWORD p = NORMAL_PRIORITY_CLASS;
|
||||
switch (prio) {
|
||||
case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
|
||||
case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
|
||||
case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
|
||||
case GGML_SCHED_PRIO_REALTIME: p = REALTIME_PRIORITY_CLASS; break;
|
||||
}
|
||||
|
||||
if (!SetPriorityClass(GetCurrentProcess(), p)) {
|
||||
fprintf(stderr, "warn: failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#else // MacOS and POSIX
|
||||
#include <sys/types.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
bool set_process_priority(enum ggml_sched_priority prio) {
|
||||
if (prio == GGML_SCHED_PRIO_NORMAL) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int32_t p = 0;
|
||||
switch (prio) {
|
||||
case GGML_SCHED_PRIO_NORMAL: p = 0; break;
|
||||
case GGML_SCHED_PRIO_MEDIUM: p = -5; break;
|
||||
case GGML_SCHED_PRIO_HIGH: p = -10; break;
|
||||
case GGML_SCHED_PRIO_REALTIME: p = -20; break;
|
||||
}
|
||||
|
||||
if (!setpriority(PRIO_PROCESS, 0, p)) {
|
||||
fprintf(stderr, "warn: failed to set process priority class %d : %s (%d)\n", prio, strerror(errno), errno);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//
|
||||
// CLI argument parsing
|
||||
//
|
||||
|
@ -508,7 +559,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
}
|
||||
if (arg == "--prio") {
|
||||
CHECK_ARG
|
||||
params.cpuparams.priority = std::stoul(argv[i]);
|
||||
params.cpuparams.priority = (enum ggml_sched_priority) std::stoul(argv[i]);
|
||||
return true;
|
||||
}
|
||||
if (arg == "--cpu-strict") {
|
||||
|
@ -545,7 +596,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
}
|
||||
if (arg == "--prio-batch") {
|
||||
CHECK_ARG
|
||||
params.cpuparams_batch.priority = std::stoul(argv[i]);
|
||||
params.cpuparams_batch.priority = (enum ggml_sched_priority) std::stoul(argv[i]);
|
||||
return true;
|
||||
}
|
||||
if (arg == "--cpu-strict-batch") {
|
||||
|
@ -581,7 +632,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
}
|
||||
if (arg == "--prio-draft") {
|
||||
CHECK_ARG
|
||||
params.draft_cpuparams.priority = std::stoul(argv[i]);
|
||||
params.draft_cpuparams.priority = (enum ggml_sched_priority) std::stoul(argv[i]);
|
||||
return true;
|
||||
}
|
||||
if (arg == "--cpu-strict-draft") {
|
||||
|
@ -610,7 +661,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
}
|
||||
if (arg == "--prio-batch-draft") {
|
||||
CHECK_ARG
|
||||
params.draft_cpuparams_batch.priority = std::stoul(argv[i]);
|
||||
params.draft_cpuparams_batch.priority = (enum ggml_sched_priority) std::stoul(argv[i]);
|
||||
return true;
|
||||
}
|
||||
if (arg == "--cpu-strict-batch-draft") {
|
||||
|
|
|
@ -71,7 +71,7 @@ struct cpu_params {
|
|||
int n_threads = -1;
|
||||
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
|
||||
bool mask_valid = false; // Default: any CPU
|
||||
int32_t priority = 0; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
|
||||
enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
|
||||
bool strict_cpu = false; // Use strict CPU placement
|
||||
uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
|
||||
};
|
||||
|
@ -290,6 +290,7 @@ std::string gpt_params_get_system_info(const gpt_params & params);
|
|||
bool parse_cpu_range(const std::string& range, bool(&boolmask)[GGML_MAX_N_THREADS]);
|
||||
bool parse_cpu_mask(const std::string& mask, bool(&boolmask)[GGML_MAX_N_THREADS]);
|
||||
void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model = nullptr);
|
||||
bool set_process_priority(enum ggml_sched_priority prio);
|
||||
|
||||
//
|
||||
// String utils
|
||||
|
|
|
@ -240,7 +240,7 @@ struct cmd_params {
|
|||
std::vector<bool> embeddings;
|
||||
ggml_numa_strategy numa;
|
||||
int reps;
|
||||
int prio;
|
||||
ggml_sched_priority prio;
|
||||
int delay;
|
||||
bool verbose;
|
||||
output_formats output_format;
|
||||
|
@ -271,7 +271,7 @@ static const cmd_params cmd_params_defaults = {
|
|||
/* embeddings */ {false},
|
||||
/* numa */ GGML_NUMA_STRATEGY_DISABLED,
|
||||
/* reps */ 5,
|
||||
/* prio */ 0,
|
||||
/* prio */ GGML_SCHED_PRIO_NORMAL,
|
||||
/* delay */ 0,
|
||||
/* verbose */ false,
|
||||
/* output_format */ MARKDOWN,
|
||||
|
@ -585,7 +585,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.prio = std::stoi(argv[i]);
|
||||
params.prio = (enum ggml_sched_priority) std::stoi(argv[i]);
|
||||
} else if (arg == "--delay") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
|
@ -1470,6 +1470,8 @@ int main(int argc, char ** argv) {
|
|||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
||||
set_process_priority(params.prio);
|
||||
|
||||
// initialize printer
|
||||
std::unique_ptr<printer> p = create_printer(params.output_format);
|
||||
std::unique_ptr<printer> p_err = create_printer(params.output_format_stderr);
|
||||
|
@ -1525,9 +1527,9 @@ int main(int argc, char ** argv) {
|
|||
LOG_TEE("%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
|
||||
exit(1);
|
||||
}
|
||||
tpp.strict_cpu = t.cpu_strict;
|
||||
tpp.poll = t.poll;
|
||||
tpp.prio = params.prio;
|
||||
tpp.strict_cpu = t.cpu_strict;
|
||||
tpp.poll = t.poll;
|
||||
tpp.prio = params.prio;
|
||||
|
||||
struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
|
||||
if (!threadpool) {
|
||||
|
|
|
@ -230,6 +230,8 @@ int main(int argc, char ** argv) {
|
|||
struct ggml_threadpool_params tpp =
|
||||
ggml_threadpool_params_from_cpu_params(params.cpuparams);
|
||||
|
||||
set_process_priority(params.cpuparams.priority);
|
||||
|
||||
struct ggml_compute_threadpool * threadpool_batch = NULL;
|
||||
if (!ggml_threadpool_params_match(&tpp, &tpp_batch)) {
|
||||
threadpool_batch = ggml_create_threadpool(&tpp_batch);
|
||||
|
|
|
@ -626,15 +626,23 @@ extern "C" {
|
|||
// If it returns true, the computation is aborted
|
||||
typedef bool (*ggml_abort_callback)(void * data);
|
||||
|
||||
// Scheduling priorities
|
||||
// Scheduling priority levels, ordered from lowest to highest.
// The numeric values are spelled out because they are what the
// command-line priority options accept (0..3).
enum ggml_sched_priority {
    GGML_SCHED_PRIO_NORMAL   = 0,
    GGML_SCHED_PRIO_MEDIUM   = 1,
    GGML_SCHED_PRIO_HIGH     = 2,
    GGML_SCHED_PRIO_REALTIME = 3
};
|
||||
|
||||
// Threadpool params
|
||||
// Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
|
||||
struct ggml_threadpool_params {
|
||||
bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
|
||||
int n_threads; // number of threads
|
||||
int32_t prio; // thread priority
|
||||
uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
|
||||
bool strict_cpu; // strict cpu placement
|
||||
bool paused; // start in paused state
|
||||
bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
|
||||
int n_threads; // number of threads
|
||||
enum ggml_sched_priority prio; // thread priority
|
||||
uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
|
||||
bool strict_cpu; // strict cpu placement
|
||||
bool paused; // start in paused state
|
||||
};
|
||||
|
||||
struct ggml_compute_threadpool; // forward declaration, see ggml.c
|
||||
|
|
|
@ -18655,18 +18655,11 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|||
|
||||
static thread_ret_t ggml_graph_compute_secondary_thread(void* data);
|
||||
|
||||
enum {
|
||||
SCHED_PRIO_NORMAL,
|
||||
SCHED_PRIO_MEDIUM,
|
||||
SCHED_PRIO_HIGH,
|
||||
SCHED_PRIO_REALTIME
|
||||
};
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include "windows.h"
|
||||
|
||||
// TODO: support > 64 CPUs
|
||||
static bool ggml_thread_apply_affinity(bool * mask) {
|
||||
bool ggml_thread_apply_affinity(bool * mask) {
|
||||
HANDLE h = GetCurrentThread();
|
||||
uint64_t bitmask = 0ULL;
|
||||
|
||||
|
@ -18700,35 +18693,22 @@ static bool ggml_thread_apply_affinity(bool * mask) {
|
|||
return m != 0;
|
||||
}
|
||||
|
||||
static bool ggml_thread_apply_thread_priority(int32_t prio) {
|
||||
DWORD p = NORMAL_PRIORITY_CLASS;
|
||||
static bool ggml_thread_apply_priority(int32_t prio) {
|
||||
// Note that on Windows the Process Priority Class must be updated in order to set Thread priority.
|
||||
// This is up to the applications.
|
||||
DWORD p = THREAD_PRIORITY_NORMAL;
|
||||
switch (prio) {
|
||||
case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
|
||||
case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
|
||||
case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
|
||||
case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
|
||||
}
|
||||
|
||||
if (prio == SCHED_PRIO_NORMAL) {
|
||||
if (prio == GGML_SCHED_PRIO_NORMAL) {
|
||||
// Keep inherited policy/priority
|
||||
return true;
|
||||
}
|
||||
|
||||
// On Windows we have to update Process Priority Class in order to set Thread priority.
|
||||
|
||||
switch (prio) {
|
||||
case SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
|
||||
case SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
|
||||
case SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
|
||||
case SCHED_PRIO_REALTIME: p = REALTIME_PRIORITY_CLASS; break;
|
||||
}
|
||||
|
||||
if (!SetPriorityClass(GetCurrentProcess(), p)) {
|
||||
fprintf(stderr, "warn: failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (prio) {
|
||||
case SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
|
||||
case SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
|
||||
case SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
|
||||
case SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
|
||||
}
|
||||
|
||||
if (!SetThreadPriority(GetCurrentThread(), p)) {
|
||||
fprintf(stderr, "warn: failed to set thread priority %d : (%d)\n", prio, (int) GetLastError());
|
||||
return false;
|
||||
|
@ -18747,17 +18727,17 @@ static bool ggml_thread_apply_affinity(const bool * mask) {
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool ggml_thread_apply_thread_priority(int32_t prio) {
|
||||
static bool ggml_thread_apply_priority(int32_t prio) {
|
||||
struct sched_param p;
|
||||
int32_t policy = SCHED_OTHER;
|
||||
switch (prio) {
|
||||
case SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
||||
case SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
|
||||
case SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
|
||||
case SCHED_PRIO_REALTIME: policy = SCHED_FIFO; p.sched_priority = 90; break;
|
||||
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
||||
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
|
||||
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
|
||||
case GGML_SCHED_PRIO_REALTIME: policy = SCHED_FIFO; p.sched_priority = 90; break;
|
||||
}
|
||||
|
||||
if (prio == SCHED_PRIO_NORMAL) {
|
||||
if (prio == GGML_SCHED_PRIO_NORMAL) {
|
||||
// Keep inherited policy/priority
|
||||
return true;
|
||||
}
|
||||
|
@ -18802,17 +18782,17 @@ static bool ggml_thread_apply_affinity(const bool * mask) {
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool ggml_thread_apply_thread_priority(int32_t prio) {
|
||||
static bool ggml_thread_apply_priority(int32_t prio) {
|
||||
struct sched_param p;
|
||||
int32_t policy = SCHED_OTHER;
|
||||
switch (prio) {
|
||||
case SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
||||
case SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
|
||||
case SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
|
||||
case SCHED_PRIO_REALTIME: policy = SCHED_FIFO; p.sched_priority = 90; break;
|
||||
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
||||
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
|
||||
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
|
||||
case GGML_SCHED_PRIO_REALTIME: policy = SCHED_FIFO; p.sched_priority = 90; break;
|
||||
}
|
||||
|
||||
if (prio == SCHED_PRIO_NORMAL) {
|
||||
if (prio == GGML_SCHED_PRIO_NORMAL) {
|
||||
// Keep inherited policy/priority
|
||||
return true;
|
||||
}
|
||||
|
@ -19190,7 +19170,7 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
|
|||
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
||||
struct ggml_compute_threadpool * threadpool = state->threadpool;
|
||||
|
||||
ggml_thread_apply_thread_priority(threadpool->prio);
|
||||
ggml_thread_apply_priority(threadpool->prio);
|
||||
if (ggml_thread_cpumask_is_valid(state->cpumask)) {
|
||||
ggml_thread_apply_affinity(state->cpumask);
|
||||
}
|
||||
|
@ -19238,7 +19218,7 @@ static void ggml_graph_compute_kickoff(struct ggml_compute_threadpool * threadpo
|
|||
|
||||
if (threadpool->pause) {
|
||||
// Update main thread prio and affinity to match the threadpool settings
|
||||
ggml_thread_apply_thread_priority(threadpool->prio);
|
||||
ggml_thread_apply_priority(threadpool->prio);
|
||||
if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
|
||||
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
|
||||
}
|
||||
|
@ -19333,7 +19313,7 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
|
|||
|
||||
if (!threadpool->pause) {
|
||||
// Update main thread prio and affinity at the start, otherwise we'll do it in resume
|
||||
ggml_thread_apply_thread_priority(threadpool->prio);
|
||||
ggml_thread_apply_priority(threadpool->prio);
|
||||
if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
|
||||
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue