threadpool: add support for ggml_threadpool_params_default/init
Also removes the need for the explicit mask_specified param: an all-zero cpumask means use the default (usually inherited) CPU affinity mask.
parent 4a4d71501b
commit c4452edfea

4 changed files with 41 additions and 43 deletions
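For context, here is a minimal caller-side sketch of the API this commit introduces. The thread count and core pinning are arbitrary example values, the surrounding graph setup and error handling are omitted, and all names come from the diff below.

    // Populate the new defaults; an all-zero cpumask keeps the default
    // (usually inherited) CPU affinity.
    struct ggml_threadpool_params tpp = ggml_threadpool_params_default(8);

    // Optionally pin the pool to the first 8 cores by setting mask bits.
    for (int i = 0; i < 8; i++) {
        tpp.cpumask[i] = true;
    }
    tpp.strict_cpu = false; // all threads share the same cpumask

    struct ggml_compute_threadpool * threadpool = ggml_create_threadpool(&tpp);
    // ... attach the pool and compute graphs ...
    ggml_release_threadpool(threadpool);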
@@ -295,13 +295,7 @@ void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model)
         }
     }
 
-    if (n_set == 0) {
-        // You hit the jackpot!
-        memset(&cpuparams.cpumask[0], 1, GGML_MAX_N_THREADS);
-        n_set = GGML_MAX_N_THREADS;
-    }
-
-    if (n_set < cpuparams.n_threads) {
+    if (n_set && n_set < cpuparams.n_threads) {
         // Not enough set bits, may experience performance issues.
         fprintf(stderr, "warn: Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
     }
@@ -2606,16 +2600,15 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
 struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params) {
     struct ggml_threadpool_params tpp;
 
-    tpp.mask_specified = params.mask_valid;
+    ggml_threadpool_params_init(&tpp, params.n_threads); // setup the defaults
+
     if (params.mask_valid) {
         std::memcpy(&tpp.cpumask, &params.cpumask, GGML_MAX_N_THREADS);
     }
 
-    tpp.n_threads = params.n_threads;
     tpp.prio = params.priority;
     tpp.poll = params.poll;
     tpp.strict_cpu = params.strict_cpu;
-    tpp.paused = false;
 
     return tpp;
 }
@@ -1462,14 +1462,13 @@ int main(int argc, char ** argv) {
 
         llama_kv_cache_clear(ctx);
 
-        struct ggml_threadpool_params tpp;
-        tpp.n_threads = t.n_threads;
-        tpp.mask_specified = params.cpuparams.mask_valid;
+        struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads);
         tpp.strict_cpu = params.cpuparams.strict_cpu;
         tpp.prio = params.cpuparams.priority;
        tpp.poll = params.cpuparams.poll;
-
-        std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
+        if (params.cpuparams.mask_valid) {
+            std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
+        }
 
         struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
         if (!threadpool) {
@@ -626,9 +626,10 @@ extern "C" {
     // If it returns true, the computation is aborted
     typedef bool (*ggml_abort_callback)(void * data);
 
+    // Threadpool params
+    // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
     struct ggml_threadpool_params {
-        bool     cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores
-        bool     mask_specified;              // mask is non-empty
+        bool     cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
         int      n_threads;                   // number of threads
         int32_t  prio;                        // thread priority
         uint32_t poll;                        // polling level (0 - no polling, 100 - aggressive polling)
@@ -2025,6 +2026,8 @@ extern "C" {
     GGML_API size_t ggml_graph_overhead(void);
     GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
 
+    GGML_API struct ggml_threadpool_params   ggml_threadpool_params_default(int n_threads);
+    GGML_API void                            ggml_threadpool_params_init(struct ggml_threadpool_params *p, int n_threads);
     GGML_API bool                            ggml_threadpool_params_match (const struct ggml_threadpool_params *p0, const struct ggml_threadpool_params *p1);
     GGML_API struct ggml_compute_threadpool* ggml_create_threadpool       (struct ggml_threadpool_params * params);
     GGML_API void                            ggml_release_threadpool      (struct ggml_compute_threadpool * threadpool);
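The two new entry points produce identical defaults; ggml_threadpool_params_default() is a thin wrapper over ggml_threadpool_params_init() (see the implementation further down in this diff). A brief sketch of the difference, value-returning vs. in-place initialization (the thread count is an arbitrary example):

    // Value-returning form:
    struct ggml_threadpool_params a = ggml_threadpool_params_default(4);

    // In-place form, handy when the struct is embedded in another structure:
    struct ggml_threadpool_params b;
    ggml_threadpool_params_init(&b, 4);

    // Same defaults either way, so this comparison holds:
    // ggml_threadpool_params_match(&a, &b) == true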
@@ -1987,7 +1987,6 @@ struct ggml_compute_state {
 #ifndef GGML_USE_OPENMP
     ggml_thread_t thrd;
     bool cpumask[GGML_MAX_N_THREADS];
-    bool mask_specified;
     int  last_graph;
     bool pending;
 #endif
@@ -18828,11 +18827,14 @@ static bool ggml_thread_apply_thread_priority(int32_t prio) {
 
 #endif
 
-static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask, bool strict, int32_t* iter) {
-    if (!global_mask) {
-        memset(local_mask, 1, GGML_MAX_N_THREADS);
-        return;
+static bool ggml_thread_cpumask_is_valid(const bool * mask) {
+    for (int i = 0; i < GGML_MAX_N_THREADS; i++) {
+        if (mask[i]) { return true; }
     }
+    return false;
+}
+
+static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask, bool strict, int32_t* iter) {
     if (!strict) {
         memcpy(local_mask, global_mask, GGML_MAX_N_THREADS);
         return;
@@ -19189,8 +19191,10 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
     struct ggml_compute_threadpool * threadpool = state->threadpool;
 
     ggml_thread_apply_thread_priority(threadpool->prio);
-    if (state->mask_specified)
+
+    if (ggml_thread_cpumask_is_valid(state->cpumask)) {
         ggml_thread_apply_affinity(state->cpumask);
+    }
 
     while (true) {
         // Check if we need to sleep
@@ -19249,17 +19253,27 @@ static void ggml_graph_compute_kickoff(struct ggml_compute_threadpool * threadpo
 
 #endif // GGML_USE_OPENMP
 
+void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
+    p->n_threads  = n_threads;
+    p->prio       = 0;     // default priority (usually means normal or inherited)
+    p->poll       = 50;    // hybrid-polling enabled
+    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
+    p->paused     = false; // threads are ready to go
+    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
+}
+
+struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
+    struct ggml_threadpool_params p;
+    ggml_threadpool_params_init(&p, n_threads);
+    return p;
+}
+
 bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
     if (p0->n_threads != p1->n_threads ) return false;
     if (p0->prio != p1->prio ) return false;
     if (p0->poll != p1->poll ) return false;
     if (p0->strict_cpu != p1->strict_cpu ) return false;
-    if (p0->mask_specified != p1->mask_specified) return false;
-    if (p0->mask_specified) {
-        return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
-    }
-
-    return true;
+    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
 }
 
 static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
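A hedged sketch of how a caller might use the simplified ggml_threadpool_params_match() to decide whether an existing pool can be reused; the helper name and the reuse policy are illustrative and not part of this commit:

    static struct ggml_compute_threadpool * reuse_or_create(
            struct ggml_compute_threadpool      * current,
            const struct ggml_threadpool_params * current_params,
            struct ggml_threadpool_params       * wanted) {
        if (current && ggml_threadpool_params_match(current_params, wanted)) {
            return current;                   // identical config: keep the existing pool
        }
        if (current) {
            ggml_release_threadpool(current); // config changed: tear down and rebuild
        }
        return ggml_create_threadpool(wanted);
    }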
@@ -19312,16 +19326,13 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
     for (int j = 0; j < tpp->n_threads; j++) {
         workers[j] = (struct ggml_compute_state) {
             .thrd = 0,
-            .mask_specified = tpp->mask_specified,
             .threadpool = threadpool,
             .ith = j,
             .last_graph = 0,
             .pending = false
         };
 
-        if (tpp->mask_specified) {
-            ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
-        }
+        ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
 
         // Spin threads for all secondary workers
         if (j > 0) {
@@ -19357,15 +19368,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
         GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
         disposable_threadpool = true;
 
-        struct ggml_threadpool_params ttp = {
-            .mask_specified = false,
-            .n_threads = n_threads,
-            .prio = 0,
-            .poll = 1,
-            .strict_cpu = false,
-            .paused = false
-        };
-
+        struct ggml_threadpool_params ttp = ggml_threadpool_params_default(n_threads);
         threadpool = ggml_create_threadpool_impl(&ttp, cgraph, cplan);
     } else {
         // Reset some of the parameters that need resetting
@@ -19407,7 +19410,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
     }
 #else
     // Update main thread affinity to match the current threadpool
-    if (threadpool->workers[0].mask_specified) {
+    if (!ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
         ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
     }
 