threadpool: proper handling for non-specified cpumask

This commit is contained in:
Max Krasnyansky 2024-05-25 05:14:29 -07:00
parent ef1b87d1af
commit e77167446a

20
ggml.c
View file

@@ -1834,6 +1834,7 @@ struct ggml_compute_threadpool {
struct ggml_compute_state { struct ggml_compute_state {
ggml_thread_t thrd; ggml_thread_t thrd;
bool cpumask[GGML_N_CORES_MAX]; bool cpumask[GGML_N_CORES_MAX];
bool mask_specified;
int ith; int ith;
struct ggml_compute_threadpool * threadpool; struct ggml_compute_threadpool * threadpool;
enum ggml_status ec; enum ggml_status ec;
@@ -19472,13 +19473,6 @@ static bool __thread_priority(int32_t prio) {
#endif #endif
// Grab `size` bytes from the current stack frame via alloca() and zero-fill
// them. Nothing is returned and the pointer is not kept.
// NOTE(review): presumably used to pre-touch stack memory before heavy work;
// confirm the intent with the callers.
static void __init_stack(size_t size) {
    void * const scratch = alloca(size);

    if (scratch == NULL) {
        return;
    }

    memset(scratch, 0, size);
}
#ifdef __aarch64__ #ifdef __aarch64__
static inline void __cpu_relax(void) { static inline void __cpu_relax(void) {
@@ -19553,8 +19547,6 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
threadpool->workers = workers; threadpool->workers = workers;
__init_stack(2ULL * 1024 * 1024);
int cpumask_iter = 0; int cpumask_iter = 0;
__process_priority(tpp->prio); __process_priority(tpp->prio);
@@ -19566,12 +19558,12 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
.ith = j, .ith = j,
.threadpool = threadpool, .threadpool = threadpool,
.ec = GGML_STATUS_SUCCESS, .ec = GGML_STATUS_SUCCESS,
.mask_specified = false
}; };
if (tpp->mask_specified) { if (tpp->mask_specified) {
__cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter); __cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
} else { workers[j].mask_specified = true;
workers[j].cpumask[j] = true;
} }
// Spin threads for all secondary workers // Spin threads for all secondary workers
@@ -19841,11 +19833,8 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
struct ggml_compute_state * state = (struct ggml_compute_state *) data; struct ggml_compute_state * state = (struct ggml_compute_state *) data;
struct ggml_compute_threadpool * threadpool = state->threadpool; struct ggml_compute_threadpool * threadpool = state->threadpool;
#ifndef __aarch64__
__init_stack(2ULL * 1024 * 1024);
#endif
__thread_priority(threadpool->prio); __thread_priority(threadpool->prio);
if (state->mask_specified)
__thread_affinity(state->cpumask); __thread_affinity(state->cpumask);
// Indicate that we're ready to go // Indicate that we're ready to go
@@ -20118,6 +20107,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
} }
// Update main thread affinity to match the current threadpool // Update main thread affinity to match the current threadpool
if (threadpool->workers[0].mask_specified)
__thread_affinity(threadpool->workers[0].cpumask); __thread_affinity(threadpool->workers[0].cpumask);
// Set up work // Set up work