threadpool: simplify threadpool init logic and fix main thread affinity application

Most of the init code is now exactly the same between the threadpool and OpenMP code paths.
Max Krasnyansky 2024-08-24 17:35:34 -07:00 committed by fmz
parent 8008463aee
commit 49ac51f2a3


@@ -19191,7 +19191,6 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
     struct ggml_compute_threadpool * threadpool = state->threadpool;
 
-    ggml_thread_apply_process_priority(threadpool->prio);
     ggml_thread_apply_thread_priority(threadpool->prio);
     if (ggml_thread_cpumask_is_valid(state->cpumask)) {
         ggml_thread_apply_affinity(state->cpumask);
     }
@@ -19296,51 +19295,35 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
         threadpool->ec = GGML_STATUS_SUCCESS;
     }
 
-#ifndef GGML_USE_OPENMP
-    ggml_mutex_init(&threadpool->mutex);
-    ggml_cond_init(&threadpool->cond);
-#endif // GGML_USE_OPENMP
-
-    struct ggml_compute_state * workers =
-        GGML_ALIGNED_MALLOC(sizeof(struct ggml_compute_state) * tpp->n_threads);
+    // Allocate and init workers state
+    const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads;
+    struct ggml_compute_state * workers = GGML_ALIGNED_MALLOC(workers_size);
+
+    memset(workers, 0, workers_size);
+
+    for (int j = 0; j < tpp->n_threads; j++) {
+        workers[j].threadpool = threadpool;
+        workers[j].ith        = j;
+    }
 
     threadpool->workers = workers;
 
-#ifdef GGML_USE_OPENMP
-    for (int j = 0; j < tpp->n_threads; j++) {
-        workers[j] = (struct ggml_compute_state) {
-            .threadpool = threadpool,
-            .ith        = j
-        };
-    }
-#else // Not using OPENMP
-    int32_t cpumask_iter = 0;
-
-    ggml_thread_apply_process_priority(tpp->prio);
-    ggml_thread_apply_thread_priority(tpp->prio);
-
-    for (int j = 0; j < tpp->n_threads; j++) {
-        workers[j] = (struct ggml_compute_state) {
-            .thrd       = 0,
-            .threadpool = threadpool,
-            .ith        = j,
-            .last_graph = 0,
-            .pending    = false
-        };
-
-        ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
-
-        // Spin threads for all secondary workers
-        if (j > 0) {
-            int32_t rc = ggml_thread_create(
-                &workers[j].thrd,
-                NULL,
-                ggml_graph_compute_secondary_thread,
-                &workers[j]
-            );
-            GGML_ASSERT(rc == 0);
-        }
-    }
+#ifndef GGML_USE_OPENMP
+    ggml_mutex_init(&threadpool->mutex);
+    ggml_cond_init(&threadpool->cond);
+
+    // Spin the threads for all workers, and update CPU placements.
+    // Place the main thread last (towards the higher numbered CPU cores).
+
+    int32_t cpumask_iter = 0;
+
+    for (int j = 1; j < tpp->n_threads; j++) {
+        ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
+
+        int32_t rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_secondary_thread, &workers[j]);
+        GGML_ASSERT(rc == 0);
+    }
+
+    ggml_thread_cpumask_next(tpp->cpumask, workers[0].cpumask, tpp->strict_cpu, &cpumask_iter);
 #endif // GGML_USE_OPENMP
 
     return threadpool;
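
The new init path above spins the secondary workers first and leaves the main thread's CPU slot for last. A minimal standalone sketch of that placement pattern follows (hypothetical names — next_cpu, secondary_thread — with consecutive CPU ids standing in for ggml_thread_cpumask_next; an illustration of the scheme, not the ggml code):

// Sketch: spin up n-1 secondary workers, handing out CPU slots in order,
// then give the main thread (worker 0) the last slot.
#include <pthread.h>
#include <stdio.h>

#define N_THREADS 4

struct worker {
    pthread_t thrd;
    int       ith;
    int       cpu;  // placement slot drawn from the shared iterator
};

// Stand-in for ggml_thread_cpumask_next: hand out consecutive CPU ids.
static int next_cpu(int * iter) {
    return (*iter)++;
}

static void * secondary_thread(void * data) {
    struct worker * w = data;
    printf("secondary worker %d placed on cpu %d\n", w->ith, w->cpu);
    return NULL;
}

int main(void) {
    struct worker workers[N_THREADS];
    int cpumask_iter = 0;

    // Secondary workers first: they take slots 0 .. n-2.
    for (int j = 1; j < N_THREADS; j++) {
        workers[j].ith = j;
        workers[j].cpu = next_cpu(&cpumask_iter);
        pthread_create(&workers[j].thrd, NULL, secondary_thread, &workers[j]);
    }

    // Main thread last: it takes slot n-1, the highest numbered one.
    workers[0].ith = 0;
    workers[0].cpu = next_cpu(&cpumask_iter);
    printf("main thread %d placed on cpu %d\n", workers[0].ith, workers[0].cpu);

    for (int j = 1; j < N_THREADS; j++) {
        pthread_join(workers[j].thrd, NULL);
    }
    return 0;
}

With N_THREADS = 4 the secondaries draw cpus 0..2 and the main thread takes cpu 3, matching the "place the main thread last" comment in the diff.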
@@ -19391,22 +19374,16 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
                 threadpool->n_threads_cur = n_threads;
             }
 
-            struct ggml_compute_state worker = {
-                .ith        = omp_get_thread_num(),
-                .threadpool = threadpool,
-            };
-            ggml_graph_compute_thread(&worker);
+            ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
         }
     } else {
-        struct ggml_compute_state worker = {
-            .ith        = 0,
-            .threadpool = threadpool,
-        };
-        ggml_graph_compute_thread(&worker);
+        ggml_graph_compute_thread(&threadpool->workers[0]);
    }
 #else
-    // Update main thread affinity to match the current threadpool
-    if (!ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
+    // Update main thread prio and affinity to match the current threadpool
+    ggml_thread_apply_process_priority(threadpool->prio);
+    ggml_thread_apply_thread_priority(threadpool->prio);
+
+    if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
         ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
     }
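
The affinity fix named in the commit title is the inverted test in this last hunk: the old code pinned the main thread only when the cpumask was *not* valid, so a user-supplied placement was never actually applied. A small Linux-only sketch of the corrected logic, assuming a boolean per-CPU mask (cpumask_is_valid, apply_affinity, and MAX_CPUS are illustrative stand-ins, not the real ggml helpers):

// Sketch of the fixed check: pin the calling thread only when the mask
// actually has at least one CPU enabled.
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

#define MAX_CPUS 512  // illustrative stand-in for the real mask size

static bool cpumask_is_valid(const bool * mask) {
    for (int i = 0; i < MAX_CPUS; i++) {
        if (mask[i]) { return true; }
    }
    return false;  // all-zero mask means "no explicit placement requested"
}

static void apply_affinity(const bool * mask) {
    cpu_set_t set;
    CPU_ZERO(&set);
    for (int i = 0; i < MAX_CPUS; i++) {
        if (mask[i]) { CPU_SET(i, &set); }
    }
    pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
}

void update_main_thread_placement(const bool * mask) {
    if (cpumask_is_valid(mask)) {  // pre-fix code tested !cpumask_is_valid(mask)
        apply_affinity(mask);
    }
}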