threadpool: simplify threadpool init logic and fix main thread affinity application
Most of the init code is now exactly the same between the threadpool and OpenMP builds.
parent 8008463aee
commit 49ac51f2a3

1 changed file with 26 additions and 49 deletions
@@ -19191,7 +19191,6 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
     struct ggml_compute_threadpool * threadpool = state->threadpool;
-    ggml_thread_apply_process_priority(threadpool->prio);
     ggml_thread_apply_thread_priority(threadpool->prio);
 
     if (ggml_thread_cpumask_is_valid(state->cpumask)) {
         ggml_thread_apply_affinity(state->cpumask);
     }
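The only functional change in this hunk is dropping the ggml_thread_apply_process_priority() call from the secondary-thread entry point: process priority is process-wide, so having every worker re-apply it was redundant, and the call moves to the main compute path instead (see the last hunk). Each worker still applies its own thread priority and affinity. A minimal sketch of why the two calls split this way, assuming the helpers wrap the usual POSIX primitives; the names and signatures below are illustrative assumptions, not the actual ggml implementations:

/* Minimal sketch of the process- vs per-thread split, assuming the ggml
 * helpers wrap the usual POSIX primitives. Names and signatures are
 * illustrative assumptions, not the actual ggml implementations. */
#include <pthread.h>
#include <sched.h>
#include <sys/resource.h>

/* Process-wide: affects all threads at once, so one call from the main
 * thread covers the whole pool; per-worker calls were redundant. */
static void sketch_apply_process_priority(int nice_value) {
    setpriority(PRIO_PROCESS, 0, nice_value);
}

/* Per-thread: must run on the thread being configured, which is why each
 * secondary thread still applies it for itself. */
static void sketch_apply_thread_priority(int policy, int prio) {
    struct sched_param p = { .sched_priority = prio };
    pthread_setschedparam(pthread_self(), policy, &p);
}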
@@ -19296,51 +19295,35 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
         threadpool->ec = GGML_STATUS_SUCCESS;
     }
 
-#ifndef GGML_USE_OPENMP
-    ggml_mutex_init(&threadpool->mutex);
-    ggml_cond_init(&threadpool->cond);
-#endif // GGML_USE_OPENMP
-    struct ggml_compute_state * workers =
-        GGML_ALIGNED_MALLOC(sizeof(struct ggml_compute_state) * tpp->n_threads);
+    // Allocate and init workers state
+    const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads;
+    struct ggml_compute_state * workers = GGML_ALIGNED_MALLOC(workers_size);
+
+    memset(workers, 0, workers_size);
+    for (int j = 0; j < tpp->n_threads; j++) {
+        workers[j].threadpool = threadpool;
+        workers[j].ith = j;
+    }
 
     threadpool->workers = workers;
 
-#ifdef GGML_USE_OPENMP
-    for (int j = 0; j < tpp->n_threads; j++) {
-        workers[j] = (struct ggml_compute_state) {
-            .threadpool = threadpool,
-            .ith = j
-        };
-    }
-#else // Not using OPENMP
+#ifndef GGML_USE_OPENMP
+    ggml_mutex_init(&threadpool->mutex);
+    ggml_cond_init(&threadpool->cond);
+
+    // Spin the threads for all workers, and update CPU placements.
+    // Place the main thread last (towards the higher numbered CPU cores).
+
     int32_t cpumask_iter = 0;
 
-    ggml_thread_apply_process_priority(tpp->prio);
-    ggml_thread_apply_thread_priority(tpp->prio);
-
-    for (int j = 0; j < tpp->n_threads; j++) {
-        workers[j] = (struct ggml_compute_state) {
-            .thrd = 0,
-            .threadpool = threadpool,
-            .ith = j,
-            .last_graph = 0,
-            .pending = false
-        };
-
+    for (int j = 1; j < tpp->n_threads; j++) {
         ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
 
-        // Spin threads for all secondary workers
-        if (j > 0) {
-            int32_t rc = ggml_thread_create(
-                &workers[j].thrd,
-                NULL,
-                ggml_graph_compute_secondary_thread,
-                &workers[j]
-            );
-            GGML_ASSERT(rc == 0);
-        }
+        int32_t rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_secondary_thread, &workers[j]);
+        GGML_ASSERT(rc == 0);
     }
 
+    ggml_thread_cpumask_next(tpp->cpumask, workers[0].cpumask, tpp->strict_cpu, &cpumask_iter);
 #endif // GGML_USE_OPENMP
 
     return threadpool;
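After this hunk, worker-state allocation and initialization are identical in the OpenMP and non-OpenMP builds: the array is zeroed with memset() and each slot only gets its threadpool pointer and index, so fields like thrd, last_graph, and pending rely on the zero fill. The non-OpenMP path then spins up only the secondary workers (j = 1..n_threads-1), consuming cpumask slots for them first and assigning worker 0, the main thread, last, which is what places the main thread toward the higher-numbered cores. A hypothetical sketch of the round-robin selection that ggml_thread_cpumask_next appears to perform, to make that ordering effect concrete (an assumption about its behavior, not the actual implementation):

/* Hypothetical sketch of a cpumask_next-style helper, to show why the
 * worker that asks last (worker 0 here) lands on the highest remaining
 * CPU slot. Assumed behavior, not the real ggml_thread_cpumask_next. */
#include <stdbool.h>

#define SKETCH_MAX_CPUS 512

/* In strict mode, claim the next set bit of the global mask (round-robin
 * via *iter) so each caller gets its own CPU; otherwise every caller just
 * inherits the full mask. Assumes local_mask is zero-initialized, as the
 * memset() in the hunk above guarantees. */
static void sketch_cpumask_next(const bool * global_mask, bool * local_mask,
                                bool strict, int * iter) {
    if (!strict) {
        for (int i = 0; i < SKETCH_MAX_CPUS; i++) {
            local_mask[i] = global_mask[i];
        }
        return;
    }
    for (int k = 0; k < SKETCH_MAX_CPUS; k++) {
        int idx = (*iter + k) % SKETCH_MAX_CPUS;
        if (global_mask[idx]) {
            local_mask[idx] = true;
            *iter = idx + 1; /* the next caller continues from here */
            return;
        }
    }
}

Because *iter advances past every claimed slot, calling the helper for workers[0] only after the loop is exactly what implements the "place the main thread last" comment.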
@@ -19391,22 +19374,16 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
                 threadpool->n_threads_cur = n_threads;
             }
 
-            struct ggml_compute_state worker = {
-                .ith = omp_get_thread_num(),
-                .threadpool = threadpool,
-            };
-            ggml_graph_compute_thread(&worker);
+            ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
         }
     } else {
-        struct ggml_compute_state worker = {
-            .ith = 0,
-            .threadpool = threadpool,
-        };
-        ggml_graph_compute_thread(&worker);
+        ggml_graph_compute_thread(&threadpool->workers[0]);
     }
 #else
-    // Update main thread affinity to match the current threadpool
-    if (!ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
+    // Update main thread prio and affinity to match the current threadpool
+    ggml_thread_apply_process_priority(threadpool->prio);
+    ggml_thread_apply_thread_priority(threadpool->prio);
+    if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
         ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
     }
 
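This last hunk is the affinity fix from the commit title: the old condition was inverted (note the !), so the main thread's cpumask was applied only when it was invalid, meaning a real placement was never honored. The rewrite drops the negation, and since the OpenMP branch now dispatches straight into the preallocated workers array, the on-stack worker structs are gone too. The main thread also now applies process and thread priority on the compute path, matching the secondary workers. For reference, a sketch of what applying a bool-array cpumask to the calling thread boils down to on Linux, using glibc's pthread_setaffinity_np; the helper name is hypothetical, not the actual ggml_thread_apply_affinity:

/* Sketch of applying a bool-array cpumask to the calling thread on Linux.
 * Hypothetical helper for illustration; not the actual
 * ggml_thread_apply_affinity. */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

static void sketch_apply_affinity(const bool * mask, int n_cpus) {
    cpu_set_t set;
    CPU_ZERO(&set);
    for (int i = 0; i < n_cpus; i++) {
        if (mask[i]) {
            CPU_SET(i, &set); /* only CPUs enabled in the mask */
        }
    }
    pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
}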