threadpool: proper handling for non-specified cpumask
This commit is contained in:
parent
ef1b87d1af
commit
e77167446a
1 changed file with 8 additions and 18 deletions
22
ggml.c
22
ggml.c
|
@@ -1834,6 +1834,7 @@ struct ggml_compute_threadpool {
|
||||||
struct ggml_compute_state {
|
struct ggml_compute_state {
|
||||||
ggml_thread_t thrd;
|
ggml_thread_t thrd;
|
||||||
bool cpumask[GGML_N_CORES_MAX];
|
bool cpumask[GGML_N_CORES_MAX];
|
||||||
|
bool mask_specified;
|
||||||
int ith;
|
int ith;
|
||||||
struct ggml_compute_threadpool * threadpool;
|
struct ggml_compute_threadpool * threadpool;
|
||||||
enum ggml_status ec;
|
enum ggml_status ec;
|
||||||
|
@@ -19472,13 +19473,6 @@ static bool __thread_priority(int32_t prio) {
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void __init_stack(size_t size) {
|
|
||||||
void* ptr = alloca(size);
|
|
||||||
if (ptr) {
|
|
||||||
memset(ptr, 0, size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __aarch64__
|
#ifdef __aarch64__
|
||||||
|
|
||||||
static inline void __cpu_relax(void) {
|
static inline void __cpu_relax(void) {
|
||||||
|
@@ -19553,8 +19547,6 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
|
||||||
|
|
||||||
threadpool->workers = workers;
|
threadpool->workers = workers;
|
||||||
|
|
||||||
__init_stack(2ULL * 1024 * 1024);
|
|
||||||
|
|
||||||
int cpumask_iter = 0;
|
int cpumask_iter = 0;
|
||||||
|
|
||||||
__process_priority(tpp->prio);
|
__process_priority(tpp->prio);
|
||||||
|
@@ -19566,12 +19558,12 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
|
||||||
.ith = j,
|
.ith = j,
|
||||||
.threadpool = threadpool,
|
.threadpool = threadpool,
|
||||||
.ec = GGML_STATUS_SUCCESS,
|
.ec = GGML_STATUS_SUCCESS,
|
||||||
|
.mask_specified = false
|
||||||
};
|
};
|
||||||
|
|
||||||
if (tpp->mask_specified) {
|
if (tpp->mask_specified) {
|
||||||
__cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
|
__cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
|
||||||
} else {
|
workers[j].mask_specified = true;
|
||||||
workers[j].cpumask[j] = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Spin threads for all secondary workers
|
// Spin threads for all secondary workers
|
||||||
|
@@ -19841,11 +19833,8 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
|
||||||
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
||||||
struct ggml_compute_threadpool * threadpool = state->threadpool;
|
struct ggml_compute_threadpool * threadpool = state->threadpool;
|
||||||
|
|
||||||
#ifndef __aarch64__
|
|
||||||
__init_stack(2ULL * 1024 * 1024);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
__thread_priority(threadpool->prio);
|
__thread_priority(threadpool->prio);
|
||||||
|
if (state->mask_specified)
|
||||||
__thread_affinity(state->cpumask);
|
__thread_affinity(state->cpumask);
|
||||||
|
|
||||||
// Indicate that we're ready to go
|
// Indicate that we're ready to go
|
||||||
|
@@ -20096,7 +20085,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
||||||
bool disposable_threadpool = false;
|
bool disposable_threadpool = false;
|
||||||
|
|
||||||
if (threadpool == NULL) {
|
if (threadpool == NULL) {
|
||||||
//GGML_PRINT("NOTE: Threadpool is not specified. Will create a disposable threadpool\n");
|
// GGML_PRINT("NOTE: Threadpool is not specified. Will create a disposable threadpool\n");
|
||||||
struct ggml_threadpool_params tpp = {
|
struct ggml_threadpool_params tpp = {
|
||||||
.mask_specified = false,
|
.mask_specified = false,
|
||||||
.n_threads = n_threads,
|
.n_threads = n_threads,
|
||||||
|
@@ -20118,6 +20107,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update main thread affinity to match the current threadpool
|
// Update main thread affinity to match the current threadpool
|
||||||
|
if (threadpool->workers[0].mask_specified)
|
||||||
__thread_affinity(threadpool->workers[0].cpumask);
|
__thread_affinity(threadpool->workers[0].cpumask);
|
||||||
|
|
||||||
// Set up work
|
// Set up work
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue