threadpool: proper handling for non-specified cpumask

2024-05-25 05:14:29 -07:00 · 2024-05-25 05:14:29 -07:00 · e77167446a
commit e77167446a
parent ef1b87d1af
1 changed file with 8 additions and 18 deletions
--- a/ggml.c
+++ b/ggml.c
@ -1834,6 +1834,7 @@ struct ggml_compute_threadpool {
 struct ggml_compute_state {
    ggml_thread_t thrd;
    bool cpumask[GGML_N_CORES_MAX];
    bool mask_specified;
    int ith;
    struct ggml_compute_threadpool * threadpool;
    enum ggml_status ec;
@ -19472,13 +19473,6 @@ static bool __thread_priority(int32_t prio) {
 #endif
 // Reserves `size` bytes on the calling thread's stack via alloca() and
 // zero-fills them with memset(). Returns nothing; the only effect is the
 // write to that stack region.
 // NOTE(review): presumably intended to pre-touch stack pages before compute
 // work starts -- confirm against the callers.
 static void __init_stack(size_t size) {
    void* ptr = alloca(size);
    // NOTE(review): alloca() has no documented failure return, so this guard
    // is likely always true -- confirm before relying on it.
    if (ptr) {
        memset(ptr, 0, size);
    }
 }
 #ifdef __aarch64__
 static inline void __cpu_relax(void) {
@ -19553,8 +19547,6 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
    threadpool->workers = workers;
    __init_stack(2ULL * 1024 * 1024);
    int cpumask_iter = 0;
    __process_priority(tpp->prio);
@ -19566,12 +19558,12 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
            .ith        = j,
            .threadpool = threadpool,
            .ec         = GGML_STATUS_SUCCESS,
            .mask_specified = false
        };
        if (tpp->mask_specified) {
            __cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
-        } else {
+            workers[j].mask_specified = true;
            workers[j].cpumask[j] = true;
        }
        // Spin threads for all secondary workers
@ -19841,11 +19833,8 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
    struct ggml_compute_state * state = (struct ggml_compute_state *) data;
    struct ggml_compute_threadpool * threadpool = state->threadpool;
 #ifndef __aarch64__
    __init_stack(2ULL * 1024 * 1024);
 #endif
    __thread_priority(threadpool->prio);
    if (state->mask_specified)
        __thread_affinity(state->cpumask);
    // Indicate that we're ready to go
@ -20118,6 +20107,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
    }
    // Update main thread affinity to match the current threadpool
    if (threadpool->workers[0].mask_specified)
        __thread_affinity(threadpool->workers[0].cpumask);
    // Set up work