Added OMP Barrier in ggml.c to avoid atomic operations

Abhishek Nair 2024-06-19 10:39:23 +05:30
parent 623494a478
commit 4147a04581

ggml.c (37 lines changed)

@@ -19033,8 +19033,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             state->ec = GGML_STATUS_ABORTED;
             return 0;
         }
-
+#ifdef GGML_USE_OPENMP
+        if (state->ith == 0) {
+#else
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
+#endif
             // all other threads are finished and spinning
             // do finalize and init here so we don't have synchronize again
             struct ggml_compute_params params = {
@@ -19094,6 +19097,15 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             }

             task_phase = GGML_TASK_TYPE_INIT;
+#ifdef GGML_USE_OPENMP
+            state->shared->n_active = n_threads;
+            state->shared->node_n = node_n;
+            state->shared->node_task = task_phase;
+        }
+#pragma omp barrier
+        node_n = state->shared->node_n;
+        task_phase = state->shared->node_task;
+#else
             atomic_store(&state->shared->n_active, n_threads);
             atomic_store(&state->shared->node_n, node_n);
             atomic_store(&state->shared->node_task, task_phase);
@@ -19101,6 +19113,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             ggml_graph_compute_thread_sync_node(&node_n, state, false);
             ggml_graph_compute_thread_sync_task(&task_phase, state, false);
         }
+#endif

         // check if we should stop
         if (node_n >= cgraph->n_nodes) break;
@@ -19122,7 +19135,15 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 ggml_compute_forward(&params, node, state);
             }
         }
-
+#ifdef GGML_USE_OPENMP
+        if (state->ith == 0) {
+            task_phase = GGML_TASK_TYPE_COMPUTE;
+            state->shared->n_active = n_threads;
+            state->shared->node_task = task_phase;
+        }
+#pragma omp barrier
+        task_phase = state->shared->node_task;
+#else
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
             task_phase = GGML_TASK_TYPE_COMPUTE;
             atomic_store(&state->shared->n_active, n_threads);
@@ -19137,12 +19158,21 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             const bool do_yield = node_n < 0 || cgraph->nodes[node_n]->op == GGML_OP_MUL_MAT;
             ggml_graph_compute_thread_sync_task(&task_phase, state, do_yield);
         }

         if (state->ith < n_tasks) {
             params.type = GGML_TASK_TYPE_COMPUTE;
             ggml_compute_forward(&params, node, state);
         }
-
+#ifdef GGML_USE_OPENMP
+        if (state->ith == 0) {
+            task_phase = GGML_TASK_TYPE_FINALIZE;
+            state->shared->n_active = n_threads;
+            state->shared->node_task = task_phase;
+        }
+#pragma omp barrier
+        task_phase = state->shared->node_task;
+#else
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
             task_phase = GGML_TASK_TYPE_FINALIZE;
             atomic_store(&state->shared->n_active, n_threads);
@@ -19151,6 +19181,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         else {
             ggml_graph_compute_thread_sync_task(&task_phase, state, false);
         }
+#endif
     }

     return 0;
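
Note: the patch applies one pattern at all three rendezvous points (INIT, COMPUTE, FINALIZE): under GGML_USE_OPENMP only thread 0 publishes the shared state with plain stores, and a single #pragma omp barrier replaces the atomic_fetch_sub countdown plus spin-wait. Below is a minimal, self-contained sketch of the two strategies; sync_point, phase, and the standalone n_active counter are illustrative names, not the ggml API. It assumes the runtime grants all requested threads; build with cc -fopenmp sketch.c.

#include <omp.h>
#include <stdatomic.h>
#include <stdio.h>

#define USE_OMP_BARRIER 1           /* stands in for GGML_USE_OPENMP */

static atomic_int n_active;         /* countdown for the atomic path */
static atomic_int phase;            /* state that thread 0 publishes */

/* Rendezvous for n_threads workers; returns the phase agreed on. */
static int sync_point(int ith, int n_threads, int next_phase) {
#if USE_OMP_BARRIER
    /* OpenMP path: no read-modify-write, no spinning. Thread 0 alone
       publishes the new state; the barrier synchronizes everyone and
       makes the store visible. */
    if (ith == 0) {
        phase = next_phase;
    }
    #pragma omp barrier
#else
    /* Atomic path: every thread does a contended fetch-sub; the last
       one to arrive publishes the state, the rest spin until it does. */
    if (atomic_fetch_sub(&n_active, 1) == 1) {
        atomic_store(&phase, next_phase);
        atomic_store(&n_active, n_threads);
    } else {
        while (atomic_load(&phase) != next_phase) { /* spin */ }
    }
#endif
    return phase;
}

int main(void) {
    const int n_threads = 4;
    atomic_store(&n_active, n_threads);
    atomic_store(&phase, 0);

    #pragma omp parallel num_threads(n_threads)
    {
        int p = sync_point(omp_get_thread_num(), n_threads, 1);
        printf("thread %d entered phase %d\n", omp_get_thread_num(), p);
    }
    return 0;
}

In the atomic path every thread performs a contended read-modify-write on the shared counter at each phase transition; the barrier path hands that synchronization to the OpenMP runtime, which is the motivation stated in the commit title.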