Fix formatting

2024-05-14 17:15:47 -05:00 · 2024-05-14 17:15:47 -05:00 · d9ba30a204
commit d9ba30a204
parent bd80601ea8
1 changed files with 27 additions and 28 deletions
--- a/ggml.c
+++ b/ggml.c
@@ -2435,7 +2435,6 @@ static void ggml_setup_op_has_task_pass(void) {
    }
 }

-
 //
 // NUMA support
 //
@@ -12085,8 +12084,9 @@ UseGgmlGemm2:;
    int chunk_size = 16;

    // We need to step up the size if it's small
-    if (nr0 == 1 || nr1 == 1)
+    if (nr0 == 1 || nr1 == 1) {
        chunk_size = 64;
+    }

    // distribute the work across the inner or outer loop based on which one is larger
    // The number of chunks in the 0/1 dim.
@@ -12097,8 +12097,7 @@ UseGgmlGemm2:;
    // If the chunking is poor for the number of threads on this setup, scrap the whole plan.  Re-chunk it by thread.
    //   Also, chunking by thread was measured to perform better on NUMA systems.  See https://github.com/ggerganov/llama.cpp/pull/6915
    //   In theory, chunking should be just as useful on NUMA and non-NUMA systems, but testing disagreed with that.
-    if (nchunk0 * nchunk1 < nth * 4 || ggml_is_numa())
-    {
+    if (nchunk0 * nchunk1 < nth * 4 || ggml_is_numa()) {
        // distribute the thread work across the inner or outer loop based on which one is larger
        nchunk0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
        nchunk1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows
@@ -12114,8 +12113,7 @@ UseGgmlGemm2:;
    //The first chunk comes from our thread_id, the rest will get auto-assigned.
    int current_chunk = ith;

-    while (current_chunk < nchunk0 * nchunk1)
-    {
+    while (current_chunk < nchunk0 * nchunk1) {
        const int64_t ith0 = current_chunk % nchunk0;
        const int64_t ith1 = current_chunk / nchunk0;

@@ -12131,8 +12129,9 @@ UseGgmlGemm2:;
        chunks_executed++;
 #endif

-        if (nth >= nchunk0 * nchunk1)
+        if (nth >= nchunk0 * nchunk1) {
            break;
+        }

        current_chunk = atomic_fetch_add(&state->shared->current_chunk, 1);
    }