From fc7dc515f187e96ecd0c6e3ac906c6c5faf357c2 Mon Sep 17 00:00:00 2001 From: Kunnis Date: Thu, 9 May 2024 23:29:49 -0500 Subject: [PATCH] adding the looping structure based on the chunk configuration. --- ggml.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ggml.c b/ggml.c index 896f540ea..f04a11b69 100644 --- a/ggml.c +++ b/ggml.c @@ -12091,8 +12091,10 @@ UseGgmlGemm2:; const int64_t nchunk0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows const int64_t nchunk1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows + //The first chunk comes from our thread_id, the rest will get auto-assigned. int current_chunk = ith; + while (current_chunk < nchunk0 * nchunk1) { const int64_t ith0 = current_chunk % nchunk0; const int64_t ith1 = current_chunk / nchunk0; @@ -12112,6 +12114,10 @@ UseGgmlGemm2:; chunks_executed++; #endif + if (nth >= nchunk0 * nchunk1) + break; + + current_chunk = atomic_fetch_add(&state->shared->current_chunk, 1); } #ifdef GGML_PERF