diff --git a/ggml/src/ggml-cpu/amx/common.h b/ggml/src/ggml-cpu/amx/common.h index 0b0657289..40074c3fc 100644 --- a/ggml/src/ggml-cpu/amx/common.h +++ b/ggml/src/ggml-cpu/amx/common.h @@ -78,7 +78,6 @@ inline void parallel_for_ggml(const ggml_compute_params * params, int n, const f int tbegin, tend; balance211(n, params->nth, params->ith, tbegin, tend); f(tbegin, tend); - ggml_barrier(params->threadpool); // TODO: might not always be needed } // quantized types that have AMX support diff --git a/ggml/src/ggml-cpu/amx/mmq.cpp b/ggml/src/ggml-cpu/amx/mmq.cpp index b9a73f76d..0ec3aa86d 100644 --- a/ggml/src/ggml-cpu/amx/mmq.cpp +++ b/ggml/src/ggml-cpu/amx/mmq.cpp @@ -1349,10 +1349,10 @@ struct tinygemm_kernel_avx constexpr int row = idx / COLS; constexpr int col = idx % COLS; - if (col == 0) { + if constexpr (col == 0) { va = _mm512_loadu_ps(A + row * K + k); } - if (row == 0) { + if constexpr (row == 0) { vb[col] = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(B + col * K + k))); } vc[idx] = _mm512_fmadd_ps(va, vb[col], vc[idx]);