diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index 2e6c16160..684e6c788 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -4232,9 +4232,9 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
         const __m128i p16_2_0 = mul_add_epi8_sse(q4b_2_0, q8b_2_0);
         const __m128i p16_2_1 = mul_add_epi8_sse(q4b_2_1, q8b_2_1);
         __m128i p_1 = _mm_add_epi16(p16_1_0, p16_1_1);
-        p_1 = _mm_add_epi32(_mm_cvtepi16_epi32(_mm_bsrli_si128(p_1, 8)), _mm_cvtepi16_epi32(p_1));
+        p_1 = _mm_madd_epi16(p_1, _mm_set1_epi16(1));
         __m128i p_2 = _mm_add_epi16(p16_2_0, p16_2_1);
-        p_2 = _mm_add_epi32(_mm_cvtepi16_epi32(_mm_bsrli_si128(p_2, 8)), _mm_cvtepi16_epi32(p_2));
+        p_2 = _mm_madd_epi16(p_2, _mm_set1_epi16(1));
 
         const __m256 deltas = _mm256_set_m128(_mm_set1_ps(GGML_FP16_TO_FP32(x[ib + 1].d) * GGML_FP16_TO_FP32(y[ib + 1].d)),
                               _mm_set1_ps(GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)));