From b0d0bdd07bb56e7344e234df01ba0f19d3b068e6 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Tue, 26 Mar 2024 19:19:05 +0100
Subject: [PATCH] iq1_m: QK_K = 64 seems to work on Metal and ARM_NEON

---
 ggml-metal.metal | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-metal.metal b/ggml-metal.metal
index 742adc4ab..744b2a8b4 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -4540,7 +4540,7 @@ void kernel_mul_mv_iq1_m_f32_impl(
             const float delta1 = sumy[0] * (qh[0] & 0x08 ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA) + sumy[1] * (qh[0] & 0x80 ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA);
             const float delta2 = sumy[2] * (qh[1] & 0x08 ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA) + sumy[3] * (qh[1] & 0x80 ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA);
 #if QK_K == 64
-            const float d = (float) *(device const half *)(sc - 2);
+            const float d = (float) *((device const half *)(sc - 1));
             sumf[row] += d * ((sum[0] + delta1) * (2*((sc[0] >> (8*(ib%2)+0)) & 0xf) + 1) +
                               (sum[1] + delta2) * (2*((sc[0] >> (8*(ib%2)+4)) & 0xf) + 1));
 #else