From 222c679842ca7a50ef84d1ade673cc47d2204dfd Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Tue, 13 Jun 2023 12:51:08 -0700 Subject: [PATCH] Move a repeated calc to const --- ggml-metal.metal | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/ggml-metal.metal b/ggml-metal.metal index 09e12a879..5d19fcffa 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -1190,15 +1190,17 @@ kernel void kernel_mul_mat_q4_k_f32( float sumf = 0; for (int i = tpitg.x; i < nb; i += tptg.x) { - device const uint8_t * q1 = (x + i)->qs + q_offset; - device const uint8_t * q2 = q1 + 64; - device const float * y1 = yy + i*QK_K + y_offset; - device const float * y2 = y1 + 128; + device const block_q4_k * xi = x + i; + device const uint8_t * q1 = xi->qs + q_offset; + device const uint8_t * q2 = q1 + 64; + device const float * y1 = yy + i*QK_K + y_offset; + device const float * y2 = y1 + 128; + device const uint8_t * scales = xi->scales; - const float dall = (float)((x + i)->d); - const float dmin = (float)((x + i)->dmin); + const float dall = (float)(xi->d); + const float dmin = (float)(xi->dmin); - device const uint16_t * a = (device const uint16_t *)(x + i)->scales; + device const uint16_t * a = (device const uint16_t *)xi->scales; sc1 = as_type((uint16_t)(a[im+0] & kmask1)); sc2 = as_type((uint16_t)(a[im+2] & kmask1)); sc3 = as_type((uint16_t)(((a[im+4] >> 0) & kmask2) | ((a[im+0] & kmask3) >> 2)));