From d53f76760d7b067fd0cef67a994a90e662bdfb50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=2E=20Hitland?= Date: Thu, 27 Apr 2023 22:48:46 +0200 Subject: [PATCH] q4_0c: disable prefetching on M1 --- ggml.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/ggml.c b/ggml.c index a70aa4773..2c5c796fd 100644 --- a/ggml.c +++ b/ggml.c @@ -1154,13 +1154,17 @@ static void quantize_row_q4_0c_reference(const float * restrict x, uint8_t * res float id[2]; for (int j = 0; j < 2; j++) { float amax = 0.0f; // absolute max + float max = 0.0f; for (int l = 0; l < QK4_0; l++) { const float v = xb[j][l]; - amax = MAX(amax, fabsf(v)); + if (amax < fabsf(v)) { + amax = fabsf(v); + max = v; + } } - d[j] = amax / ((1 << 3) - 1); + d[j] = max / -8; id[j] = d[j] ? 1.0f/d[j] : 0.0f; } @@ -1169,10 +1173,10 @@ static void quantize_row_q4_0c_reference(const float * restrict x, uint8_t * res for (int l = 0; l < QK4_0; l++) { const float v0 = xb[0][l]*id[0]; - const uint8_t vi0 = (int8_t)roundf(v0) + 8; + const uint8_t vi0 = MIN(15, (int8_t)roundf(v0) + 8); const float v1 = xb[1][l]*id[1]; - const uint8_t vi1 = (int8_t)roundf(v1) + 8; + const uint8_t vi1 = MIN(15, (int8_t)roundf(v1) + 8); assert(vi0 < 16); assert(vi1 < 16); @@ -3126,16 +3130,19 @@ static void ggml_vec_dot_q4_0c_q8_0c(const int n, float * restrict s, const void float sumf = 0.0; #if defined(__ARM_NEON) - const int ahead=80; float32x4_t sumv0 = vdupq_n_f32(0.0f); float32x4_t sumv1 = vdupq_n_f32(0.0f); for (int i = 0; i < nb/2; i++) { + // Disable prefetching on M1 for now. +#ifndef __APPLE__ + const int ahead=80; __builtin_prefetch(&xqs[i*QK4_0 + 64*ahead]); __builtin_prefetch(&yqs[2*i*QK8_0C + 64*ahead]); __builtin_prefetch(&yqs[2*i*QK8_0C + 64*ahead + 64]); __builtin_prefetch(&xds[2*i + 64/4*ahead]); __builtin_prefetch(&yds[2*i + 64/4*ahead]); +#endif const int dst0 = i + i/2*2; // 0, 1, 4, 5, 8, 9, ... const int dst1 = i + i/2*2 + 2; // 2, 3, 6, 7, 10, 11 ... @@ -9738,11 +9745,13 @@ static void ggml_compute_forward_alibi( ggml_compute_forward_alibi_f32(params, src0, src1, dst); } break; case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_0C: case GGML_TYPE_Q4_1: case GGML_TYPE_Q4_2: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_0C: case GGML_TYPE_Q8_1: case GGML_TYPE_I8: case GGML_TYPE_I16: