From 5db246a547b004eef139f37ddbec37c8c875c58a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 21 Apr 2023 20:12:42 +0300 Subject: [PATCH] ggml : prefer vzip to vuzp This way we always use the same type of instruction across all quantizations --- ggml.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ggml.c b/ggml.c index 6cea937c8..6f915517d 100644 --- a/ggml.c +++ b/ggml.c @@ -2632,18 +2632,18 @@ static void ggml_vec_dot_q4_1_q8_0(const int n, float * restrict s, const void * const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); const int8x16_t v0_1h = vreinterpretq_s8_u8(vshrq_n_u8(v0_1, 4)); + // interleave + const int8x16_t v0_0lz = vzip1q_s8(v0_0l, v0_0h); + const int8x16_t v0_0hz = vzip2q_s8(v0_0l, v0_0h); + const int8x16_t v0_1lz = vzip1q_s8(v0_1l, v0_1h); + const int8x16_t v0_1hz = vzip2q_s8(v0_1l, v0_1h); + // load y const int8x16_t v1_0l = vld1q_s8(y0->qs); const int8x16_t v1_0h = vld1q_s8(y0->qs + 16); const int8x16_t v1_1l = vld1q_s8(y1->qs); const int8x16_t v1_1h = vld1q_s8(y1->qs + 16); - // interleave - const int8x16_t v1_0ls = vuzp1q_s8(v1_0l, v1_0h); - const int8x16_t v1_0hs = vuzp2q_s8(v1_0l, v1_0h); - const int8x16_t v1_1ls = vuzp1q_s8(v1_1l, v1_1h); - const int8x16_t v1_1hs = vuzp2q_s8(v1_1l, v1_1h); - #if defined(__ARM_FEATURE_DOTPROD) // dot product into int32x4_t const int32x4_t p_0 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0ls), v0_0h, v1_0hs);