From 0e6c910db95577a02b1f9f2b69687448267f9d55 Mon Sep 17 00:00:00 2001
From: Julia Longtin
Date: Sat, 23 Mar 2024 14:19:47 +0000
Subject: [PATCH] begin work on targeting dot_q5_K_q8_K.

---
 Makefile                     |  2 +-
 ggml-phi-knc-dot_q5_K_q8_K.c | 89 ++++++++++++++++++++++++++++++++++++
 ggml-phi-knc-dot_q5_K_q8_K.h | 18 ++++++++
 3 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 ggml-phi-knc-dot_q5_K_q8_K.c
 create mode 100644 ggml-phi-knc-dot_q5_K_q8_K.h

diff --git a/Makefile b/Makefile
index b7ce36492..c33b70268 100644
--- a/Makefile
+++ b/Makefile
@@ -325,7 +325,7 @@ ifeq "${K1OM}" ""
 	#MK_CFLAGS += -mssse3
 	#MK_CXXFLAGS += -mssse3
 else
-	OBJS += ggml-phi-knc.o
+	OBJS += ggml-phi-knc.o ggml-phi-knc-dot_q5_K_q8_K.o
 	MK_CFLAGS += -march=knc -mtune=knc
 endif
 
diff --git a/ggml-phi-knc-dot_q5_K_q8_K.c b/ggml-phi-knc-dot_q5_K_q8_K.c
new file mode 100644
index 000000000..9104a939c
--- /dev/null
+++ b/ggml-phi-knc-dot_q5_K_q8_K.c
@@ -0,0 +1,89 @@
+/* Scalar Q5_K x Q8_K dot product, built for the Xeon Phi (KNC) target. */
+
+#include <stddef.h> /* size_t */
+#include <stdint.h> /* int8_t, uint8_t, int16_t */
+
+/* Own header: supplies the prototype, so no local forward declaration is needed. */
+#include "ggml-phi-knc-dot_q5_K_q8_K.h"
+
+/*
+ * NOTE(review): block_q5_K, block_q8_K, QK_K and GGML_FP16_TO_FP32 are used
+ * below but declared nowhere in this file. They are presumably provided by
+ * the two project headers included here -- confirm against the target tree.
+ */
+#include "ggml-quants.h"
+#include "ggml-impl.h"
+
+/*
+ * Dot product of the Q5_K blocks at vx with the Q8_K blocks at vy.
+ *
+ *   n    element count; n / QK_K blocks are processed, any remainder is ignored
+ *   s    output; only *s is written
+ *   vx   read as an array of block_q5_K
+ *   vy   read as an array of block_q8_K
+ *   bs, bx, by, nrc   part of the signature but not used by this implementation
+ *
+ * NOTE(review): the unpack step fills only aux8[0..63], while the accumulation
+ * step walks QK_K entries of aux8 (QK_K/16 groups of 16) and QK_K/16 scale
+ * bytes. These agree only when QK_K == 64; with a larger QK_K the tail of aux8
+ * is read uninitialised. Confirm the intended QK_K before relying on this.
+ */
+void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+    /* Deliberately unused; the casts silence unused-parameter warnings. */
+    (void) bs;
+    (void) bx;
+    (void) by;
+    (void) nrc;
+
+    const block_q5_K * restrict x = vx;
+    const block_q8_K * restrict y = vy;
+
+    const int nb = n / QK_K;
+
+    int8_t  aux8[QK_K]; /* unpacked x values for the current block */
+    int16_t aux16[16];  /* q8 * aux8 products for one group of 16 */
+    float   sums[8] = {0};
+
+    for (int i = 0; i < nb; ++i) {
+        const uint8_t * restrict q4 = x[i].qs;
+        const uint8_t * restrict hm = x[i].qh;
+        const int8_t  * restrict q8 = y[i].qs;
+        int8_t * restrict a = aux8;
+
+        /* Low nibbles land in a[0..31], high nibbles in a[32..63]. */
+        for (int l = 0; l < 32; ++l) {
+            a[l +  0] = q4[l] & 0xF;
+            a[l + 32] = q4[l] >> 4;
+        }
+
+        /* For each bit position is: where that bit of hm[l] is clear, subtract 16 from a[8*is + l]. */
+        for (int is = 0; is < 8; ++is) {
+            const uint8_t m = 1 << is;
+            for (int l = 0; l < 8; ++l) {
+                a[8*is + l] -= (hm[l] & m ? 0 : 16);
+            }
+        }
+
+        const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+        const int8_t * restrict sc = x[i].scales;
+
+        /* One scale per group of 16; each group's products are folded into 8 lanes. */
+        for (int j = 0; j < QK_K/16; ++j) {
+            const float dl = d * sc[j];
+            for (int l = 0; l < 16; ++l) {
+                aux16[l] = q8[l] * a[l];
+            }
+            for (int l = 0; l < 8; ++l) {
+                sums[l] += dl * (aux16[l] + aux16[8 + l]);
+            }
+            q8 += 16;
+            a  += 16;
+        }
+    }
+
+    float sumf = 0;
+    for (int l = 0; l < 8; ++l) {
+        sumf += sums[l];
+    }
+    *s = sumf;
+}
diff --git a/ggml-phi-knc-dot_q5_K_q8_K.h b/ggml-phi-knc-dot_q5_K_q8_K.h
new file mode 100644
index 000000000..b416803e0
--- /dev/null
+++ b/ggml-phi-knc-dot_q5_K_q8_K.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "ggml.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Q5_K x Q8_K dot product; the result is stored in *s.
+ * GGML_RESTRICT (expected from ggml.h -- NOTE(review): confirm) replaces the C
+ * keyword restrict so the prototype also parses when included from C++.
+ */
+void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
+
+#ifdef __cplusplus
+}
+#endif