begin work on targeting dot_q5_K_q8_K.
This commit is contained in:
parent
9185e14922
commit
a7bd64c130
3 changed files with 64 additions and 1 deletions
2
Makefile
2
Makefile
|
@ -292,7 +292,7 @@ ifeq "${K1OM}" ""
|
|||
#MK_CFLAGS += -mssse3
|
||||
#MK_CXXFLAGS += -mssse3
|
||||
else
|
||||
OBJS += ggml-phi-knc.o
|
||||
OBJS += ggml-phi-knc.o ggml-phi-knc-dot_q5_K_q8_K.o
|
||||
MK_CFLAGS += -march=knc -mtune=knc
|
||||
endif
|
||||
|
||||
|
|
49
ggml-phi-knc-dot_q5_K_q8_K.c
Normal file
49
ggml-phi-knc-dot_q5_K_q8_K.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
|
||||
/* A forward declaration, to keep GCC happy. */
|
||||
void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc);
|
||||
|
||||
void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
|
||||
|
||||
const block_q5_K * restrict x = vx;
|
||||
const block_q8_K * restrict y = vy;
|
||||
|
||||
const int nb = n / QK_K;
|
||||
|
||||
static const uint32_t kmask1 = 0x3f3f3f3f;
|
||||
static const uint32_t kmask2 = 0x0f0f0f0f;
|
||||
static const uint32_t kmask3 = 0x03030303;
|
||||
|
||||
uint32_t utmp[4];
|
||||
int8_t aux8[QK_K];
|
||||
int16_t aux16[16];
|
||||
float sums [8];
|
||||
memset(sums, 0, 8*sizeof(float));
|
||||
|
||||
float sumf = 0;
|
||||
for (int i = 0; i < nb; ++i) {
|
||||
const uint8_t * restrict q4 = x[i].qs;
|
||||
const uint8_t * restrict hm = x[i].qh;
|
||||
const int8_t * restrict q8 = y[i].qs;
|
||||
int8_t * restrict a = aux8;
|
||||
for (int l = 0; l < 32; ++l) {
|
||||
a[l+ 0] = q4[l] & 0xF;
|
||||
a[l+32] = q4[l] >> 4;
|
||||
}
|
||||
for (int is = 0; is < 8; ++is) {
|
||||
uint8_t m = 1 << is;
|
||||
for (int l = 0; l < 8; ++l) a[8*is + l] -= (hm[l] & m ? 0 : 16);
|
||||
}
|
||||
|
||||
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
|
||||
const int8_t * restrict sc = x[i].scales;
|
||||
|
||||
for (int j = 0; j < QK_K/16; ++j) {
|
||||
const float dl = d * sc[j];
|
||||
for (int l = 0; l < 16; ++l) aux16[l] = q8[l] * a[l];
|
||||
for (int l = 0; l < 8; ++l) sums[l] += dl * (aux16[l] + aux16[8+l]);
|
||||
q8 += 16; a += 16;
|
||||
}
|
||||
}
|
||||
for (int l = 0; l < 8; ++l) sumf += sums[l];
|
||||
*s = sumf;
|
||||
}
|
14
ggml-phi-knc-dot_q5_K_q8_K.h
Normal file
14
ggml-phi-knc-dot_q5_K_q8_K.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#pragma once

#include <stddef.h>

#include "ggml.h"

/*
 * `restrict` is a C keyword that C++ does not provide, so the prototype below
 * must not spell it literally when this header is pulled into a C++ file.
 * Reuse GGML_RESTRICT if an earlier header already defines it; otherwise
 * define it here.
 */
#ifndef GGML_RESTRICT
#ifdef __cplusplus
#define GGML_RESTRICT
#else
#define GGML_RESTRICT restrict
#endif
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Dot product of a row of q5_K blocks (vx) with a row of q8_K blocks (vy),
 * n quantized values long; the result is written to *s.
 * The remaining parameters (bs, bx, by, nrc) are part of the signature but
 * are not used by the implementation in ggml-phi-knc-dot_q5_K_q8_K.c.
 */
void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);

#ifdef __cplusplus
}
#endif
|
Loading…
Add table
Add a link
Reference in a new issue