From 80f69694e520a69fabe835629b3266f991498800 Mon Sep 17 00:00:00 2001 From: chooper1 Date: Mon, 18 Sep 2023 20:17:06 -0700 Subject: [PATCH] CI Fix --- ggml.c | 3 ++- sqllm.c | 26 +++++++++++++------------- sqllm.h | 2 +- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/ggml.c b/ggml.c index 3d4afc237..26f5f15b0 100644 --- a/ggml.c +++ b/ggml.c @@ -1792,7 +1792,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .to_float = NULL, .from_float = (ggml_from_float_t) ggml_fp32_to_fp16_row, .from_float_reference = NULL, - .vec_dot = ggml_vec_dot_q4_sq_fp16, + .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_q4_sq_fp16, .vec_dot_type = GGML_TYPE_F16, } #endif @@ -12640,6 +12640,7 @@ static void ggml_compute_forward_clamp( case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: case GGML_TYPE_Q8_K: + case GGML_TYPE_Q4_SQ: case GGML_TYPE_I8: case GGML_TYPE_I16: case GGML_TYPE_I32: diff --git a/sqllm.c b/sqllm.c index f520bfc40..604db972f 100644 --- a/sqllm.c +++ b/sqllm.c @@ -8,7 +8,7 @@ #include -void ggml_vec_dot_q4_sq_fp16(const int n, float * restrict s, const void * restrict v, const ggml_fp16_t * restrict y) { +void ggml_vec_dot_q4_sq_fp16(const int n, float * restrict s, void * restrict v, ggml_fp16_t * restrict y) { const int nb = n / 8; @@ -17,7 +17,7 @@ void ggml_vec_dot_q4_sq_fp16(const int n, float * restrict s, const void * restr // pointer initialization int32_t * baselut = v; int32_t * qweight = baselut + 8; // get start of row - float * yvector = y; + float * yvector = (void *) y; // initialize sum float16x8_t sumf1 = vdupq_n_f16(0); @@ -26,15 +26,15 @@ void ggml_vec_dot_q4_sq_fp16(const int n, float * restrict s, const void * restr float16x8_t sumf4 = vdupq_n_f16(0); // initialize lookup table - uint8x16_t lut1 = vld1q_u8(baselut); - uint8x16_t lut2 = vld1q_u8(baselut+4); + uint8x16_t lut1 = vld1q_u8((void *) baselut); + uint8x16_t lut2 = vld1q_u8((void *) (baselut+4)); uint8x16_t lutl = vuzp1q_u8(lut1, lut2); uint8x16_t luth = vuzp2q_u8(lut1, lut2); for (int i = 0; i < nb; i+=4) { // get packed vector uint8x16_t m4b = vdupq_n_u8(0x0F); - uint8x16_t packed_vector = vld1q_u8(&qweight[i]); + uint8x16_t packed_vector = vld1q_u8((void *) &qweight[i]); // 4-bit -> 2 8-bit vectors uint8x16_t packed_vector_lb = vandq_u8 (packed_vector, m4b); @@ -51,16 +51,16 @@ void ggml_vec_dot_q4_sq_fp16(const int n, float * restrict s, const void * restr uint8x16_t lookup_1h = vqtbl1q_u8 (luth, packed_vector_1); // interleave lookup values - float16x8_t lookup_0_z1 = vzip1q_u8(lookup_0l, lookup_0h); - float16x8_t lookup_0_z2 = vzip2q_u8(lookup_0l, lookup_0h); - float16x8_t lookup_1_z1 = vzip1q_u8(lookup_1l, lookup_1h); - float16x8_t lookup_1_z2 = vzip2q_u8(lookup_1l, lookup_1h); + float16x8_t lookup_0_z1 = (float16x8_t) vzip1q_u8(lookup_0l, lookup_0h); + float16x8_t lookup_0_z2 = (float16x8_t) vzip2q_u8(lookup_0l, lookup_0h); + float16x8_t lookup_1_z1 = (float16x8_t) vzip1q_u8(lookup_1l, lookup_1h); + float16x8_t lookup_1_z2 = (float16x8_t) vzip2q_u8(lookup_1l, lookup_1h); //load int8 values - float16x8_t tmp1 = vld1q_f16(&yvector[4*i]); - float16x8_t tmp2 = vld1q_f16(&yvector[4*i+4]); - float16x8_t tmp3 = vld1q_f16(&yvector[4*i+8]); - float16x8_t tmp4 = vld1q_f16(&yvector[4*i+12]); + float16x8_t tmp1 = vld1q_f16(((void *) &yvector[4*i])); + float16x8_t tmp2 = vld1q_f16(((void *) &yvector[4*i+4])); + float16x8_t tmp3 = vld1q_f16(((void *) &yvector[4*i+8])); + float16x8_t tmp4 = vld1q_f16(((void *) &yvector[4*i+12])); //fp16 mul sumf1 = vfmaq_f16(sumf1, lookup_0_z1, tmp1); diff --git a/sqllm.h b/sqllm.h index b3f45a337..4462b4310 100644 --- a/sqllm.h +++ b/sqllm.h @@ -10,4 +10,4 @@ #ifdef __ARM_NEON #include #endif -void ggml_vec_dot_q4_sq_fp16(const int n, float * restrict s, const void * restrict v, const ggml_fp16_t * restrict y); +void ggml_vec_dot_q4_sq_fp16(const int n, float * restrict s, void * restrict v, ggml_fp16_t * restrict y);