From 80196bd76c2978debdb0026ec39a20789411b093 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 22 Feb 2024 19:10:20 +0200 Subject: [PATCH] cuda : no longer ggml headers last ggml-ci --- ggml-cuda.cu | 9 ++++----- ggml-quants.c | 7 +++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index e7c211d7d..b0e454e02 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -1,3 +1,7 @@ +#include "ggml-cuda.h" +#include "ggml.h" +#include "ggml-backend-impl.h" + #include #include #include @@ -121,11 +125,6 @@ #endif // defined(GGML_USE_HIPBLAS) -// ggml-cuda need half type so keep ggml headers include at last -#include "ggml-cuda.h" -#include "ggml.h" -#include "ggml-backend-impl.h" - #define CUDART_HMAX 11070 // CUDA 11.7, min. ver. for which __hmax and __hmax2 are known to work (may be higher than needed) #define CC_PASCAL 600 diff --git a/ggml-quants.c b/ggml-quants.c index 5d8e66a47..e62ef70e9 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -9498,7 +9498,6 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void * float sumf = 0; for (int ib = 0; ib < nb; ib += 2) { - q4bits.val[0] = vld1q_u8(x[ib+0].qs); q4bits.val[1] = vld1q_u8(x[ib+1].qs); q8b.val[0] = vld1q_s8(y[ib+0].qs); @@ -9514,9 +9513,9 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void * prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]); prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]); - //sumf += (float)x[ib+0].d * (float)y[ib+0].d * vaddvq_s32(prod_1) + (float)x[ib+1].d * (float)y[ib+1].d * vaddvq_s32(prod_2); - sumf += GGML_FP16_TO_FP32(x[ib+0].d) * GGML_FP16_TO_FP32(y[ib+0].d) * vaddvq_s32(prod_1) + GGML_FP16_TO_FP32(x[ib+1].d) * GGML_FP16_TO_FP32(y[ib+1].d) * vaddvq_s32(prod_2); - + sumf += + GGML_FP16_TO_FP32(x[ib+0].d) * GGML_FP16_TO_FP32(y[ib+0].d) * vaddvq_s32(prod_1) + + GGML_FP16_TO_FP32(x[ib+1].d) * GGML_FP16_TO_FP32(y[ib+1].d) * vaddvq_s32(prod_2); } *s = sumf;