From a79756b210ba76928bc939451da04f2f042f216d Mon Sep 17 00:00:00 2001
From: Slaren <2141330+slaren@users.noreply.github.com>
Date: Mon, 1 May 2023 14:51:03 +0200
Subject: [PATCH] cuBLAS: update block_q5_1

---
 ggml-cuda.cu | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 443378c6c..e8a1e77cb 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -68,7 +68,7 @@ static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5
 typedef struct {
     half d; // delta
     half m; // min
-    uint32_t qh; // 5-th bit of quants
+    uint8_t qh[4]; // 5-th bit of quants
     uint8_t qs[QK5_1 / 2]; // nibbles / quants
 } block_q5_1;
 static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
@@ -189,7 +189,8 @@ static __global__ void dequantize_block_q5_1(const void * vx, float * y) {
 
     const uint8_t * pp = x[i].qs;
 
-    const uint32_t qh = x[i].qh;
+    uint32_t qh;
+    memcpy(&qh, x[i].qh, sizeof(qh));
 
     for (int l = 0; l < QK5_1; l += 2) {
         const uint8_t vi = pp[l/2];