From a79756b210ba76928bc939451da04f2f042f216d Mon Sep 17 00:00:00 2001
From: Slaren <2141330+slaren@users.noreply.github.com>
Date: Mon, 1 May 2023 14:51:03 +0200
Subject: [PATCH] cuBLAS: update block_q5_1

---
 ggml-cuda.cu | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 443378c6c..e8a1e77cb 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -68,7 +68,7 @@ static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5
 typedef struct {
     half d;                 // delta
     half m;                 // min
-    uint32_t qh;            // 5-th bit of quants
+    uint8_t qh[4];          // 5-th bit of quants
     uint8_t qs[QK5_1 / 2];  // nibbles / quants
 } block_q5_1;
 static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
@@ -189,7 +189,8 @@ static __global__ void dequantize_block_q5_1(const void * vx, float * y) {
 
     const uint8_t * pp = x[i].qs;
 
-    const uint32_t qh = x[i].qh;
+    uint32_t qh;
+    memcpy(&qh, x[i].qh, sizeof(qh));
 
     for (int l = 0; l < QK5_1; l += 2) {
         const uint8_t vi = pp[l/2];