From e69c924ad1c58fc9015e0f828e726f80beda4634 Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Sun, 30 Apr 2023 10:44:48 +0200
Subject: [PATCH] Use two memcpy calls for q5_0 buffer transfer

---
 ggml-opencl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ggml-opencl.c b/ggml-opencl.c
index c93deef62..4389eca39 100644
--- a/ggml-opencl.c
+++ b/ggml-opencl.c
@@ -3,6 +3,7 @@
 #define CL_TARGET_OPENCL_VERSION 110
 #include <clblast_c.h>
 
+#include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 
@@ -308,7 +309,8 @@ void ggml_cl_sgemm_wrapper(
         cl_host_b = (cl_block_q5_0*) malloc(sizeof(cl_block_q5_0) * global / 32);
         for (size_t i = 0; i < global / 32; i++) {
             cl_host_b[i].d = ggml_fp16_to_fp32(b[i].d);
-            memcpy(&cl_host_b[i].qh, b[i].qh, sizeof(uint32_t) + QK5_0 / 2);
+            memcpy(&cl_host_b[i].qh, b[i].qh, sizeof(uint32_t));
+            memcpy(&cl_host_b[i].qs, b[i].qs, QK5_0 / 2);
         }
         host_b = (const float*) cl_host_b;
         size_qb = global * (sizeof(float) + sizeof(uint32_t) + local) / 32;