From 3ba9d04c2ae51a77bf09c2e3f6c06eecf7193b44 Mon Sep 17 00:00:00 2001
From: OuadiElfarouki
Date: Mon, 2 Sep 2024 14:06:58 +0100
Subject: [PATCH] Fixed dmmv dequant for k<= GGML_SYCL_DMMV_X

---
 ggml/src/ggml-sycl/dmmv.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ggml/src/ggml-sycl/dmmv.cpp b/ggml/src/ggml-sycl/dmmv.cpp
index 5c343822f..e2bc81032 100644
--- a/ggml/src/ggml-sycl/dmmv.cpp
+++ b/ggml/src/ggml-sycl/dmmv.cpp
@@ -47,6 +47,7 @@ static void dequantize_mul_mat_vec(const void * __restrict__ vx, const dfloat *
 
     for (int i = 0; i < ncols; i += iter_stride) {
         const int col = i + vals_per_iter*tid;
+        if (col >= ncols) break;
         const int ib = (row*ncols + col)/qk; // x block index
         const int iqs = (col%qk)/qr; // x quant index
         const int iybs = col - col%qk; // y block start index
@@ -54,6 +55,7 @@ static void dequantize_mul_mat_vec(const void * __restrict__ vx, const dfloat *
 // processing >2 values per i iter is faster for fast GPUs
 #pragma unroll
         for (int j = 0; j < vals_per_iter; j += 2) {
+            if (col + j >= ncols) break;
             // process 2 vals per j iter
 
             // dequantize