From b7ddc8bf121451aec31ff9a86513b1036a3e1105 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 16 Jan 2024 23:06:18 +0200
Subject: [PATCH] cuda : fix out-of-bounds-access in `mul_mat_vec_q`

ggml-ci
---
 ggml-cuda.cu | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 46d85a8ea..b2211d858 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -5131,10 +5131,10 @@ static __global__ void mul_mat_vec_q(const void * __restrict__ vx, const void *
     const block_q_t * x = (const block_q_t *) vx;
     const block_q8_1 * y = (const block_q8_1 *) vy;
 
-    for (int i = 0; i < blocks_per_row; i += blocks_per_warp) {
-        const int ibx = row*blocks_per_row + i + threadIdx.x / (qi/vdr); // x block index
+    for (int i = threadIdx.x / (qi/vdr); i < blocks_per_row; i += blocks_per_warp) {
+        const int ibx = row*blocks_per_row + i; // x block index
 
-        const int iby = (i + threadIdx.x / (qi/vdr)) * (qk/QK8_1); // y block index that aligns with ibx
+        const int iby = i * (qk/QK8_1); // y block index that aligns with ibx
 
         const int iqs = vdr * (threadIdx.x % (qi/vdr)); // x block quant index when casting the quants to int
 