diff --git a/ggml-cuda/mmq.cuh b/ggml-cuda/mmq.cuh index 62df7c45f..dda1e341e 100644 --- a/ggml-cuda/mmq.cuh +++ b/ggml-cuda/mmq.cuh @@ -1124,7 +1124,7 @@ static __global__ void mul_mat_q( for (int i0 = 0; i0 < mmq_y; i0 += WARP_SIZE) { const int i = blockIdx.x*mmq_y + i0 + threadIdx.x; - if (need_check && i > ne0) { + if (need_check && i >= ne0) { continue; }