From 8002d1014f2808c07f735078674cb1473dd483f6 Mon Sep 17 00:00:00 2001 From: slaren Date: Fri, 3 Nov 2023 12:12:08 +0100 Subject: [PATCH] add comment --- ggml-cuda.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 1727f9a05..bdbcca0ca 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -4874,6 +4874,7 @@ static void dequantize_row_q6_K_cuda(const void * vx, dst_t * y, const int k, cu static void dequantize_mul_mat_vec_q4_0_cuda(const void * vx, const dfloat * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { GGML_ASSERT(ncols % GGML_CUDA_DMMV_X == 0); const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y; + // the number of rows may exceed maximum grid size in the y or z dimensions, use the x dimension instead const dim3 block_nums(block_num_y, 1, 1); const dim3 block_dims(WARP_SIZE, GGML_CUDA_MMV_Y, 1); dequantize_mul_mat_vec