From 038ed63195888f842ff05ed9a31de96c4b562052 Mon Sep 17 00:00:00 2001
From: JohannesGaessler
Date: Sat, 29 Jul 2023 08:03:30 +0200
Subject: [PATCH] Updated Makefile

---
Notes (this region, between the "---" line and the diff, is ignored by
"git am"; it is commentary only):

  * Makefile: adds an optional LLAMA_CUDA_MMQ_Y variable. When it is
    defined, its value is appended to NVCCFLAGS as
    -DGGML_CUDA_MMQ_Y=<value>; when it is not defined,
    -DGGML_CUDA_MMQ_Y=64 is appended instead.
  * ggml-cuda.cu: the fallback used when GGML_CUDA_MMQ_Y is not already
    defined changes from 128 to 64, which matches the Makefile default
    above, and the closing #endif gains a label comment.
  * NOTE(review): the subject mentions only the Makefile, but the patch
    also changes ggml-cuda.cu.
  * NOTE(review): the indentation before the NVCCFLAGS lines is assumed
    to be a single tab; confirm against the target Makefile before
    applying.

 Makefile     | 5 +++++
 ggml-cuda.cu | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index cebcf782e..8068097e7 100644
--- a/Makefile
+++ b/Makefile
@@ -220,6 +220,11 @@ else ifdef LLAMA_CUDA_DMMV_Y
 else
 	NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
 endif # LLAMA_CUDA_MMV_Y
+ifdef LLAMA_CUDA_MMQ_Y
+	NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y)
+else
+	NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64
+endif # LLAMA_CUDA_MMQ_Y
 ifdef LLAMA_CUDA_DMMV_F16
 	NVCCFLAGS += -DGGML_CUDA_DMMV_F16
 endif # LLAMA_CUDA_DMMV_F16
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index f0d783418..9932c3b9d 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -263,8 +263,8 @@ static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_
 #define CUDA_DEQUANTIZE_BLOCK_SIZE 256
 
 #ifndef GGML_CUDA_MMQ_Y
-#define GGML_CUDA_MMQ_Y 128
-#endif
+#define GGML_CUDA_MMQ_Y 64
+#endif // GGML_CUDA_MMQ_Y
 
 // dmmv = dequantize_mul_mat_vec
 #ifndef GGML_CUDA_DMMV_X