CUDA: tuned mul_mat_q kernels (#2546)

This commit is contained in:
Johannes Gäßler 2023-08-09 09:42:34 +02:00 committed by GitHub
parent f5bfea0580
commit 25d43e0eb5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 676 additions and 386 deletions

View file

@@ -253,11 +253,6 @@ ifdef LLAMA_CUDA_KQUANTS_ITER
else
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
endif
# Append a -DGGML_CUDA_MMQ_Y=<value> define to NVCCFLAGS: use the value of
# LLAMA_CUDA_MMQ_Y when that variable is set (non-empty), otherwise use 64.
ifdef LLAMA_CUDA_MMQ_Y
NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y)
else
NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64
endif # LLAMA_CUDA_MMQ_Y
#ifdef LLAMA_CUDA_CUBLAS
# NVCCFLAGS += -DGGML_CUDA_CUBLAS
#endif # LLAMA_CUDA_CUBLAS