CUDA: tuned mul_mat_q kernels (#2546)
This commit is contained in:
parent
f5bfea0580
commit
25d43e0eb5
3 changed files with 676 additions and 386 deletions
5
Makefile
5
Makefile
|
@@ -253,11 +253,6 @@ ifdef LLAMA_CUDA_KQUANTS_ITER
|
|||
else
|
||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
||||
endif
|
||||
ifdef LLAMA_CUDA_MMQ_Y
|
||||
NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y)
|
||||
else
|
||||
NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64
|
||||
endif # LLAMA_CUDA_MMQ_Y
|
||||
#ifdef LLAMA_CUDA_CUBLAS
|
||||
# NVCCFLAGS += -DGGML_CUDA_CUBLAS
|
||||
#endif # LLAMA_CUDA_CUBLAS
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue