mmq implementation for non k-quants

2023-07-08 19:12:39 +02:00 · 2023-07-08 19:12:39 +02:00 · ddb37bf8a0
commit ddb37bf8a0
parent d91f3f0c55
2 changed files with 784 additions and 265 deletions
--- a/Makefile
+++ b/Makefile
@@ -194,7 +194,7 @@ ifdef LLAMA_CUBLAS
 	CXXFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
 	LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
 	OBJS      += ggml-cuda.o
-	NVCCFLAGS = --forward-unknown-to-host-compiler
+	NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
 ifdef LLAMA_CUDA_NVCC
 	NVCC = $(LLAMA_CUDA_NVCC)
 else
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu