mmq implementation for non k-quants
This commit is contained in:
parent
d91f3f0c55
commit
ddb37bf8a0
2 changed files with 784 additions and 265 deletions
2
Makefile
2
Makefile
|
@@ -194,7 +194,7 @@ ifdef LLAMA_CUBLAS
|
|||
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
||||
OBJS += ggml-cuda.o
|
||||
NVCCFLAGS = --forward-unknown-to-host-compiler
|
||||
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
|
||||
ifdef LLAMA_CUDA_NVCC
|
||||
NVCC = $(LLAMA_CUDA_NVCC)
|
||||
else
|
||||
|
|
1047
ggml-cuda.cu
1047
ggml-cuda.cu
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue