mmq implementation for non k-quants
This commit is contained in:
parent
d91f3f0c55
commit
ddb37bf8a0
2 changed files with 784 additions and 265 deletions
2
Makefile
2
Makefile
|
@ -194,7 +194,7 @@ ifdef LLAMA_CUBLAS
|
||||||
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||||
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
||||||
OBJS += ggml-cuda.o
|
OBJS += ggml-cuda.o
|
||||||
NVCCFLAGS = --forward-unknown-to-host-compiler
|
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
|
||||||
ifdef LLAMA_CUDA_NVCC
|
ifdef LLAMA_CUDA_NVCC
|
||||||
NVCC = $(LLAMA_CUDA_NVCC)
|
NVCC = $(LLAMA_CUDA_NVCC)
|
||||||
else
|
else
|
||||||
|
|
1047
ggml-cuda.cu
1047
ggml-cuda.cu
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue