CUDA: quantized KV support for FA vec
This commit is contained in:
parent
10b1e45876
commit
672244a88b
11 changed files with 826 additions and 142 deletions
5
Makefile
5
Makefile
|
@@ -493,7 +493,10 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
 endif # LLAMA_CUDA_NO_PEER_COPY
 ifdef LLAMA_CUDA_CCBIN
 MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
-endif
+endif # LLAMA_CUDA_CCBIN
+ifdef LLAMA_CUDA_FA_ALL_QUANTS
+MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
+endif # LLAMA_CUDA_FA_ALL_QUANTS
 
 ifdef JETSON_EOL_MODULE_DETECT
 define NVCC_COMPILE
Loading…
Add table
Add a link
Reference in a new issue