CUDA: quantized KV support for FA vec

This commit is contained in:
Johannes Gäßler 2024-05-21 19:38:25 +02:00
parent 10b1e45876
commit 672244a88b
11 changed files with 826 additions and 142 deletions

View file

@ -493,7 +493,10 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
endif # LLAMA_CUDA_NO_PEER_COPY
ifdef LLAMA_CUDA_CCBIN
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
endif
endif # LLAMA_CUDA_CCBIN
ifdef LLAMA_CUDA_FA_ALL_QUANTS
MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
endif # LLAMA_CUDA_FA_ALL_QUANTS
ifdef JETSON_EOL_MODULE_DETECT
define NVCC_COMPILE