Convert vector to f16 for dequantize mul mat vec (#1913)

* Convert vector to f16 for dmmv

* compile option

* Added compilation option description to README

* Changed cmake CUDA_ARCHITECTURES from "OFF" to "native"
This commit is contained in:
Johannes Gäßler 2023-06-19 10:23:56 +02:00 committed by GitHub
parent b24c3049d9
commit 16b9cd1939
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 158 additions and 68 deletions

View file

@ -169,6 +169,9 @@ ifdef LLAMA_CUDA_DMMV_Y
else
NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
endif # LLAMA_CUDA_DMMV_Y
ifdef LLAMA_CUDA_DMMV_F16
NVCCFLAGS += -DGGML_CUDA_DMMV_F16
endif # LLAMA_CUDA_DMMV_F16
ifdef LLAMA_CUDA_KQUANTS_ITER
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
else