Convert vector to f16 for dequantize mul mat vec (#1913)
* Convert vector to f16 for dmmv * compile option * Added compilation option description to README * Changed cmake CUDA_ARCHITECTURES from "OFF" to "native"
This commit is contained in:
parent
b24c3049d9
commit
16b9cd1939
5 changed files with 158 additions and 68 deletions
3
Makefile
3
Makefile
|
@ -169,6 +169,9 @@ ifdef LLAMA_CUDA_DMMV_Y
|
|||
else
|
||||
NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
|
||||
endif # LLAMA_CUDA_DMMV_Y
|
||||
ifdef LLAMA_CUDA_DMMV_F16
|
||||
NVCCFLAGS += -DGGML_CUDA_DMMV_F16
|
||||
endif # LLAMA_CUDA_DMMV_F16
|
||||
ifdef LLAMA_CUDA_KQUANTS_ITER
|
||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||
else
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue