cuda : improve cuda pool efficiency using virtual memory (#4606)
* cuda : improve cuda pool efficiency using virtual memory * fix mixtral * fix cmake build * check for vmm support, disable for hip ggml-ci * fix hip build * clarify granularity * move all caps to g_device_caps * refactor error checking * add cuda_pool_alloc, refactor most pool allocations ggml-ci * fix hip build * CUBLAS_TF32_TENSOR_OP_MATH is not a macro * more hip crap * llama : fix msvc warnings * ggml : fix msvc warnings * minor * minor * cuda : fallback to CPU on host buffer alloc fail * Update ggml-cuda.cu Co-authored-by: Johannes Gäßler <johannesg@5d6.de> * Update ggml-cuda.cu Co-authored-by: Johannes Gäßler <johannesg@5d6.de> * ensure allocations are always aligned * act_size -> actual_size --------- Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
parent
708e179e85
commit
5bf3953d7e
8 changed files with 328 additions and 208 deletions
6
Makefile
6
Makefile
|
@ -367,17 +367,15 @@ endif # LLAMA_BLIS
|
|||
|
||||
ifdef LLAMA_CUBLAS
|
||||
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
|
||||
MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib
|
||||
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
|
||||
OBJS += ggml-cuda.o
|
||||
MK_NVCCFLAGS = -use_fast_math
|
||||
ifndef JETSON_EOL_MODULE_DETECT
|
||||
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
|
||||
endif # JETSON_EOL_MODULE_DETECT
|
||||
|
||||
ifdef LLAMA_DEBUG
|
||||
MK_NVCCFLAGS += -lineinfo
|
||||
endif
|
||||
|
||||
endif # LLAMA_DEBUG
|
||||
ifdef LLAMA_CUDA_NVCC
|
||||
NVCC = $(LLAMA_CUDA_NVCC)
|
||||
else
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue