Improve cuBLAS performance by using a memory pool (#1094)
* Improve cuBLAS performance by using a memory pool * Move cuda specific definitions to ggml-cuda.h/cu * Add CXX flags to nvcc * Change memory pool synchronization mechanism to a spin lock General code cleanup
This commit is contained in:
parent
25d7abbd1f
commit
50cb666b8a
4 changed files with 170 additions and 109 deletions
10
Makefile
10
Makefile
|
@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS
|
|||
LDFLAGS += -lopenblas
|
||||
endif
|
||||
ifdef LLAMA_CUBLAS
|
||||
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
|
||||
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
|
||||
OBJS += ggml-cuda.o
|
||||
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
|
||||
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
|
||||
OBJS += ggml-cuda.o
|
||||
NVCC = nvcc
|
||||
NVCCFLAGS = --forward-unknown-to-host-linker -arch=native
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||
nvcc -arch=native -c -o $@ $<
|
||||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
|
||||
endif
|
||||
ifdef LLAMA_GPROF
|
||||
CFLAGS += -pg
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue