cuda : refactor into multiple files (#6269)
This commit is contained in:
parent
ad3a0505e3
commit
ae1f211ce2
59 changed files with 9154 additions and 8987 deletions
23
Makefile
23
Makefile
|
@ -398,6 +398,7 @@ ifdef LLAMA_CUBLAS
|
|||
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
|
||||
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
|
||||
OBJS += ggml-cuda.o
|
||||
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
|
||||
MK_NVCCFLAGS += -use_fast_math
|
||||
ifdef LLAMA_FATAL_WARNINGS
|
||||
MK_NVCCFLAGS += -Werror all-warnings
|
||||
|
@ -458,12 +459,23 @@ endif # LLAMA_CUDA_NO_PEER_COPY
|
|||
ifdef LLAMA_CUDA_CCBIN
|
||||
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||
endif
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml-common.h
|
||||
|
||||
ifdef JETSON_EOL_MODULE_DETECT
|
||||
define NVCC_COMPILE
|
||||
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
|
||||
endef # NVCC_COMPILE
|
||||
else
|
||||
define NVCC_COMPILE
|
||||
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
|
||||
endef # NVCC_COMPILE
|
||||
endif # JETSON_EOL_MODULE_DETECT
|
||||
|
||||
ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
|
||||
$(NVCC_COMPILE)
|
||||
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
||||
$(NVCC_COMPILE)
|
||||
|
||||
endif # LLAMA_CUBLAS
|
||||
|
||||
ifdef LLAMA_CLBLAST
|
||||
|
@ -510,7 +522,6 @@ ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
|
|||
endif # LLAMA_VULKAN
|
||||
|
||||
ifdef LLAMA_HIPBLAS
|
||||
|
||||
ifeq ($(wildcard /opt/rocm),)
|
||||
ROCM_PATH ?= /usr
|
||||
GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
|
||||
|
@ -539,8 +550,13 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
|
|||
HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
|
||||
endif # LLAMA_CUDA_NO_PEER_COPY
|
||||
OBJS += ggml-cuda.o
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
||||
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||
|
||||
ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
|
||||
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||
|
||||
endif # LLAMA_HIPBLAS
|
||||
|
||||
ifdef LLAMA_METAL
|
||||
|
@ -687,6 +703,7 @@ libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
|
|||
|
||||
clean:
|
||||
rm -vrf *.o tests/*.o *.so *.a *.dll benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
||||
rm -vrf ggml-cuda/*.o
|
||||
find examples pocs -type f -name "*.o" -delete
|
||||
|
||||
#
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue