ggml: separate musa into its own section in the Makefile
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
This commit is contained in:
parent
dc2313564d
commit
08318dc178
2 changed files with 115 additions and 46 deletions
155
Makefile
155
Makefile
|
@ -531,21 +531,10 @@ ifndef GGML_NO_ACCELERATE
|
|||
endif
|
||||
endif # GGML_NO_ACCELERATE
|
||||
|
||||
ifdef GGML_MUSA
|
||||
CC := clang
|
||||
CXX := clang++
|
||||
GGML_CUDA := 1
|
||||
MK_CPPFLAGS += -DGGML_USE_MUSA
|
||||
endif
|
||||
|
||||
ifndef GGML_NO_OPENMP
|
||||
MK_CPPFLAGS += -DGGML_USE_OPENMP
|
||||
MK_CFLAGS += -fopenmp
|
||||
MK_CXXFLAGS += -fopenmp
|
||||
ifdef GGML_MUSA
|
||||
MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
|
||||
MK_LDFLAGS += -L/usr/lib/llvm-10/lib
|
||||
endif # GGML_MUSA
|
||||
endif # GGML_NO_OPENMP
|
||||
|
||||
ifdef GGML_OPENBLAS
|
||||
|
@ -601,27 +590,15 @@ else
|
|||
endif # GGML_CUDA_FA_ALL_QUANTS
|
||||
|
||||
ifdef GGML_CUDA
|
||||
ifdef GGML_MUSA
|
||||
ifneq ('', '$(wildcard /opt/musa)')
|
||||
CUDA_PATH ?= /opt/musa
|
||||
else
|
||||
CUDA_PATH ?= /usr/local/musa
|
||||
endif
|
||||
|
||||
MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include
|
||||
MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64
|
||||
MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22
|
||||
ifneq ('', '$(wildcard /opt/cuda)')
|
||||
CUDA_PATH ?= /opt/cuda
|
||||
else
|
||||
ifneq ('', '$(wildcard /opt/cuda)')
|
||||
CUDA_PATH ?= /opt/cuda
|
||||
else
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
endif
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
endif
|
||||
|
||||
MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
|
||||
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
|
||||
MK_NVCCFLAGS += -use_fast_math
|
||||
endif # GGML_MUSA
|
||||
MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
|
||||
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
|
||||
MK_NVCCFLAGS += -use_fast_math
|
||||
|
||||
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
|
||||
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
||||
|
@ -631,11 +608,9 @@ ifdef LLAMA_FATAL_WARNINGS
|
|||
MK_NVCCFLAGS += -Werror all-warnings
|
||||
endif # LLAMA_FATAL_WARNINGS
|
||||
|
||||
ifndef GGML_MUSA
|
||||
ifndef JETSON_EOL_MODULE_DETECT
|
||||
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
|
||||
endif # JETSON_EOL_MODULE_DETECT
|
||||
endif # GGML_MUSA
|
||||
|
||||
ifdef LLAMA_DEBUG
|
||||
MK_NVCCFLAGS += -lineinfo
|
||||
|
@ -648,11 +623,7 @@ endif # GGML_CUDA_DEBUG
|
|||
ifdef GGML_CUDA_NVCC
|
||||
NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
|
||||
else
|
||||
ifdef GGML_MUSA
|
||||
NVCC = $(CCACHE) mcc
|
||||
else
|
||||
NVCC = $(CCACHE) nvcc
|
||||
endif # GGML_MUSA
|
||||
NVCC = $(CCACHE) nvcc
|
||||
endif # GGML_CUDA_NVCC
|
||||
|
||||
ifdef CUDA_DOCKER_ARCH
|
||||
|
@ -724,15 +695,9 @@ define NVCC_COMPILE
|
|||
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
|
||||
endef # NVCC_COMPILE
|
||||
else
|
||||
ifdef GGML_MUSA
|
||||
define NVCC_COMPILE
|
||||
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -c $< -o $@
|
||||
endef # NVCC_COMPILE
|
||||
else
|
||||
define NVCC_COMPILE
|
||||
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
|
||||
endef # NVCC_COMPILE
|
||||
endif # GGML_MUSA
|
||||
endif # JETSON_EOL_MODULE_DETECT
|
||||
|
||||
ggml/src/ggml-cuda/%.o: \
|
||||
|
@ -874,6 +839,107 @@ ggml/src/ggml-cuda/%.o: \
|
|||
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||
endif # GGML_HIPBLAS
|
||||
|
||||
ifdef GGML_MUSA
|
||||
ifeq ($(wildcard /opt/musa),)
|
||||
MUSA_PATH ?= /usr/local/musa
|
||||
else
|
||||
MUSA_PATH ?= /opt/musa
|
||||
endif
|
||||
MTGPU_TARGETS ?= mp_21 mp_22
|
||||
|
||||
MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
|
||||
MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
|
||||
MK_LDFLAGS += -lmusa -lmusart -lmublas
|
||||
|
||||
ifndef GGML_NO_OPENMP
|
||||
# For Ubuntu Focal
|
||||
MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
|
||||
MK_LDFLAGS += -L/usr/lib/llvm-10/lib
|
||||
# For Ubuntu Jammy
|
||||
MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include
|
||||
MK_LDFLAGS += -L/usr/lib/llvm-14/lib
|
||||
endif # GGML_NO_OPENMP
|
||||
|
||||
CC := $(MUSA_PATH)/bin/clang
|
||||
CXX := $(MUSA_PATH)/bin/clang++
|
||||
MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc
|
||||
|
||||
MUSAFLAGS += $(addprefix --cuda-gpu-arch=, $(MTGPU_TARGETS))
|
||||
|
||||
ifdef GGML_CUDA_FORCE_DMMV
|
||||
MUSAFLAGS += -DGGML_CUDA_FORCE_DMMV
|
||||
endif # GGML_CUDA_FORCE_DMMV
|
||||
|
||||
ifdef GGML_CUDA_FORCE_MMQ
|
||||
MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ
|
||||
endif # GGML_CUDA_FORCE_MMQ
|
||||
|
||||
ifdef GGML_CUDA_FORCE_CUBLAS
|
||||
MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS
|
||||
endif # GGML_CUDA_FORCE_CUBLAS
|
||||
|
||||
ifdef GGML_CUDA_DMMV_X
|
||||
MUSAFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
|
||||
else
|
||||
MUSAFLAGS += -DGGML_CUDA_DMMV_X=32
|
||||
endif # GGML_CUDA_DMMV_X
|
||||
|
||||
ifdef GGML_CUDA_MMV_Y
|
||||
MUSAFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
|
||||
else
|
||||
MUSAFLAGS += -DGGML_CUDA_MMV_Y=1
|
||||
endif # GGML_CUDA_MMV_Y
|
||||
|
||||
ifdef GGML_CUDA_F16
|
||||
MUSAFLAGS += -DGGML_CUDA_F16
|
||||
endif # GGML_CUDA_F16
|
||||
|
||||
ifdef GGML_CUDA_DMMV_F16
|
||||
MUSAFLAGS += -DGGML_CUDA_F16
|
||||
endif # GGML_CUDA_DMMV_F16
|
||||
|
||||
ifdef GGML_CUDA_KQUANTS_ITER
|
||||
MUSAFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
|
||||
else
|
||||
MUSAFLAGS += -DK_QUANTS_PER_ITERATION=2
|
||||
endif
|
||||
|
||||
ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
|
||||
MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
|
||||
else
|
||||
MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
|
||||
endif # GGML_CUDA_PEER_MAX_BATCH_SIZE
|
||||
|
||||
ifdef GGML_CUDA_NO_PEER_COPY
|
||||
MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY
|
||||
endif # GGML_CUDA_NO_PEER_COPY
|
||||
|
||||
ifdef GGML_CUDA_FA_ALL_QUANTS
|
||||
MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
|
||||
endif # GGML_CUDA_FA_ALL_QUANTS
|
||||
|
||||
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
|
||||
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
|
||||
OBJ_GGML += $(OBJ_CUDA_TMPL)
|
||||
|
||||
ggml/src/ggml-cuda/ggml-cuda.o: \
|
||||
ggml/src/ggml-cuda/ggml-cuda.cu \
|
||||
ggml/include/ggml-cuda.h \
|
||||
ggml/include/ggml.h \
|
||||
ggml/include/ggml-backend.h \
|
||||
ggml/src/ggml-backend-impl.h \
|
||||
ggml/src/ggml-common.h \
|
||||
$(wildcard ggml/src/ggml-cuda/*.cuh)
|
||||
$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
|
||||
|
||||
ggml/src/ggml-cuda/%.o: \
|
||||
ggml/src/ggml-cuda/%.cu \
|
||||
ggml/include/ggml.h \
|
||||
ggml/src/ggml-common.h \
|
||||
ggml/src/ggml-cuda/common.cuh
|
||||
$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
|
||||
endif # GGML_MUSA
|
||||
|
||||
ifdef GGML_METAL
|
||||
MK_CPPFLAGS += -DGGML_USE_METAL
|
||||
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
|
||||
|
@ -1002,7 +1068,6 @@ $(info I CXX: $(shell $(CXX) --version | head -n 1))
|
|||
ifdef GGML_CUDA
|
||||
$(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
|
||||
CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
|
||||
ifndef GGML_MUSA
|
||||
ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
|
||||
|
||||
ifndef CUDA_DOCKER_ARCH
|
||||
|
@ -1012,7 +1077,6 @@ endif # CUDA_POWER_ARCH
|
|||
endif # CUDA_DOCKER_ARCH
|
||||
|
||||
endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
|
||||
endif # GGML_MUSA
|
||||
endif # GGML_CUDA
|
||||
$(info )
|
||||
|
||||
|
@ -1283,6 +1347,7 @@ clean:
|
|||
rm -vrf ggml/src/ggml-rpc/*.o
|
||||
rm -vrf ggml/src/ggml-sycl/*.o
|
||||
rm -vrf ggml/src/ggml-vulkan/*.o
|
||||
rm -vrf ggml/src/ggml-musa/*.o
|
||||
rm -rvf $(BUILD_TARGETS)
|
||||
rm -rvf $(TEST_TARGETS)
|
||||
rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
if (NOT EXISTS $ENV{MUSA_PATH})
|
||||
set(MUSA_PATH /usr/local/musa)
|
||||
if (NOT EXISTS /opt/musa)
|
||||
set(MUSA_PATH /usr/local/musa)
|
||||
else()
|
||||
set(MUSA_PATH /opt/musa)
|
||||
endif()
|
||||
else()
|
||||
set(MUSA_PATH $ENV{MUSA_PATH})
|
||||
endif()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue