From 08318dc178ba2043ec5a134350c64b527ff5ed5a Mon Sep 17 00:00:00 2001 From: Xiaodong Ye Date: Thu, 14 Nov 2024 22:45:06 +0800 Subject: [PATCH] ggml: separate musa into its own section in the Makefile Signed-off-by: Xiaodong Ye --- Makefile | 155 +++++++++++++++++++++--------- ggml/src/ggml-musa/CMakeLists.txt | 6 +- 2 files changed, 115 insertions(+), 46 deletions(-) diff --git a/Makefile b/Makefile index e67f56e9a..de06cb8b0 100644 --- a/Makefile +++ b/Makefile @@ -531,21 +531,10 @@ ifndef GGML_NO_ACCELERATE endif endif # GGML_NO_ACCELERATE -ifdef GGML_MUSA - CC := clang - CXX := clang++ - GGML_CUDA := 1 - MK_CPPFLAGS += -DGGML_USE_MUSA -endif - ifndef GGML_NO_OPENMP MK_CPPFLAGS += -DGGML_USE_OPENMP MK_CFLAGS += -fopenmp MK_CXXFLAGS += -fopenmp - ifdef GGML_MUSA - MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp - MK_LDFLAGS += -L/usr/lib/llvm-10/lib - endif # GGML_MUSA endif # GGML_NO_OPENMP ifdef GGML_OPENBLAS @@ -601,27 +590,15 @@ else endif # GGML_CUDA_FA_ALL_QUANTS ifdef GGML_CUDA - ifdef GGML_MUSA - ifneq ('', '$(wildcard /opt/musa)') - CUDA_PATH ?= /opt/musa - else - CUDA_PATH ?= /usr/local/musa - endif - - MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include - MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64 - MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22 + ifneq ('', '$(wildcard /opt/cuda)') + CUDA_PATH ?= /opt/cuda else - ifneq ('', '$(wildcard /opt/cuda)') - CUDA_PATH ?= /opt/cuda - else - CUDA_PATH ?= /usr/local/cuda - endif + CUDA_PATH ?= /usr/local/cuda + endif - MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include - MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib - MK_NVCCFLAGS += -use_fast_math - endif # GGML_MUSA + MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include + MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib + MK_NVCCFLAGS += -use_fast_math OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) @@ -631,11 +608,9 @@ ifdef LLAMA_FATAL_WARNINGS MK_NVCCFLAGS += -Werror all-warnings endif # LLAMA_FATAL_WARNINGS -ifndef GGML_MUSA ifndef JETSON_EOL_MODULE_DETECT MK_NVCCFLAGS += --forward-unknown-to-host-compiler endif # JETSON_EOL_MODULE_DETECT -endif # GGML_MUSA ifdef LLAMA_DEBUG MK_NVCCFLAGS += -lineinfo @@ -648,11 +623,7 @@ endif # GGML_CUDA_DEBUG ifdef GGML_CUDA_NVCC NVCC = $(CCACHE) $(GGML_CUDA_NVCC) else - ifdef GGML_MUSA - NVCC = $(CCACHE) mcc - else - NVCC = $(CCACHE) nvcc - endif # GGML_MUSA + NVCC = $(CCACHE) nvcc endif # GGML_CUDA_NVCC ifdef CUDA_DOCKER_ARCH @@ -724,15 +695,9 @@ define NVCC_COMPILE $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ endef # NVCC_COMPILE else - ifdef GGML_MUSA -define NVCC_COMPILE - $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -c $< -o $@ -endef # NVCC_COMPILE - else define NVCC_COMPILE $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ endef # NVCC_COMPILE - endif # GGML_MUSA endif # JETSON_EOL_MODULE_DETECT ggml/src/ggml-cuda/%.o: \ @@ -874,6 +839,107 @@ ggml/src/ggml-cuda/%.o: \ $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< endif # GGML_HIPBLAS +ifdef GGML_MUSA + ifeq ($(wildcard /opt/musa),) + MUSA_PATH ?= /usr/local/musa + else + MUSA_PATH ?= /opt/musa + endif + MTGPU_TARGETS ?= mp_21 mp_22 + + MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA + MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib + MK_LDFLAGS += -lmusa -lmusart -lmublas + + ifndef GGML_NO_OPENMP + # For Ubuntu Focal + MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp + MK_LDFLAGS += -L/usr/lib/llvm-10/lib + # For Ubuntu Jammy + MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include + MK_LDFLAGS += -L/usr/lib/llvm-14/lib + endif # GGML_NO_OPENMP + + CC := $(MUSA_PATH)/bin/clang + CXX := $(MUSA_PATH)/bin/clang++ + MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc + + MUSAFLAGS += $(addprefix --cuda-gpu-arch=, $(MTGPU_TARGETS)) + +ifdef GGML_CUDA_FORCE_DMMV + MUSAFLAGS += -DGGML_CUDA_FORCE_DMMV +endif # GGML_CUDA_FORCE_DMMV + +ifdef GGML_CUDA_FORCE_MMQ + MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ +endif # GGML_CUDA_FORCE_MMQ + +ifdef GGML_CUDA_FORCE_CUBLAS + MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS +endif # GGML_CUDA_FORCE_CUBLAS + +ifdef GGML_CUDA_DMMV_X + MUSAFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X) +else + MUSAFLAGS += -DGGML_CUDA_DMMV_X=32 +endif # GGML_CUDA_DMMV_X + +ifdef GGML_CUDA_MMV_Y + MUSAFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y) +else + MUSAFLAGS += -DGGML_CUDA_MMV_Y=1 +endif # GGML_CUDA_MMV_Y + +ifdef GGML_CUDA_F16 + MUSAFLAGS += -DGGML_CUDA_F16 +endif # GGML_CUDA_F16 + +ifdef GGML_CUDA_DMMV_F16 + MUSAFLAGS += -DGGML_CUDA_F16 +endif # GGML_CUDA_DMMV_F16 + +ifdef GGML_CUDA_KQUANTS_ITER + MUSAFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER) +else + MUSAFLAGS += -DK_QUANTS_PER_ITERATION=2 +endif + +ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE + MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE) +else + MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 +endif # GGML_CUDA_PEER_MAX_BATCH_SIZE + +ifdef GGML_CUDA_NO_PEER_COPY + MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY +endif # GGML_CUDA_NO_PEER_COPY + +ifdef GGML_CUDA_FA_ALL_QUANTS + MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS +endif # GGML_CUDA_FA_ALL_QUANTS + + OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o + OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) + OBJ_GGML += $(OBJ_CUDA_TMPL) + +ggml/src/ggml-cuda/ggml-cuda.o: \ + ggml/src/ggml-cuda/ggml-cuda.cu \ + ggml/include/ggml-cuda.h \ + ggml/include/ggml.h \ + ggml/include/ggml-backend.h \ + ggml/src/ggml-backend-impl.h \ + ggml/src/ggml-common.h \ + $(wildcard ggml/src/ggml-cuda/*.cuh) + $(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $< + +ggml/src/ggml-cuda/%.o: \ + ggml/src/ggml-cuda/%.cu \ + ggml/include/ggml.h \ + ggml/src/ggml-common.h \ + ggml/src/ggml-cuda/common.cuh + $(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $< +endif # GGML_MUSA + ifdef GGML_METAL MK_CPPFLAGS += -DGGML_USE_METAL MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit @@ -1002,7 +1068,6 @@ $(info I CXX: $(shell $(CXX) --version | head -n 1)) ifdef GGML_CUDA $(info I NVCC: $(shell $(NVCC) --version | tail -n 1)) CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])') -ifndef GGML_MUSA ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1) ifndef CUDA_DOCKER_ARCH @@ -1012,7 +1077,6 @@ endif # CUDA_POWER_ARCH endif # CUDA_DOCKER_ARCH endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1) -endif # GGML_MUSA endif # GGML_CUDA $(info ) @@ -1283,6 +1347,7 @@ clean: rm -vrf ggml/src/ggml-rpc/*.o rm -vrf ggml/src/ggml-sycl/*.o rm -vrf ggml/src/ggml-vulkan/*.o + rm -vrf ggml/src/ggml-musa/*.o rm -rvf $(BUILD_TARGETS) rm -rvf $(TEST_TARGETS) rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp diff --git a/ggml/src/ggml-musa/CMakeLists.txt b/ggml/src/ggml-musa/CMakeLists.txt index 5e9f32f3c..8edc75cc5 100644 --- a/ggml/src/ggml-musa/CMakeLists.txt +++ b/ggml/src/ggml-musa/CMakeLists.txt @@ -1,5 +1,9 @@ if (NOT EXISTS $ENV{MUSA_PATH}) - set(MUSA_PATH /usr/local/musa) + if (NOT EXISTS /opt/musa) + set(MUSA_PATH /usr/local/musa) + else() + set(MUSA_PATH /opt/musa) + endif() else() set(MUSA_PATH $ENV{MUSA_PATH}) endif()