From 9b28f3413b8a4aea937cd1ea14875e58d2a5bec3 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Mon, 11 Dec 2023 14:14:48 -0500 Subject: [PATCH 01/11] make : simplify nvcc flags CUDA_CXXFLAGS and HOST_CXXFLAGS can no longer be overridden via the command line, but NVCCFLAGS now can. --- Makefile | 82 ++++++++++++++++++++++++++------------------------------ 1 file changed, 38 insertions(+), 44 deletions(-) diff --git a/Makefile b/Makefile index a1a6cae54..570419268 100644 --- a/Makefile +++ b/Makefile @@ -121,12 +121,12 @@ MK_CXXFLAGS = -std=c++11 -fPIC # -Ofast tends to produce faster code, but may not be available for some compilers. ifdef LLAMA_FAST -MK_CFLAGS += -Ofast -MK_HOST_CXXFLAGS += -Ofast -MK_CUDA_CXXFLAGS += -O3 +MK_CFLAGS += -Ofast +HOST_CXXFLAGS += -Ofast +MK_NVCCFLAGS += -O3 else -MK_CFLAGS += -O3 -MK_CXXFLAGS += -O3 +MK_CFLAGS += -O3 +MK_CXXFLAGS += -O3 endif # clock_gettime came in POSIX.1b (1993) @@ -222,8 +222,8 @@ MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn ifeq ($(CC_IS_CLANG), 1) # clang options - MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return - MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi + MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return + HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))' MK_CFLAGS += -Wdouble-promotion @@ -233,14 +233,14 @@ ifeq ($(CC_IS_CLANG), 1) endif else # gcc options - MK_CFLAGS += -Wdouble-promotion - MK_HOST_CXXFLAGS += -Wno-array-bounds + MK_CFLAGS += -Wdouble-promotion + HOST_CXXFLAGS += -Wno-array-bounds ifeq ($(shell expr $(CC_VER) \>= 070100), 1) - MK_HOST_CXXFLAGS += -Wno-format-truncation + HOST_CXXFLAGS += -Wno-format-truncation endif ifeq ($(shell expr $(CC_VER) \>= 080100), 1) - MK_HOST_CXXFLAGS += -Wextra-semi + HOST_CXXFLAGS += -Wextra-semi endif 
endif @@ -294,8 +294,8 @@ ifndef RISCV ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64)) # Use all CPU extensions that are available: - MK_CFLAGS += -march=native -mtune=native - MK_HOST_CXXFLAGS += -march=native -mtune=native + MK_CFLAGS += -march=native -mtune=native + HOST_CXXFLAGS += -march=native -mtune=native # Usage AVX-only #MK_CFLAGS += -mfma -mf16c -mavx @@ -395,61 +395,59 @@ ifdef LLAMA_CUBLAS MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib OBJS += ggml-cuda.o - NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math + MK_NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math ifdef LLAMA_CUDA_NVCC NVCC = $(LLAMA_CUDA_NVCC) else NVCC = nvcc endif #LLAMA_CUDA_NVCC ifdef CUDA_DOCKER_ARCH - NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) -else ifdef CUDA_POWER_ARCH - NVCCFLAGS += -else - NVCCFLAGS += -arch=native + MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) +else ifndef CUDA_POWER_ARCH + MK_NVCCFLAGS += -arch=native endif # CUDA_DOCKER_ARCH ifdef LLAMA_CUDA_FORCE_DMMV - NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV + MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV endif # LLAMA_CUDA_FORCE_DMMV ifdef LLAMA_CUDA_FORCE_MMQ - NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ + MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ endif # LLAMA_CUDA_FORCE_MMQ ifdef LLAMA_CUDA_DMMV_X - NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) + MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) else - NVCCFLAGS += -DGGML_CUDA_DMMV_X=32 + MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32 endif # LLAMA_CUDA_DMMV_X ifdef LLAMA_CUDA_MMV_Y - NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) + MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) else ifdef LLAMA_CUDA_DMMV_Y - NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for 
backwards compatibility + MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility else - NVCCFLAGS += -DGGML_CUDA_MMV_Y=1 + MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1 endif # LLAMA_CUDA_MMV_Y ifdef LLAMA_CUDA_F16 - NVCCFLAGS += -DGGML_CUDA_F16 + MK_NVCCFLAGS += -DGGML_CUDA_F16 endif # LLAMA_CUDA_F16 ifdef LLAMA_CUDA_DMMV_F16 - NVCCFLAGS += -DGGML_CUDA_F16 + MK_NVCCFLAGS += -DGGML_CUDA_F16 endif # LLAMA_CUDA_DMMV_F16 ifdef LLAMA_CUDA_KQUANTS_ITER - NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) + MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) else - NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2 + MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2 endif ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE - NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE) + MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE) else - NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 + MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE #ifdef LLAMA_CUDA_CUBLAS -# NVCCFLAGS += -DGGML_CUDA_CUBLAS +# MK_NVCCFLAGS += -DGGML_CUDA_CUBLAS #endif # LLAMA_CUDA_CUBLAS ifdef LLAMA_CUDA_CCBIN - NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN) + MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN) endif ggml-cuda.o: ggml-cuda.cu ggml-cuda.h - $(NVCC) $(NVCCFLAGS) -c $< -o $@ + $(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler "$(HOST_CXXFLAGS)" -c $< -o $@ endif # LLAMA_CUBLAS ifdef LLAMA_CLBLAST @@ -512,15 +510,11 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h endif # LLAMA_MPI # combine build flags with cmdline overrides -override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(CFLAGS) -override CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS) -override CUDA_CXXFLAGS := $(MK_CUDA_CXXFLAGS) $(CUDA_CXXFLAGS) -override HOST_CXXFLAGS := $(MK_HOST_CXXFLAGS) $(HOST_CXXFLAGS) -override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) - -# save CXXFLAGS before we add host-only options 
-NVCCFLAGS := $(NVCCFLAGS) $(CXXFLAGS) $(CUDA_CXXFLAGS) -Wno-pedantic -Xcompiler "$(HOST_CXXFLAGS)" -override CXXFLAGS += $(HOST_CXXFLAGS) +override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(CFLAGS) +BASE_CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS) +override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) +override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS) +override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) # # Print build information From 91df2623d7f80e605d3083187dbe12f775c8ef8f Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Mon, 11 Dec 2023 15:09:56 -0500 Subject: [PATCH 02/11] make : detect host compiler and cuda compiler separately --- Makefile | 51 +++++++++----------------------------------- scripts/get_flags.mk | 37 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 41 deletions(-) create mode 100644 scripts/get_flags.mk diff --git a/Makefile b/Makefile index 570419268..e9baa4866 100644 --- a/Makefile +++ b/Makefile @@ -26,20 +26,6 @@ ifndef UNAME_M UNAME_M := $(shell uname -m) endif -ifeq '' '$(findstring clang,$(shell $(CC) --version))' - CC_IS_GCC=1 - CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }') -else - CC_IS_CLANG=1 - ifeq '' '$(findstring Apple,$(shell $(CC) --version))' - CC_IS_LLVM_CLANG=1 - else - CC_IS_APPLE_CLANG=1 - endif - CC_VER := $(shell $(CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \ - | awk -F. 
'{ printf("%02d%02d%02d", $$1, $$2, $$3) }') -endif - # Mac OS + Arm can report x86_64 # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789 ifeq ($(UNAME_S),Darwin) @@ -220,30 +206,6 @@ MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmis -Werror=implicit-function-declaration MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn -ifeq ($(CC_IS_CLANG), 1) - # clang options - MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return - HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi - - ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))' - MK_CFLAGS += -Wdouble-promotion - endif - ifneq '' '$(and $(CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 070300)))' - MK_CFLAGS += -Wdouble-promotion - endif -else - # gcc options - MK_CFLAGS += -Wdouble-promotion - HOST_CXXFLAGS += -Wno-array-bounds - - ifeq ($(shell expr $(CC_VER) \>= 070100), 1) - HOST_CXXFLAGS += -Wno-format-truncation - endif - ifeq ($(shell expr $(CC_VER) \>= 080100), 1) - HOST_CXXFLAGS += -Wextra-semi - endif -endif - # this version of Apple ld64 is buggy ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))' MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER @@ -392,6 +354,10 @@ ifdef LLAMA_BLIS endif # LLAMA_BLIS ifdef LLAMA_CUBLAS + GF_CC := nvcc 2>/dev/null .c -Xcompiler + include scripts/get_flags.mk + CUDA_CXXFLAGS := $(GF_CXXFLAGS) + MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib OBJS += ggml-cuda.o @@ -447,7 +413,7 @@ ifdef LLAMA_CUDA_CCBIN MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN) endif ggml-cuda.o: ggml-cuda.cu ggml-cuda.h - $(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler 
"$(HOST_CXXFLAGS)" -c $< -o $@ + $(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ endif # LLAMA_CUBLAS ifdef LLAMA_CLBLAST @@ -509,10 +475,13 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h $(CC) $(CFLAGS) -c $< -o $@ endif # LLAMA_MPI +GF_CC := $(CC) +include scripts/get_flags.mk + # combine build flags with cmdline overrides -override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(CFLAGS) +override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS) BASE_CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS) -override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) +override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS) override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) diff --git a/scripts/get_flags.mk b/scripts/get_flags.mk new file mode 100644 index 000000000..94be8d36f --- /dev/null +++ b/scripts/get_flags.mk @@ -0,0 +1,37 @@ +ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))' + GF_CC_IS_GCC = 1 + GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null || $(GF_CC) -dumpversion; } | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }') +else + GF_CC_IS_CLANG = 1 + ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))' + GF_CC_IS_LLVM_CLANG = 1 + else + GF_CC_IS_APPLE_CLANG = 1 + endif + GF_CC_VER := $(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \ + | awk -F. 
'{ printf("%02d%02d%02d", $$1, $$2, $$3) }') +endif + +ifeq ($(GF_CC_IS_CLANG), 1) + # clang options + GF_CFLAGS = -Wunreachable-code-break -Wunreachable-code-return + GF_CXXFLAGS = -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi + + ifneq '' '$(and $(GF_CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 030800)))' + GF_CFLAGS += -Wdouble-promotion + endif + ifneq '' '$(and $(GF_CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 070300)))' + GF_CFLAGS += -Wdouble-promotion + endif +else + # gcc options + GF_CFLAGS = -Wdouble-promotion + GF_CXXFLAGS = -Wno-array-bounds + + ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1) + GF_CXXFLAGS += -Wno-format-truncation + endif + ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1) + GF_CXXFLAGS += -Wextra-semi + endif +endif From 93ca80fa3a6b9abb23b19a3646b67adcfa1c760a Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Mon, 11 Dec 2023 15:17:07 -0500 Subject: [PATCH 03/11] make editorconfig checker happy --- .editorconfig | 3 +++ scripts/get_flags.mk | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.editorconfig b/.editorconfig index f8245b85c..a56e9ccc8 100644 --- a/.editorconfig +++ b/.editorconfig @@ -15,6 +15,9 @@ indent_size = 4 [Makefile] indent_style = tab +[scripts/*.mk] +indent_style = tab + [prompts/*.txt] insert_final_newline = unset diff --git a/scripts/get_flags.mk b/scripts/get_flags.mk index 94be8d36f..596d7ead1 100644 --- a/scripts/get_flags.mk +++ b/scripts/get_flags.mk @@ -8,8 +8,9 @@ else else GF_CC_IS_APPLE_CLANG = 1 endif - GF_CC_VER := $(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \ - | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }') + GF_CC_VER := \ + $(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \ + | awk -F. 
'{ printf("%02d%02d%02d", $$1, $$2, $$3) }') endif ifeq ($(GF_CC_IS_CLANG), 1) From 88781479f1e9c7d9f6355ac6d78e563aec320716 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Mon, 11 Dec 2023 16:42:22 -0500 Subject: [PATCH 04/11] make : honor NVCC, LLAMA_CUDA_CCBIN, NVCCFLAGS --- Makefile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e9baa4866..25c0a2a86 100644 --- a/Makefile +++ b/Makefile @@ -354,10 +354,6 @@ ifdef LLAMA_BLIS endif # LLAMA_BLIS ifdef LLAMA_CUBLAS - GF_CC := nvcc 2>/dev/null .c -Xcompiler - include scripts/get_flags.mk - CUDA_CXXFLAGS := $(GF_CXXFLAGS) - MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib OBJS += ggml-cuda.o @@ -485,6 +481,13 @@ override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS) override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) +# identify CUDA host compiler +ifdef LLAMA_CUBLAS +GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler +include scripts/get_flags.mk +CUDA_CXXFLAGS := $(GF_CXXFLAGS) +endif + # # Print build information # From abacb27868cff3e865e22d2d9785f515bbc3eba5 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Mon, 11 Dec 2023 17:13:19 -0500 Subject: [PATCH 05/11] cmake : silence linker check stdout --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 78de2dd1a..93cb6db09 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -471,6 +471,7 @@ endif() execute_process( COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v ERROR_VARIABLE output + OUTPUT_QUIET ) if (output MATCHES "dyld-1015\.7") add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) From a81a34add0ffa2e9124ade12ff45882a9efd11dd Mon Sep 17 00:00:00 
2001 From: Jared Van Bortel Date: Mon, 11 Dec 2023 17:12:37 -0500 Subject: [PATCH 06/11] cmake : detect host compiler and cuda compiler separately --- CMakeLists.txt | 103 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93cb6db09..d0117ea6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -397,54 +397,93 @@ if (LLAMA_HIPBLAS) endif() endif() +function(get_flags ccid ccver) + set(c_flags "") + set(cxx_flags "") + + if (ccid MATCHES "Clang") + set(c_flags -Wunreachable-code-break -Wunreachable-code-return) + set(cxx_flags -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) + + if ( + (ccid STREQUAL "Clang" AND ccver VERSION_GREATER_EQUAL 3.8.0) OR + (ccid STREQUAL "AppleClang" AND ccver VERSION_GREATER_EQUAL 7.3.0) + ) + set(c_flags ${c_flags} -Wdouble-promotion) + endif() + elseif (ccid STREQUAL "GNU") + set(c_flags -Wdouble-promotion) + set(cxx_flags -Wno-array-bounds) + + if (ccver VERSION_GREATER_EQUAL 7.1.0) + set(cxx_flags ${cxx_flags} -Wno-format-truncation) + endif() + if (ccver VERSION_GREATER_EQUAL 8.1.0) + set(cxx_flags ${cxx_flags} -Wextra-semi) + endif() + endif() + + set(gf_c_flags ${c_flags} PARENT_SCOPE) + set(gf_cxx_flags ${cxx_flags} PARENT_SCOPE) +endfunction() + if (LLAMA_ALL_WARNINGS) if (NOT MSVC) set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration) set(cxx_flags -Wmissing-declarations -Wmissing-noreturn) - set(host_cxx_flags "") - if (CMAKE_C_COMPILER_ID MATCHES "Clang") - set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return) - set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi) + get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) - if ( - (CMAKE_C_COMPILER_ID STREQUAL "Clang" AND 
CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR - (CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0) - ) - set(c_flags ${c_flags} -Wdouble-promotion) - endif() - elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU") - set(c_flags ${c_flags} -Wdouble-promotion) - set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds) - - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0) - set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation) - endif() - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0) - set(host_cxx_flags ${host_cxx_flags} -Wextra-semi) - endif() - endif() + set(c_flags ${c_flags} ${warning_flags}) + set(cxx_flags ${cxx_flags} ${warning_flags}) + add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags} ${gf_c_flags}>" + "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${gf_cxx_flags}>") else() # todo : msvc + set(c_flags "") + set(cxx_flags "") endif() - - set(c_flags ${c_flags} ${warning_flags}) - set(cxx_flags ${cxx_flags} ${warning_flags}) - add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>" - "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>" - "$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>") - endif() +set(cuda_flags ${cxx_flags} -use_fast_math) if (NOT MSVC) - set(cuda_flags -Wno-pedantic) + set(cuda_flags ${cuda_flags} -Wno-pedantic) endif() -set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags}) -list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument +set(nvcc_cmd ${CMAKE_CUDA_COMPILER} .c) +if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "") + set(nvcc_cmd ${nvcc_cmd} -ccbin ${CMAKE_CUDA_HOST_COMPILER}) +endif() + +execute_process( + COMMAND ${nvcc_cmd} -Xcompiler --version + OUTPUT_VARIABLE cuda_ccfullver + ERROR_QUIET +) + +if (NOT cuda_ccfullver MATCHES clang) + set(cuda_ccid "GNU") + execute_process( + COMMAND ${nvcc_cmd} -Xcompiler "-dumpfullversion -dumpversion" + OUTPUT_VARIABLE cuda_ccver + ERROR_QUIET + ) +else() + if (cuda_ccfullver MATCHES Apple) + set(cuda_ccid "AppleClang") + else() + set(cuda_ccid "Clang") + endif() + string(REGEX REPLACE "^.* 
version ([0-9.]*).*$" "\\1" cuda_ccver ${cuda_ccfullver}) +endif() + +message("-- CUDA host compiler is " ${cuda_ccid} " " ${cuda_ccver}) + +get_flags(${cuda_ccid} ${cuda_ccver}) +list(JOIN gf_cxx_flags " " cuda_cxx_flags) # pass host compiler flags as a single argument if (NOT cuda_host_flags STREQUAL "") - set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags}) + set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_cxx_flags}) endif() add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>") From b5b2cdff1d6c0b6c4e62bc97fbf15198a6ed0eba Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 12 Dec 2023 11:19:18 -0500 Subject: [PATCH 07/11] cmake : fix incorrect variable reference --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d0117ea6e..cb155c29f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -482,7 +482,7 @@ message("-- CUDA host compiler is " ${cuda_ccid} " " ${cuda_ccver}) get_flags(${cuda_ccid} ${cuda_ccver}) list(JOIN gf_cxx_flags " " cuda_cxx_flags) # pass host compiler flags as a single argument -if (NOT cuda_host_flags STREQUAL "") +if (NOT cuda_cxx_flags STREQUAL "") set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_cxx_flags}) endif() From e30a8ad1eec1b33570617c509fec8fc6a74d6923 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 12 Dec 2023 11:23:04 -0500 Subject: [PATCH 08/11] cmake : capitalize variables --- CMakeLists.txt | 96 ++++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb155c29f..e7aae24aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -397,96 +397,98 @@ if (LLAMA_HIPBLAS) endif() endif() -function(get_flags ccid ccver) - set(c_flags "") - set(cxx_flags "") +function(get_flags CCID CCVER) + set(C_FLAGS "") + set(CXX_FLAGS "") - if (ccid MATCHES "Clang") - set(c_flags -Wunreachable-code-break -Wunreachable-code-return) - set(cxx_flags -Wunreachable-code-break 
-Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) + if (CCID MATCHES "Clang") + set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return) + set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) if ( - (ccid STREQUAL "Clang" AND ccver VERSION_GREATER_EQUAL 3.8.0) OR - (ccid STREQUAL "AppleClang" AND ccver VERSION_GREATER_EQUAL 7.3.0) + (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR + (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0) ) - set(c_flags ${c_flags} -Wdouble-promotion) + set(C_FLAGS ${C_FLAGS} -Wdouble-promotion) endif() - elseif (ccid STREQUAL "GNU") - set(c_flags -Wdouble-promotion) - set(cxx_flags -Wno-array-bounds) + elseif (CCID STREQUAL "GNU") + set(C_FLAGS -Wdouble-promotion) + set(CXX_FLAGS -Wno-array-bounds) - if (ccver VERSION_GREATER_EQUAL 7.1.0) - set(cxx_flags ${cxx_flags} -Wno-format-truncation) + if (CCVER VERSION_GREATER_EQUAL 7.1.0) + set(CXX_FLAGS ${CXX_FLAGS} -Wno-format-truncation) endif() - if (ccver VERSION_GREATER_EQUAL 8.1.0) - set(cxx_flags ${cxx_flags} -Wextra-semi) + if (CCVER VERSION_GREATER_EQUAL 8.1.0) + set(CXX_FLAGS ${CXX_FLAGS} -Wextra-semi) endif() endif() - set(gf_c_flags ${c_flags} PARENT_SCOPE) - set(gf_cxx_flags ${cxx_flags} PARENT_SCOPE) + set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE) + set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE) endfunction() if (LLAMA_ALL_WARNINGS) if (NOT MSVC) - set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) - set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration) - set(cxx_flags -Wmissing-declarations -Wmissing-noreturn) + set(WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) + set(C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes + -Werror=implicit-int -Werror=implicit-function-declaration) + set(CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn) 
+ + set(C_FLAGS ${WARNING_FLAGS} ${C_FLAGS}) + set(CXX_FLAGS ${WARNING_FLAGS} ${CXX_FLAGS}) get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) - set(c_flags ${c_flags} ${warning_flags}) - set(cxx_flags ${cxx_flags} ${warning_flags}) - add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags} ${gf_c_flags}>" - "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${gf_cxx_flags}>") + add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS} ${GF_C_FLAGS}>" + "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS} ${GF_CXX_FLAGS}>") else() # todo : msvc - set(c_flags "") - set(cxx_flags "") + set(C_FLAGS "") + set(CXX_FLAGS "") endif() endif() -set(cuda_flags ${cxx_flags} -use_fast_math) +set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math) if (NOT MSVC) - set(cuda_flags ${cuda_flags} -Wno-pedantic) + set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic) endif() -set(nvcc_cmd ${CMAKE_CUDA_COMPILER} .c) +set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c) if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "") - set(nvcc_cmd ${nvcc_cmd} -ccbin ${CMAKE_CUDA_HOST_COMPILER}) + set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER}) endif() execute_process( - COMMAND ${nvcc_cmd} -Xcompiler --version - OUTPUT_VARIABLE cuda_ccfullver + COMMAND ${NVCC_CMD} -Xcompiler --version + OUTPUT_VARIABLE CUDA_CCFULLVER ERROR_QUIET ) -if (NOT cuda_ccfullver MATCHES clang) - set(cuda_ccid "GNU") +if (NOT CUDA_CCFULLVER MATCHES clang) + set(CUDA_CCID "GNU") execute_process( - COMMAND ${nvcc_cmd} -Xcompiler "-dumpfullversion -dumpversion" - OUTPUT_VARIABLE cuda_ccver + COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion" + OUTPUT_VARIABLE CUDA_CCVER ERROR_QUIET ) else() - if (cuda_ccfullver MATCHES Apple) - set(cuda_ccid "AppleClang") + if (CUDA_CCFULLVER MATCHES Apple) + set(CUDA_CCID "AppleClang") else() - set(cuda_ccid "Clang") + set(CUDA_CCID "Clang") endif() - string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" cuda_ccver ${cuda_ccfullver}) + string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER}) endif() -message("-- CUDA host compiler is " ${cuda_ccid} " " ${cuda_ccver}) 
+message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}") -get_flags(${cuda_ccid} ${cuda_ccver}) -list(JOIN gf_cxx_flags " " cuda_cxx_flags) # pass host compiler flags as a single argument -if (NOT cuda_cxx_flags STREQUAL "") - set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_cxx_flags}) +get_flags(${CUDA_CCID} ${CUDA_CCVER}) +list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument +if (NOT CUDA_CXX_FLAGS STREQUAL "") + set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS}) endif() -add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>") +add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>") if (WIN32) add_compile_definitions(_CRT_SECURE_NO_WARNINGS) From cdf3cc3c1719b9ef9c97976821ac4159c7168f85 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 12 Dec 2023 11:27:41 -0500 Subject: [PATCH 09/11] cmake : make CUDA warning stuff properly conditional --- CMakeLists.txt | 84 ++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e7aae24aa..93ec57e7c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -448,48 +448,52 @@ if (LLAMA_ALL_WARNINGS) endif() endif() -set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math) -if (NOT MSVC) - set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic) +if (LLAMA_CUBLAS) + set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math) + if (NOT MSVC) + set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic) + endif() + + if (LLAMA_ALL_WARNINGS AND NOT MSVC) + set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c) + if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "") + set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER}) + endif() + + execute_process( + COMMAND ${NVCC_CMD} -Xcompiler --version + OUTPUT_VARIABLE CUDA_CCFULLVER + ERROR_QUIET + ) + + if (NOT CUDA_CCFULLVER MATCHES clang) + set(CUDA_CCID "GNU") + execute_process( + COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion" + OUTPUT_VARIABLE CUDA_CCVER + ERROR_QUIET + ) + else() + if (CUDA_CCFULLVER MATCHES Apple) + 
set(CUDA_CCID "AppleClang") + else() + set(CUDA_CCID "Clang") + endif() + string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER}) + endif() + + message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}") + + get_flags(${CUDA_CCID} ${CUDA_CCVER}) + list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument + if (NOT CUDA_CXX_FLAGS STREQUAL "") + set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS}) + endif() + endif() + + add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>") endif() -set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c) -if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "") - set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER}) -endif() - -execute_process( - COMMAND ${NVCC_CMD} -Xcompiler --version - OUTPUT_VARIABLE CUDA_CCFULLVER - ERROR_QUIET -) - -if (NOT CUDA_CCFULLVER MATCHES clang) - set(CUDA_CCID "GNU") - execute_process( - COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion" - OUTPUT_VARIABLE CUDA_CCVER - ERROR_QUIET - ) -else() - if (CUDA_CCFULLVER MATCHES Apple) - set(CUDA_CCID "AppleClang") - else() - set(CUDA_CCID "Clang") - endif() - string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER}) -endif() - -message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}") - -get_flags(${CUDA_CCID} ${CUDA_CCVER}) -list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument -if (NOT CUDA_CXX_FLAGS STREQUAL "") - set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS}) -endif() - -add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>") - if (WIN32) add_compile_definitions(_CRT_SECURE_NO_WARNINGS) From cacac2519535989519a73ee19b26455eac97deeb Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 12 Dec 2023 11:30:57 -0500 Subject: [PATCH 10/11] cmake : fix improper joining in generator expression --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93ec57e7c..1bf9857b9 100644 
--- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -439,8 +439,8 @@ if (LLAMA_ALL_WARNINGS) get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) - add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS} ${GF_C_FLAGS}>" - "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS} ${GF_CXX_FLAGS}>") + add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>" + "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>") else() # todo : msvc set(C_FLAGS "") From d870a9fd2c276934a30b9ad1f245c05324c68f00 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 13 Dec 2023 12:05:01 -0500 Subject: [PATCH 11/11] get_flags.mk -> get-flags.mk --- Makefile | 4 ++-- scripts/{get_flags.mk => get-flags.mk} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename scripts/{get_flags.mk => get-flags.mk} (100%) diff --git a/Makefile b/Makefile index 25c0a2a86..5779e8440 100644 --- a/Makefile +++ b/Makefile @@ -472,7 +472,7 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h endif # LLAMA_MPI GF_CC := $(CC) -include scripts/get_flags.mk +include scripts/get-flags.mk # combine build flags with cmdline overrides override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS) @@ -484,7 +484,7 @@ override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) # identify CUDA host compiler ifdef LLAMA_CUBLAS GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler -include scripts/get_flags.mk +include scripts/get-flags.mk CUDA_CXXFLAGS := $(GF_CXXFLAGS) endif diff --git a/scripts/get_flags.mk b/scripts/get-flags.mk similarity index 100% rename from scripts/get_flags.mk rename to scripts/get-flags.mk