make : simplify nvcc flags
CUDA_CXXFLAGS and HOST_CXXFLAGS can no longer be overridden via the command line, but NVCCFLAGS now can.
This commit is contained in:
parent
8a7b2fa528
commit
9b28f3413b
1 changed files with 38 additions and 44 deletions
82
Makefile
82
Makefile
|
@ -121,12 +121,12 @@ MK_CXXFLAGS = -std=c++11 -fPIC
|
||||||
|
|
||||||
# -Ofast tends to produce faster code, but may not be available for some compilers.
|
# -Ofast tends to produce faster code, but may not be available for some compilers.
|
||||||
ifdef LLAMA_FAST
|
ifdef LLAMA_FAST
|
||||||
MK_CFLAGS += -Ofast
|
MK_CFLAGS += -Ofast
|
||||||
MK_HOST_CXXFLAGS += -Ofast
|
HOST_CXXFLAGS += -Ofast
|
||||||
MK_CUDA_CXXFLAGS += -O3
|
MK_NVCCFLAGS += -O3
|
||||||
else
|
else
|
||||||
MK_CFLAGS += -O3
|
MK_CFLAGS += -O3
|
||||||
MK_CXXFLAGS += -O3
|
MK_CXXFLAGS += -O3
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# clock_gettime came in POSIX.1b (1993)
|
# clock_gettime came in POSIX.1b (1993)
|
||||||
|
@ -222,8 +222,8 @@ MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
|
||||||
|
|
||||||
ifeq ($(CC_IS_CLANG), 1)
|
ifeq ($(CC_IS_CLANG), 1)
|
||||||
# clang options
|
# clang options
|
||||||
MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return
|
MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return
|
||||||
MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
|
HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
|
||||||
|
|
||||||
ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
|
ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
|
||||||
MK_CFLAGS += -Wdouble-promotion
|
MK_CFLAGS += -Wdouble-promotion
|
||||||
|
@ -233,14 +233,14 @@ ifeq ($(CC_IS_CLANG), 1)
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
# gcc options
|
# gcc options
|
||||||
MK_CFLAGS += -Wdouble-promotion
|
MK_CFLAGS += -Wdouble-promotion
|
||||||
MK_HOST_CXXFLAGS += -Wno-array-bounds
|
HOST_CXXFLAGS += -Wno-array-bounds
|
||||||
|
|
||||||
ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
|
ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
|
||||||
MK_HOST_CXXFLAGS += -Wno-format-truncation
|
HOST_CXXFLAGS += -Wno-format-truncation
|
||||||
endif
|
endif
|
||||||
ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
|
ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
|
||||||
MK_HOST_CXXFLAGS += -Wextra-semi
|
HOST_CXXFLAGS += -Wextra-semi
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -294,8 +294,8 @@ ifndef RISCV
|
||||||
|
|
||||||
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
||||||
# Use all CPU extensions that are available:
|
# Use all CPU extensions that are available:
|
||||||
MK_CFLAGS += -march=native -mtune=native
|
MK_CFLAGS += -march=native -mtune=native
|
||||||
MK_HOST_CXXFLAGS += -march=native -mtune=native
|
HOST_CXXFLAGS += -march=native -mtune=native
|
||||||
|
|
||||||
# Usage AVX-only
|
# Usage AVX-only
|
||||||
#MK_CFLAGS += -mfma -mf16c -mavx
|
#MK_CFLAGS += -mfma -mf16c -mavx
|
||||||
|
@ -395,61 +395,59 @@ ifdef LLAMA_CUBLAS
|
||||||
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||||
MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
||||||
OBJS += ggml-cuda.o
|
OBJS += ggml-cuda.o
|
||||||
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
|
MK_NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
|
||||||
ifdef LLAMA_CUDA_NVCC
|
ifdef LLAMA_CUDA_NVCC
|
||||||
NVCC = $(LLAMA_CUDA_NVCC)
|
NVCC = $(LLAMA_CUDA_NVCC)
|
||||||
else
|
else
|
||||||
NVCC = nvcc
|
NVCC = nvcc
|
||||||
endif #LLAMA_CUDA_NVCC
|
endif #LLAMA_CUDA_NVCC
|
||||||
ifdef CUDA_DOCKER_ARCH
|
ifdef CUDA_DOCKER_ARCH
|
||||||
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
|
MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
|
||||||
else ifdef CUDA_POWER_ARCH
|
else ifndef CUDA_POWER_ARCH
|
||||||
NVCCFLAGS +=
|
MK_NVCCFLAGS += -arch=native
|
||||||
else
|
|
||||||
NVCCFLAGS += -arch=native
|
|
||||||
endif # CUDA_DOCKER_ARCH
|
endif # CUDA_DOCKER_ARCH
|
||||||
ifdef LLAMA_CUDA_FORCE_DMMV
|
ifdef LLAMA_CUDA_FORCE_DMMV
|
||||||
NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
|
MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
|
||||||
endif # LLAMA_CUDA_FORCE_DMMV
|
endif # LLAMA_CUDA_FORCE_DMMV
|
||||||
ifdef LLAMA_CUDA_FORCE_MMQ
|
ifdef LLAMA_CUDA_FORCE_MMQ
|
||||||
NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
|
MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
|
||||||
endif # LLAMA_CUDA_FORCE_MMQ
|
endif # LLAMA_CUDA_FORCE_MMQ
|
||||||
ifdef LLAMA_CUDA_DMMV_X
|
ifdef LLAMA_CUDA_DMMV_X
|
||||||
NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
|
MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
|
||||||
endif # LLAMA_CUDA_DMMV_X
|
endif # LLAMA_CUDA_DMMV_X
|
||||||
ifdef LLAMA_CUDA_MMV_Y
|
ifdef LLAMA_CUDA_MMV_Y
|
||||||
NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
|
MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
|
||||||
else ifdef LLAMA_CUDA_DMMV_Y
|
else ifdef LLAMA_CUDA_DMMV_Y
|
||||||
NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
|
MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
|
MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
|
||||||
endif # LLAMA_CUDA_MMV_Y
|
endif # LLAMA_CUDA_MMV_Y
|
||||||
ifdef LLAMA_CUDA_F16
|
ifdef LLAMA_CUDA_F16
|
||||||
NVCCFLAGS += -DGGML_CUDA_F16
|
MK_NVCCFLAGS += -DGGML_CUDA_F16
|
||||||
endif # LLAMA_CUDA_F16
|
endif # LLAMA_CUDA_F16
|
||||||
ifdef LLAMA_CUDA_DMMV_F16
|
ifdef LLAMA_CUDA_DMMV_F16
|
||||||
NVCCFLAGS += -DGGML_CUDA_F16
|
MK_NVCCFLAGS += -DGGML_CUDA_F16
|
||||||
endif # LLAMA_CUDA_DMMV_F16
|
endif # LLAMA_CUDA_DMMV_F16
|
||||||
ifdef LLAMA_CUDA_KQUANTS_ITER
|
ifdef LLAMA_CUDA_KQUANTS_ITER
|
||||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
||||||
endif
|
endif
|
||||||
ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
||||||
NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
|
MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
|
MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
|
||||||
endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
||||||
#ifdef LLAMA_CUDA_CUBLAS
|
#ifdef LLAMA_CUDA_CUBLAS
|
||||||
# NVCCFLAGS += -DGGML_CUDA_CUBLAS
|
# MK_NVCCFLAGS += -DGGML_CUDA_CUBLAS
|
||||||
#endif # LLAMA_CUDA_CUBLAS
|
#endif # LLAMA_CUDA_CUBLAS
|
||||||
ifdef LLAMA_CUDA_CCBIN
|
ifdef LLAMA_CUDA_CCBIN
|
||||||
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||||
endif
|
endif
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||||
$(NVCC) $(NVCCFLAGS) -c $< -o $@
|
$(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler "$(HOST_CXXFLAGS)" -c $< -o $@
|
||||||
endif # LLAMA_CUBLAS
|
endif # LLAMA_CUBLAS
|
||||||
|
|
||||||
ifdef LLAMA_CLBLAST
|
ifdef LLAMA_CLBLAST
|
||||||
|
@ -512,15 +510,11 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
|
||||||
endif # LLAMA_MPI
|
endif # LLAMA_MPI
|
||||||
|
|
||||||
# combine build flags with cmdline overrides
|
# combine build flags with cmdline overrides
|
||||||
override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(CFLAGS)
|
override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(CFLAGS)
|
||||||
override CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS)
|
BASE_CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS)
|
||||||
override CUDA_CXXFLAGS := $(MK_CUDA_CXXFLAGS) $(CUDA_CXXFLAGS)
|
override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS)
|
||||||
override HOST_CXXFLAGS := $(MK_HOST_CXXFLAGS) $(HOST_CXXFLAGS)
|
override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
|
||||||
override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
|
override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
# save CXXFLAGS before we add host-only options
|
|
||||||
NVCCFLAGS := $(NVCCFLAGS) $(CXXFLAGS) $(CUDA_CXXFLAGS) -Wno-pedantic -Xcompiler "$(HOST_CXXFLAGS)"
|
|
||||||
override CXXFLAGS += $(HOST_CXXFLAGS)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Print build information
|
# Print build information
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue