From 4e58a0524944d8e5a374da12cbf002f47bccb44c Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Fri, 11 Aug 2023 10:16:02 +0300 Subject: [PATCH] Allow overriding CC_TURING --- CMakeLists.txt | 1 + Makefile | 1 + ggml-cuda.cu | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 26cd0a494..13d862c4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -378,6 +378,7 @@ if (LLAMA_HIPBLAS) target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y}) target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) + target_compile_definitions(ggml-rocm PRIVATE CC_TURING=1000000000) set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX) target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas) diff --git a/Makefile b/Makefile index 0843d86c6..59b4abeba 100644 --- a/Makefile +++ b/Makefile @@ -296,6 +296,7 @@ ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) ggml-cuda.o: CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) +ggml-cuda.o: CXXFLAGS += -DCC_TURING=1000000000 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h $(CXX) $(CXXFLAGS) -x hip -c -o $@ $< endif # LLAMA_HIPBLAS diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 03ecdee7c..aa4a2e919 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -74,7 +74,9 @@ #include "ggml.h" #define MIN_CC_DP4A 610 // minimum compute capability for __dp4a, an intrinsic for byte-wise dot products -#define CC_TURING 1000000000 +#ifndef CC_TURING +#define CC_TURING 700 +#endif #if defined(GGML_USE_HIPBLAS) #define __CUDA_ARCH__ 1300