# Patch summary (commentary only; text before the first "diff --git" header is not part of any hunk).
#
# Purpose: presumably a local build workaround for CUDA 10.2 on an aarch64 host -- inferred
# from the hard-coded /usr/local/cuda-10.2/targets/aarch64-linux paths; TODO confirm.
#
# Makefile
#   * Adds "CC := gcc" immediately before the RISCV_CROSS_COMPILE block.
#     NOTE(review): a makefile ":=" assignment wins over CC from the environment (a CC given
#     on the make command line still wins) -- confirm that forcing gcc is intended.
#   * Under LLAMA_CUBLAS, replaces the $(CUDA_PATH)/targets/x86_64-linux include and lib
#     directories with /usr/local/cuda-10.2/targets/aarch64-linux ones.
#     NOTE(review): $(CUDA_PATH) is no longer consulted on these two lines, so this hunk is
#     host-specific as written.
#   * Drops --forward-unknown-to-host-compiler from MK_NVCCFLAGS (only -use_fast_math stays).
#   * In the ggml-cuda.o recipe, replaces $(BASE_CXXFLAGS) and -Wno-pedantic with a literal
#     flag list (-I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS,
#     the CUDA include directories, -std=c++11 -O3).
#     NOTE(review): -DNDEBUG and -O3 are unconditional in this recipe as far as this diff
#     shows, and the include paths are duplicated from MK_CPPFLAGS -- verify against the
#     LLAMA_DEBUG handling in the full Makefile.
#
# ggml-cuda.cu
#   * Adds three macros just before "#endif // defined(GGML_USE_HIPBLAS)":
#       CUBLAS_TF32_TENSOR_OP_MATH -> CUBLAS_TENSOR_OP_MATH
#       CUBLAS_COMPUTE_16F         -> CUDA_R_16F
#       CUBLAS_COMPUTE_32F         -> CUDA_R_32F
#   * In ggml_cuda_mul_mat_mat_batched_cublas, changes the declared type of cu_compute_type
#     from cublasComputeType_t to cudaDataType_t.
#     NOTE(review): presumably needed because the older cuBLAS headers lack these names --
#     TODO confirm; the macros are not guarded by any CUDA version check in this diff.
#
# scripts/get-flags.mk
#   * Removes the block that appended -Wextra-semi to GF_CXXFLAGS when GF_CC_VER >= 080100.
#
# NOTE(review): in this copy the patch's line breaks and whitespace runs have been collapsed
# and the context "#include" lines have lost their header names, so it will not apply as-is;
# regenerate it from the original commit before use.
diff --git a/Makefile b/Makefile index 8273f8400..ef157aa13 100644 --- a/Makefile +++ b/Makefile @@ -91,6 +91,7 @@ gcovr-report: coverage ## Generate gcovr report mkdir -p gcovr-report gcovr --root . --html --html-details --output gcovr-report/coverage.html +CC := gcc ifdef RISCV_CROSS_COMPILE CC := riscv64-unknown-linux-gnu-gcc CXX := riscv64-unknown-linux-gnu-g++ @@ -357,10 +358,10 @@ ifdef LLAMA_BLIS endif # LLAMA_BLIS ifdef LLAMA_CUBLAS - MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include - MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib + MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda-10.2/targets/aarch64-linux/include + MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L/usr/local/cuda-10.2/targets/aarch64-linux/lib OBJS += ggml-cuda.o - MK_NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math + MK_NVCCFLAGS = -use_fast_math ifdef LLAMA_DEBUG MK_NVCCFLAGS += -lineinfo @@ -417,7 +418,7 @@ ifdef LLAMA_CUDA_CCBIN MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN) endif ggml-cuda.o: ggml-cuda.cu ggml-cuda.h - $(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ + $(NVCC) -I. 
-Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda-10.2/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ endif # LLAMA_CUBLAS ifdef LLAMA_CLBLAST diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 9f4b188cb..f75ce7163 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -84,6 +84,9 @@ #include #include #include +#define CUBLAS_TF32_TENSOR_OP_MATH CUBLAS_TENSOR_OP_MATH +#define CUBLAS_COMPUTE_16F CUDA_R_16F +#define CUBLAS_COMPUTE_32F CUDA_R_32F #endif // defined(GGML_USE_HIPBLAS) #include "ggml-cuda.h" @@ -8396,7 +8399,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const half * dst_f16 = nullptr; char * dst_t = nullptr; - cublasComputeType_t cu_compute_type = CUBLAS_COMPUTE_16F; + cudaDataType_t cu_compute_type = CUBLAS_COMPUTE_16F; cudaDataType_t cu_data_type = CUDA_R_16F; // dst strides diff --git a/scripts/get-flags.mk b/scripts/get-flags.mk index 596d7ead1..f003d2c34 100644 --- a/scripts/get-flags.mk +++ b/scripts/get-flags.mk @@ -32,7 +32,4 @@ else ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1) GF_CXXFLAGS += -Wno-format-truncation endif - ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1) - GF_CXXFLAGS += -Wextra-semi - endif endif