diff --git a/Makefile b/Makefile index 6359cc643..789849ab2 100644 --- a/Makefile +++ b/Makefile @@ -133,24 +133,26 @@ endif # it is recommended to use the CMAKE file to build for cublas if you can - will likely work better ifdef LLAMA_CUBLAS - CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include - CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include - LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib - OBJS += ggml-cuda.o - NVCC = nvcc - NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native + CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include + CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include + LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib + OBJS += ggml-cuda.o ggml_v2-cuda.o + NVCC = nvcc + NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native ifdef LLAMA_CUDA_DMMV_X - NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) + NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) else - NVCCFLAGS += -DGGML_CUDA_DMMV_X=32 + NVCCFLAGS += -DGGML_CUDA_DMMV_X=32 endif # LLAMA_CUDA_DMMV_X ifdef LLAMA_CUDA_DMMV_Y - NVCCFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y) + NVCCFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y) else - NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1 + NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1 endif # LLAMA_CUDA_DMMV_Y ggml-cuda.o: ggml-cuda.cu ggml-cuda.h - $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@ + $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@ +ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h + $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@ endif # LLAMA_CUBLAS ifdef LLAMA_METAL