diff --git a/Makefile b/Makefile index cf110833c..0d96f35f8 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -default: koboldcpp koboldcpp_noavx2 koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast koboldcpp_cublas +default: koboldcpp koboldcpp_noavx2 koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast simple: koboldcpp koboldcpp_noavx2 dev: koboldcpp_openblas @@ -55,7 +55,6 @@ BONUSCFLAGS2 = OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas CLBLAST_FLAGS = -DGGML_USE_CLBLAST -DGGML_USE_OPENBLAS -I/usr/local/include/openblas -CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I./include/cuda -I./include/cuda/crt #lets try enabling everything CFLAGS += -pthread -s @@ -124,6 +123,7 @@ ifndef LLAMA_NO_ACCELERATE endif endif +#note: koboldcpp does not officially support cublas. You can manually link it if you want, but it will not be a regular feature ifdef LLAMA_CUBLAS CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include LDFLAGS += -lcublas_static -lculibos -lcudart_static -lcublasLt_static -lpthread -ldl -L/usr/local/cuda/lib64 @@ -157,7 +157,6 @@ NOAVX2_BUILD = OPENBLAS_BUILD = OPENBLAS_NOAVX2_BUILD = CLBLAST_BUILD = -CUBLAS_BUILD = ifeq ($(OS),Windows_NT) DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) @@ -165,7 +164,6 @@ ifeq ($(OS),Windows_NT) OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS) OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS) CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS) - CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) else DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) @@ -176,17 +174,12 @@ else ifdef LLAMA_CLBLAST CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) endif - ifdef LLAMA_CUBLAS - CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -lcublas_static -lculibos -lcudart_static -lcublasLt_static -lpthread -ldl -L/usr/local/cuda/lib64 -shared -o $@.so $(LDFLAGS) - endif ifndef LLAMA_OPENBLAS ifndef LLAMA_CLBLAST - ifndef LLAMA_CUBLAS OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.' endif endif - endif endif # @@ -223,9 +216,6 @@ ggml_openblas_noavx2.o: ggml.c ggml.h ggml_clblast.o: ggml.c ggml.h $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CLBLAST_FLAGS) -c $< -o $@ -ggml_cublas.o: ggml.c ggml.h - $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CUBLAS_FLAGS) -c $< -o $@ - ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@ @@ -251,7 +241,7 @@ gpttype_adapter.o: gpttype_adapter.cpp $(CXX) $(CXXFLAGS) -c $< -o $@ clean: - rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize-stats perplexity embedding benchmark-q4_0-matmult main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_noavx2.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_noavx2.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so gptj.exe gpt2.exe + rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize-stats perplexity embedding benchmark-q4_0-matmult main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_noavx2.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_noavx2.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so gptj.exe gpt2.exe main: examples/main/main.cpp ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) @@ -273,9 +263,6 @@ koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_rwkv.o ggml_v1_noavx2.o e koboldcpp_clblast: ggml_clblast.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o $(CLBLAST_BUILD) - -koboldcpp_cublas: ggml_cublas.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o - $(CUBLAS_BUILD) quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)