diff --git a/Makefile b/Makefile
index cb700d114..433ac6fd3 100644
--- a/Makefile
+++ b/Makefile
@@ -44,6 +44,9 @@ LDFLAGS =
 BONUSCFLAGS1 =
 BONUSCFLAGS2 =
 
+OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
+CLBLAST_FLAGS = -DGGML_USE_CLBLAST -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
+
 #lets try enabling everything
 CFLAGS += -pthread -s
 CXXFLAGS += -pthread -s -Wno-multichar
@@ -52,14 +55,7 @@ CXXFLAGS += -pthread -s -Wno-multichar
 # TODO: support Windows
 ifeq ($(UNAME_S),Linux)
 	CFLAGS += -pthread
-	CXXFLAGS += -pthread
-	ifdef ARCH_LINUX
-		LDFLAGS += -lcblas
-	else
-		ifdef ARCH_LIKE
-			LDFLAGS += -lcblas
-		endif
-	endif
+	CXXFLAGS += -pthread
 endif
 
 ifeq ($(UNAME_S),Darwin)
@@ -117,10 +113,6 @@ ifndef LLAMA_NO_ACCELERATE
 		LDFLAGS += -framework Accelerate
 	endif
 endif
-ifdef LLAMA_OPENBLAS
-	CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
-	LDFLAGS += -lopenblas
-endif
 ifdef LLAMA_CLBLAST
 	CFLAGS += -DGGML_USE_CLBLAST -DGGML_USE_OPENBLAS
 	LDFLAGS += -lclblast -lOpenCL -lopenblas
@@ -150,17 +142,29 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
 	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
 endif
 
-OPENBLAS_BUILD =
-CLBLAST_BUILD =
+DEFAULT_BUILD =
 NOAVX2_BUILD =
+OPENBLAS_BUILD =
 OPENBLAS_NOAVX2_BUILD =
+CLBLAST_BUILD =
 
 ifeq ($(OS),Windows_NT)
-	OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@ $(LDFLAGS)
-	CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@ $(LDFLAGS)
-	OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@ $(LDFLAGS)
-	NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@ $(LDFLAGS)
+	DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
+	NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
+	OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
+	OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
+	CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
 else
+	DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
+	NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
+	ifdef LLAMA_OPENBLAS
+	OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
+	OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
+	endif
+	ifdef LLAMA_CLBLAST
+	CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
+	endif
+
 	ifndef LLAMA_OPENBLAS
 	ifndef LLAMA_CLBLAST
 	OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
@@ -183,9 +187,9 @@ $(info I CC: $(CCV))
 $(info I CXX: $(CXXV))
 $(info )
 
-default: koboldcpp.dll koboldcpp_noavx2.dll koboldcpp_openblas.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll
-simple: koboldcpp.dll koboldcpp_noavx2.dll
-dev: koboldcpp_openblas.dll
+default: koboldcpp koboldcpp_noavx2 koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast
+simple: koboldcpp koboldcpp_noavx2
+dev: koboldcpp_openblas
 
 #
 # Build library
@@ -195,16 +199,16 @@ ggml.o: ggml.c ggml.h
 	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@
 
 ggml_openblas.o: ggml.c ggml.h
-	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -DGGML_USE_OPENBLAS -c $< -o $@
+	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(OPENBLAS_FLAGS) -c $< -o $@
 
 ggml_noavx2.o: ggml.c ggml.h
 	$(CC) $(CFLAGS) -c $< -o $@
 
 ggml_openblas_noavx2.o: ggml.c ggml.h
-	$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c $< -o $@
+	$(CC) $(CFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
 
 ggml_clblast.o: ggml.c ggml.h
-	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -DGGML_USE_OPENBLAS -DGGML_USE_CLBLAST -c $< -o $@
+	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CLBLAST_FLAGS) -c $< -o $@
 
 ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
 	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@
@@ -231,7 +235,7 @@ gpttype_adapter.o: gpttype_adapter.cpp
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 clean:
-	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize-stats perplexity embedding benchmark-q4_0-matmult main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_noavx2.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll gptj.exe gpt2.exe
+	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize-stats perplexity embedding benchmark-q4_0-matmult main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_noavx2.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_noavx2.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so gptj.exe gpt2.exe
 
 main: examples/main/main.cpp ggml.o llama.o common.o
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
@@ -239,21 +243,21 @@ main: examples/main/main.cpp ggml.o llama.o common.o
 	@echo '==== Run ./main -h for help. ===='
 	@echo
 
-koboldcpp.dll: ggml.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
-	$(CXX) $(CXXFLAGS) $^ -shared -o $@ $(LDFLAGS)
+koboldcpp: ggml.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
+	$(DEFAULT_BUILD)
 
-koboldcpp_openblas.dll: ggml_openblas.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
+koboldcpp_openblas: ggml_openblas.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
 	$(OPENBLAS_BUILD)
 
-koboldcpp_noavx2.dll: ggml_noavx2.o ggml_rwkv.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o
+koboldcpp_noavx2: ggml_noavx2.o ggml_rwkv.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o
 	$(NOAVX2_BUILD)
 
-koboldcpp_openblas_noavx2.dll: ggml_openblas_noavx2.o ggml_rwkv.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o
+koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_rwkv.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o
 	$(OPENBLAS_NOAVX2_BUILD)
 
-koboldcpp_clblast.dll: ggml_clblast.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
+koboldcpp_clblast: ggml_clblast.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
 	$(CLBLAST_BUILD)
-	
+
 quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
diff --git a/koboldcpp.py b/koboldcpp.py
index fa240468c..43fc3eb5f 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -43,21 +43,40 @@ use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must
 use_clblast = False #uses CLBlast instead
 use_noavx2 = False #uses openblas with no avx2 instructions
 
+def pick_existant_file(ntoption,nonntoption): # returns one of the two file names, preferring the one for the current OS unless only the other exists next to this script
+    ntexist = os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), ntoption))
+    nonntexist = os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), nonntoption))
+    if os.name == 'nt':
+        if nonntexist and not ntexist:
+            return nonntoption
+        return ntoption
+    else:
+        if ntexist and not nonntexist:
+            return ntoption # only the Windows-named build is present, so fall back to it
+        return nonntoption # non-Windows default, returned even when neither file exists
+
+lib_default = pick_existant_file("koboldcpp.dll","koboldcpp.so")
+lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so")
+lib_openblas = pick_existant_file("koboldcpp_openblas.dll","koboldcpp_openblas.so")
+lib_openblas_noavx2 = pick_existant_file("koboldcpp_openblas_noavx2.dll","koboldcpp_openblas_noavx2.so")
+lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so")
+
+
 def init_library():
     global handle, use_blas, use_clblast, use_noavx2
     libname = ""
     if use_noavx2:
         if use_blas:
-            libname = "koboldcpp_openblas_noavx2.dll"
+            libname = lib_openblas_noavx2
         else:
-            libname = "koboldcpp_noavx2.dll"
+            libname = lib_noavx2
     else:
         if use_clblast:
-            libname = "koboldcpp_clblast.dll"
+            libname = lib_clblast
         elif use_blas:
-            libname = "koboldcpp_openblas.dll"
+            libname = lib_openblas
         else:
-            libname = "koboldcpp.dll"
+            libname = lib_default
 
     print("Initializing dynamic library: " + libname)
     dir_path = os.path.dirname(os.path.realpath(__file__))
@@ -345,35 +364,35 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
 
 def main(args):
     global use_blas, use_clblast, use_noavx2
+    global lib_default,lib_noavx2,lib_openblas,lib_openblas_noavx2,lib_clblast
+
     use_blas = False
     use_clblast = False
     use_noavx2 = False
-
-    if os.name != 'nt':
-        print("You are not on Windows. Default koboldcpp.dll library file will be used. Remember to manually link with OpenBLAS using LLAMA_OPENBLAS=1, or CLBlast with LLAMA_CLBLAST=1 if you want to use them. This is not an error, just a reminder.")
-    elif args.noavx2:
+
+    if args.noavx2:
         use_noavx2 = True
-        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas_noavx2.dll")):
-            print("Warning: libopenblas.dll or koboldcpp_openblas_noavx2.dll not found. Non-BLAS library will be used.")
+        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas_noavx2)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll"))): # the separate libopenblas.dll is only required on Windows
+            print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
         elif args.noblas:
             print("Attempting to use non-avx2 compatibility library without OpenBLAS.")
         else:
             use_blas = True
-            print("Attempting to use non-avx2 compatibility library with OpenBLAS.")
+            print("Attempting to use non-avx2 compatibility library with OpenBLAS. A compatible libopenblas will be required.")
     elif args.useclblast:
-        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_clblast.dll")):
-            print("Warning: clblast.dll or koboldcpp_clblast.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with CLBlast.")
+        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_clblast)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll"))): # the separate clblast.dll is only required on Windows
+            print("Warning: CLBlast library file not found. Non-BLAS library will be used.")
         else:
-            print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast.dll will be required.")
+            print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
             use_clblast = True
     else:
-        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")):
-            print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used.")
+        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll"))): # the separate libopenblas.dll is only required on Windows
+            print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
         elif args.noblas:
             print("Attempting to library without OpenBLAS.")
         else:
             use_blas = True
-            print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.")
+            print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas will be required.")
 
     if args.psutil_set_threads:
         import psutil
diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat
index 5029466f4..39523fec0 100644
--- a/make_pyinstaller.bat
+++ b/make_pyinstaller.bat
@@ -1 +1,11 @@
-PyInstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe"
\ No newline at end of file
+PyInstaller --noconfirm --onefile --clean --console --icon "./niko.ico" ^
+--add-data "./klite.embd;." ^
+--add-data "./koboldcpp.dll;." ^
+--add-data "./koboldcpp_openblas.dll;." ^
+--add-data "./koboldcpp_noavx2.dll;." ^
+--add-data "./koboldcpp_openblas_noavx2.dll;." ^
+--add-data "./libopenblas.dll;." ^
+--add-data "./koboldcpp_clblast.dll;." ^
+--add-data "./clblast.dll;." ^
+--add-data "./rwkv_vocab.embd;." ^
+"./koboldcpp.py" -n "koboldcpp.exe"
\ No newline at end of file
diff --git a/make_pyinstaller.sh b/make_pyinstaller.sh
index cf1b720c5..db6f6ad01 100644
--- a/make_pyinstaller.sh
+++ b/make_pyinstaller.sh
@@ -2,12 +2,10 @@
 
 pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" \
 --add-data "./klite.embd:." \
---add-data "./koboldcpp.dll:." \
---add-data "./ggml_openblas.o:." \
---add-data "./ggml_noavx2.o:." \
---add-data "./ggml_openblas_noavx2.o:." \
---add-data "./libopenblas.dll:." \
---add-data "./ggml_clblast.o:." \
---add-data "./clblast.dll:." \
+--add-data "./koboldcpp.so:." \
+--add-data "./koboldcpp_openblas.so:." \
+--add-data "./koboldcpp_noavx2.so:." \
+--add-data "./koboldcpp_openblas_noavx2.so:." \
+--add-data "./koboldcpp_clblast.so:." \
 --add-data "./rwkv_vocab.embd:." \
 "./koboldcpp.py" -n "koboldcpp"