From cd4012c3ed7f9020cf4bae70a7d85f20334aeb33 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Tue, 23 May 2023 21:31:42 +0800 Subject: [PATCH] minor fixes to debug logging, fixed a typo, added a new failsafe mode --- .gitignore | 4 +-- Makefile | 72 +++++++++++++++++++++++--------------------- gpttype_adapter.cpp | 20 ++++++++++-- koboldcpp.py | 16 +++++----- make_pyinstaller.bat | 2 +- make_pyinstaller.sh | 2 +- 6 files changed, 69 insertions(+), 47 deletions(-) diff --git a/.gitignore b/.gitignore index 4bfed9c80..c75e1cb5b 100644 --- a/.gitignore +++ b/.gitignore @@ -51,12 +51,12 @@ perf-*.txt examples/jeopardy/results.txt koboldcpp.so -koboldcpp_noavx2.so +koboldcpp_failsafe.so koboldcpp_openblas.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so koboldcpp.dll -koboldcpp_noavx2.dll +koboldcpp_failsafe.dll koboldcpp_openblas.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll \ No newline at end of file diff --git a/Makefile b/Makefile index 12702faa6..d2fb3e704 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -default: koboldcpp koboldcpp_noavx2 koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast -simple: koboldcpp koboldcpp_noavx2 +default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast +simple: koboldcpp koboldcpp_failsafe tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox dev: koboldcpp_openblas dev2: koboldcpp_clblast @@ -47,8 +47,9 @@ CXXFLAGS = -I. -I./examples -I./include -I./include/CL -I./otherarch -I./otherar LDFLAGS = # these are used on windows, to build some libraries with extra old device compatibility -BONUSCFLAGS1 = -BONUSCFLAGS2 = +SIMPLECFLAGS = +FULLCFLAGS = +NONECFLAGS = OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas CLBLAST_FLAGS = -DGGML_USE_CLBLAST @@ -101,9 +102,10 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686)) # Use all CPU extensions that are available: # old library NEEDS mf16c to work. so we must build with it. new one doesnt ifeq ($(OS),Windows_NT) - CFLAGS += -mavx - BONUSCFLAGS1 += -mf16c - BONUSCFLAGS2 += -mavx2 -msse3 -mfma + CFLAGS += + NONECFLAGS += -mno-sse3 + SIMPLECFLAGS += -mavx -msse3 + FULLCFLAGS += -mavx2 -msse3 -mfma -mf16c -mavx else # if not on windows, they are clearly building it themselves, so lets just use whatever is supported CFLAGS += -march=native -mtune=native @@ -168,20 +170,20 @@ ifneq ($(filter armv8%,$(UNAME_M)),) endif DEFAULT_BUILD = -NOAVX2_BUILD = +FAILSAFE_BUILD = OPENBLAS_BUILD = OPENBLAS_NOAVX2_BUILD = CLBLAST_BUILD = ifeq ($(OS),Windows_NT) DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) - NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) + FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS) OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS) CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS) else DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) - NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) + FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) ifdef LLAMA_OPENBLAS OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) @@ -217,40 +219,42 @@ $(info ) # ggml.o: ggml.c ggml.h - $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@ + $(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@ ggml_openblas.o: ggml.c ggml.h - $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(OPENBLAS_FLAGS) -c $< -o $@ -ggml_noavx2.o: ggml.c ggml.h - $(CC) $(CFLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ +ggml_failsafe.o: ggml.c ggml.h + $(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ ggml_openblas_noavx2.o: ggml.c ggml.h - $(CC) $(CFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ ggml_clblast.o: ggml.c ggml.h - $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CLBLAST_FLAGS) -c $< -o $@ -ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h - $(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ #version 2 libs ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h - $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@ + $(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@ ggml_v2_openblas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h - $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(OPENBLAS_FLAGS) -c $< -o $@ -ggml_v2_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h - $(CC) $(CFLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ +ggml_v2_failsafe.o: otherarch/ggml_v2.c otherarch/ggml_v2.h + $(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ ggml_v2_openblas_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h - $(CC) $(CFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h - $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CLBLAST_FLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ + +#extreme old version compat +ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h + $(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@ +ggml_v1_failsafe.o: otherarch/ggml_v1.c otherarch/ggml_v1.h + $(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ + +#opencl +ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h + $(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml_v2-opencl.o: otherarch/ggml_v2-opencl.cpp otherarch/ggml_v2-opencl.h $(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml_v2-opencl-legacy.o: otherarch/ggml_v2-opencl-legacy.c otherarch/ggml_v2-opencl-legacy.h $(CC) $(CFLAGS) -c $< -o $@ -#extreme old version compat -ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h - $(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@ -ggml_v1_noavx2.o: otherarch/ggml_v1.c otherarch/ggml_v1.h - $(CC) $(CFLAGS) $(BONUSCFLAGS1) -c $< -o $@ - # intermediate objects llama.o: llama.cpp llama.h llama-util.h $(CXX) $(CXXFLAGS) -c $< -o $@ @@ -264,7 +268,7 @@ gpttype_adapter_clblast.o: gpttype_adapter.cpp $(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ clean: - rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_noavx2.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_noavx2.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so gptj.exe gpt2.exe + rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so gptj.exe gpt2.exe main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) @@ -276,9 +280,9 @@ koboldcpp: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o $(OBJS $(DEFAULT_BUILD) koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o $(OPENBLAS_BUILD) -koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o - $(NOAVX2_BUILD) -koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o +koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter.o + $(FAILSAFE_BUILD) +koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter.o $(OPENBLAS_NOAVX2_BUILD) koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o $(CLBLAST_BUILD) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index e5d0e7675..54b3b5521 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -797,13 +797,19 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o if(debugmode) { - printf("\n[Debug: Dump Input Tokens: %d]\n",file_format); + printf("\n[Debug: Dump Input Tokens, format: %d]\n",file_format); if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2) { for (auto id : embd_inp) { printf("'%s (%d)', ",llama_v2_token_to_str(llama_ctx_v2, id),id); } + + printf("\n\n[Debug: Context Size = %d]\n",current_context_tokens.size()); + for (auto id : current_context_tokens) + { + printf("'%s (%d)', ",llama_v2_token_to_str(llama_ctx_v2, id),id); + } } else if (file_format == FileFormat::GGJT_3) { @@ -811,6 +817,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o { printf("'%s (%d)', ",llama_token_to_str(llama_ctx_v3, id),id); } + printf("\n\n[Debug: Context Size = %d]\n",current_context_tokens.size()); + for (auto id : current_context_tokens) + { + printf("'%s (%d)', ",llama_token_to_str(llama_ctx_v3, id),id); + } } else { @@ -818,8 +829,13 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o { printf("'%s (%d)', ",vocab.id_to_token[id].c_str(),id); } + printf("\n\n[Debug: Context Size = %d]\n",current_context_tokens.size()); + for (auto id : current_context_tokens) + { + printf("'%s (%d)', ",vocab.id_to_token[id].c_str(),id); + } } - printf("\n"); + printf("\n\n"); } while (remaining_tokens > 0) diff --git a/koboldcpp.py b/koboldcpp.py index c281d33dc..704d6f1bf 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -69,7 +69,7 @@ def pick_existant_file(ntoption,nonntoption): return nonntoption lib_default = pick_existant_file("koboldcpp.dll","koboldcpp.so") -lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so") +lib_failsafe = pick_existant_file("koboldcpp_failsafe.dll","koboldcpp_failsafe.so") lib_openblas = pick_existant_file("koboldcpp_openblas.dll","koboldcpp_openblas.so") lib_openblas_noavx2 = pick_existant_file("koboldcpp_openblas_noavx2.dll","koboldcpp_openblas_noavx2.so") lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so") @@ -77,7 +77,7 @@ lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so") def init_library(): global handle - global lib_default,lib_noavx2,lib_openblas,lib_openblas_noavx2,lib_clblast + global lib_default,lib_failsafe,lib_openblas,lib_openblas_noavx2,lib_clblast libname = "" use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir. @@ -89,7 +89,7 @@ def init_library(): if not file_exists(lib_openblas_noavx2) or (os.name=='nt' and not file_exists("libopenblas.dll")): print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.") elif args.noblas: - print("Attempting to use non-avx2 compatibility library without OpenBLAS.") + print("!!! Attempting to use FAILSAFE MODE !!!") else: use_blas = True print("Attempting to use non-avx2 compatibility library with OpenBLAS. A compatible libopenblas will be required.") @@ -114,7 +114,9 @@ def init_library(): if use_blas: libname = lib_openblas_noavx2 else: - libname = lib_noavx2 + libname = lib_failsafe + args.nommap = True + print("[Failsafe Mode : mmap is disabled.]") else: if use_clblast: libname = lib_clblast @@ -451,7 +453,7 @@ def show_gui(): font = ("Arial", 9)).pack() - opts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old Devices)","Use No BLAS (Old Devices)"] + opts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"] runchoice = tk.StringVar() runchoice.set("Use OpenBLAS") tk.OptionMenu( root , runchoice , *opts ).pack() @@ -493,9 +495,9 @@ def show_gui(): if selchoice==opts[4]: args.noblas = True if selchoice==opts[5]: - args.nonoavx2 = True + args.noavx2 = True if selchoice==opts[6]: - args.nonoavx2 = True + args.noavx2 = True args.noblas = True root = tk.Tk() diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat index 5029466f4..611ebd952 100644 --- a/make_pyinstaller.bat +++ b/make_pyinstaller.bat @@ -1 +1 @@ -PyInstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file diff --git a/make_pyinstaller.sh b/make_pyinstaller.sh index db6f6ad01..43ee95b98 100644 --- a/make_pyinstaller.sh +++ b/make_pyinstaller.sh @@ -4,7 +4,7 @@ pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" \ --add-data "./klite.embd:." \ --add-data "./koboldcpp.so:." \ --add-data "./koboldcpp_openblas.so:." \ ---add-data "./koboldcpp_noavx2.so:." \ +--add-data "./koboldcpp_failsafe.so:." \ --add-data "./koboldcpp_openblas_noavx2.so:." \ --add-data "./koboldcpp_clblast.so:." \ --add-data "./rwkv_vocab.embd:." \