diff --git a/MIT_LICENSE_GGML_LLAMACPP_ONLY b/MIT_LICENSE_GGML_LLAMACPP_ONLY
index 27872a398..252a81b36 100644
--- a/MIT_LICENSE_GGML_LLAMACPP_ONLY
+++ b/MIT_LICENSE_GGML_LLAMACPP_ONLY
@@ -23,4 +23,4 @@
 SOFTWARE.
 ===================================
 Note that the above license applies ONLY to the GGML library and llama.cpp by ggerganov which are licensed under the MIT License
-Kobold Lite by Concedo and the provided python ctypes bindings in llamacpp.dll are licensed under the AGPL v3.0 License
\ No newline at end of file
+Kobold Lite by Concedo and the provided python ctypes bindings in koboldcpp.dll are licensed under the AGPL v3.0 License
\ No newline at end of file
diff --git a/Makefile b/Makefile
index f0a443c0a..06a4b5370 100644
--- a/Makefile
+++ b/Makefile
@@ -119,7 +119,7 @@ endif
 
 BLAS_BUILD = 
 ifeq ($(OS),Windows_NT)
-	BLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o libopenblas.lib -shared -o llamacpp_blas.dll $(LDFLAGS)
+	BLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o libopenblas.lib -shared -o koboldcpp_blas.dll $(LDFLAGS)
 else
 	BLAS_BUILD = @echo 'Your OS is $(OS) and does not appear to be Windows. If you want to use openblas, please link it manually with LLAMA_OPENBLAS=1'
 endif
@@ -170,7 +170,7 @@ gptj_adapter.o:
 	$(CXX) $(CXXFLAGS) -c gptj_adapter.cpp -o gptj_adapter.o
 
 clean:
-	rm -vf *.o main quantize perplexity embedding main.exe quantize.exe llamacpp.dll llamacpp_blas.dll gptj.exe
+	rm -vf *.o main quantize perplexity embedding main.exe quantize.exe koboldcpp.dll koboldcpp_blas.dll gptj.exe
 
 main: examples/main/main.cpp ggml.o llama.o common.o
 	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
@@ -179,7 +179,7 @@ main: examples/main/main.cpp ggml.o llama.o common.o
 	@echo
 
 llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o
-	$(CXX) $(CXXFLAGS) ggml.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o -shared -o llamacpp.dll $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) ggml.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o -shared -o koboldcpp.dll $(LDFLAGS)
 
 llamalib_blas: ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o
 	$(BLAS_BUILD)
diff --git a/README.md b/README.md
index 849cb0944..13ae1f770 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# llamacpp-for-kobold
+# koboldcpp (formerly llamacpp-for-kobold)
 
 A self contained distributable from Concedo that exposes llama.cpp function bindings, allowing it to be used via a simulated Kobold API endpoint.
 
@@ -9,18 +9,18 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
 Now has **BACKWARDS COMPATIBILITY** with ALL 3 versions of GGML LLAMA models, all ALPACA.CPP models, all versions of GPT4ALL.CPP models, and can run GGML older GPT-J.CPP models too. I also use a bunch of tricks to make subsequent prompt processing with shared ancestry much faster than the parent repo does.
 
 ## Usage
-- [Download the latest release here](https://github.com/LostRuins/llamacpp-for-kobold/releases/latest) or clone the repo.
-- Windows binaries are provided in the form of **llamacpp-for-kobold.exe**, which is a pyinstaller wrapper for **llamacpp.dll** and **llamacpp_for_kobold.py**. If you feel concerned, you may prefer to rebuild it yourself with the provided makefiles and scripts.
+- [Download the latest release here](https://github.com/LostRuins/koboldcpp/releases/latest) or clone the repo.
+- Windows binaries are provided in the form of **koboldcpp.exe**, which is a pyinstaller wrapper for **koboldcpp.dll** and **koboldcpp.py**. If you feel concerned, you may prefer to rebuild it yourself with the provided makefiles and scripts.
 - Weights are not included, you can use the `quantize.exe` to generate them from your official weight files (or download them from other places).
-- To run, execute **llamacpp-for-kobold.exe** or drag and drop your quantized `ggml_model.bin` file onto the .exe, and then connect with Kobold or Kobold Lite.
+- To run, execute **koboldcpp.exe** or drag and drop your quantized `ggml_model.bin` file onto the .exe, and then connect with Kobold or Kobold Lite.
 - By default, you can connect to http://localhost:5001
-- You can also run it using the command line `llamacpp-for-kobold.exe [ggml_model.bin] [port]`. For info, please check `llamacpp-for-kobold.exe --help`
+- You can also run it using the command line `koboldcpp.exe [ggml_model.bin] [port]`. For info, please check `koboldcpp.exe --help`
 - If you are having crashes or issues with OpenBLAS, please try the `--noblas` flag.
 
 ## OSX and Linux
 - You will have to compile your binaries from source. A makefile is provided, simply run `make`
 - If you want you can also link your own install of OpenBLAS manually with `make LLAMA_OPENBLAS=1`
-- After all binaries are built, you can run the python script with the command `llamacpp_for_kobold.py [ggml_model.bin] [port]`
+- After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
 
 ## Considerations
 - Don't want to use pybind11 due to dependencies on MSVCC
@@ -33,7 +33,7 @@ Now has **BACKWARDS COMPATIBILITY** with ALL 3 versions of GGML LLAMA models, al
 ## License
 - The original GGML library and llama.cpp by ggerganov are licensed under the MIT License
 - However, Kobold Lite is licensed under the AGPL v3.0 License
-- The provided python ctypes bindings in llamacpp.dll are also under the AGPL v3.0 License
+- The provided python ctypes bindings in koboldcpp.dll are also under the AGPL v3.0 License
 
 ## Notes
 - Generation delay scales linearly with original prompt length. See [this discussion](https://github.com/ggerganov/llama.cpp/discussions/229). If OpenBLAS is enabled then prompt ingestion becomes about 2-3x faster. This is automatic on windows, but will require linking on OSX and Linux.
diff --git a/klite.embd b/klite.embd
index 78234a009..10dc0b590 100644
--- a/klite.embd
+++ b/klite.embd
@@ -1,7 +1,7 @@
diff --git a/llamacpp.dll b/koboldcpp.dll
similarity index 97%
rename from llamacpp.dll
rename to koboldcpp.dll
index f2018ac29..bc3aac21f 100644
Binary files a/llamacpp.dll and b/koboldcpp.dll differ
diff --git a/llamacpp_for_kobold.py b/koboldcpp.py
similarity index 97%
rename from llamacpp_for_kobold.py
rename to koboldcpp.py
index 063bf9bdc..a8d233d93 100644
--- a/llamacpp_for_kobold.py
+++ b/koboldcpp.py
@@ -39,13 +39,13 @@ def init_library():
     global handle, use_blas
     libname = ""
     if use_blas:
-        libname = "llamacpp_blas.dll"
+        libname = "koboldcpp_blas.dll"
     else:
-        libname = "llamacpp.dll"
+        libname = "koboldcpp.dll"
 
     print("Initializing dynamic library: " + libname)
     dir_path = os.path.dirname(os.path.realpath(__file__))
-    
+    #OpenBLAS should provide about a 2x speedup on prompt ingestion if compatible.
     handle = ctypes.CDLL(os.path.join(dir_path, libname ))
@@ -86,7 +86,7 @@ def generate(prompt,max_length=20, max_context_length=512,temperature=0.8,top_k=
 ### A hacky simple HTTP server simulating a kobold api by Concedo
 ### we are intentionally NOT using flask, because we want MINIMAL dependencies
 #################################################################
-friendlymodelname = "concedo/llamacpp" # local kobold api apparently needs a hardcoded known HF model name
+friendlymodelname = "concedo/koboldcpp" # local kobold api apparently needs a hardcoded known HF model name
 maxctx = 2048
 maxlen = 128
 modelbusy = False
@@ -292,8 +292,8 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
 
 def main(args): 
     global use_blas
-    if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "llamacpp_blas.dll")):
-        print("Warning: libopenblas.dll or llamacpp_blas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
+    if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_blas.dll")):
+        print("Warning: libopenblas.dll or koboldcpp_blas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
         use_blas = False
     elif os.name != 'nt':
         print("Prebuilt OpenBLAS binaries only available for windows. Please manually build/link libopenblas from makefile with LLAMA_OPENBLAS=1")
diff --git a/llamacpp_blas.dll b/koboldcpp_blas.dll
similarity index 96%
rename from llamacpp_blas.dll
rename to koboldcpp_blas.dll
index 2be1b3d5d..672e16d7a 100644
Binary files a/llamacpp_blas.dll and b/koboldcpp_blas.dll differ
diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat
index fc91fd028..240b303e1 100644
--- a/make_pyinstaller.bat
+++ b/make_pyinstaller.bat
@@ -1 +1 @@
-pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./llamacpp.dll;." --add-data "./llamacpp_blas.dll;." --add-data "./libopenblas.dll;." "./llamacpp_for_kobold.py" -n "llamacpp-for-kobold.exe"
\ No newline at end of file
+pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_blas.dll;." --add-data "./libopenblas.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
\ No newline at end of file
diff --git a/niko.ico b/niko.ico
index 3b84754ff..f0b191e4d 100644
Binary files a/niko.ico and b/niko.ico differ
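The rename itself is mechanical, but two of the touched regions carry real behavior worth unpacking. First, `koboldcpp.py` picks between the plain and OpenBLAS builds of the library at startup and binds whichever it finds via `ctypes.CDLL`. Below is a minimal sketch of that loading pattern, assuming the file layout shown in the diff; the `load_model` declaration in the comments is a hypothetical illustration, not the DLL's actual exported API:

```python
import ctypes
import os

def init_library(use_blas: bool) -> ctypes.CDLL:
    # Prefer the OpenBLAS-accelerated build when it shipped alongside
    # the script; otherwise fall back to the plain build.
    libname = "koboldcpp_blas.dll" if use_blas else "koboldcpp.dll"
    print("Initializing dynamic library: " + libname)
    # Resolve the DLL relative to this script, not the working directory.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    handle = ctypes.CDLL(os.path.join(dir_path, libname))
    # Hypothetical example of declaring a bound symbol's signature:
    # handle.load_model.argtypes = [ctypes.c_char_p]
    # handle.load_model.restype = ctypes.c_bool
    return handle
```

Second, the comments around `friendlymodelname` record a design choice: the server simulates a Kobold API endpoint with no web framework at all ("intentionally NOT using flask") to keep dependencies minimal. The standard library is enough for that; here is a sketch of the idea, assuming a Kobold-style `GET /api/v1/model` route that reports the hardcoded model name:

```python
import json
from http.server import BaseHTTPRequestHandler, HTTPServer

friendlymodelname = "concedo/koboldcpp"  # clients expect a known HF-style model name

class KoboldApiHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        if self.path.endswith("/api/v1/model"):
            # Report the model name as a small JSON payload.
            body = json.dumps({"result": friendlymodelname}).encode("utf-8")
            self.send_response(200)
            self.send_header("content-type", "application/json")
            self.send_header("content-length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)
        else:
            self.send_error(404)

if __name__ == "__main__":
    # Port matches the README's default of http://localhost:5001
    HTTPServer(("localhost", 5001), KoboldApiHandler).serve_forever()
```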