rebrand to koboldcpp

This commit is contained in:
Concedo 2023-04-03 10:35:18 +08:00
parent 8dd8ab1659
commit eb5b22dda2
9 changed files with 20 additions and 20 deletions

View file

@ -23,4 +23,4 @@ SOFTWARE.
===================================
Note that the above license applies ONLY to the GGML library and llama.cpp by ggerganov which are licensed under the MIT License
Kobold Lite by Concedo and the provided python ctypes bindings in llamacpp.dll are licensed under the AGPL v3.0 License
Kobold Lite by Concedo and the provided python ctypes bindings in koboldcpp.dll are licensed under the AGPL v3.0 License

View file

@ -119,7 +119,7 @@ endif
BLAS_BUILD =
ifeq ($(OS),Windows_NT)
BLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o libopenblas.lib -shared -o llamacpp_blas.dll $(LDFLAGS)
BLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o libopenblas.lib -shared -o koboldcpp_blas.dll $(LDFLAGS)
else
BLAS_BUILD = @echo 'Your OS is $(OS) and does not appear to be Windows. If you want to use openblas, please link it manually with LLAMA_OPENBLAS=1'
endif
@ -170,7 +170,7 @@ gptj_adapter.o:
$(CXX) $(CXXFLAGS) -c gptj_adapter.cpp -o gptj_adapter.o
clean:
rm -vf *.o main quantize perplexity embedding main.exe quantize.exe llamacpp.dll llamacpp_blas.dll gptj.exe
rm -vf *.o main quantize perplexity embedding main.exe quantize.exe koboldcpp.dll koboldcpp_blas.dll gptj.exe
main: examples/main/main.cpp ggml.o llama.o common.o
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
@ -179,7 +179,7 @@ main: examples/main/main.cpp ggml.o llama.o common.o
@echo
llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o
$(CXX) $(CXXFLAGS) ggml.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o -shared -o llamacpp.dll $(LDFLAGS)
$(CXX) $(CXXFLAGS) ggml.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o -shared -o koboldcpp.dll $(LDFLAGS)
llamalib_blas: ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o
$(BLAS_BUILD)

View file

@ -1,4 +1,4 @@
# llamacpp-for-kobold
# koboldcpp (formerly llamacpp-for-kobold)
A self-contained distributable from Concedo that exposes llama.cpp function bindings, allowing it to be used via a simulated Kobold API endpoint.
@ -9,18 +9,18 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
Now has **BACKWARDS COMPATIBILITY** with ALL 3 versions of GGML LLAMA models, all ALPACA.CPP models, all versions of GPT4ALL.CPP models, and can run older GGML GPT-J.CPP models too. I also use a bunch of tricks to make subsequent prompt processing with shared ancestry much faster than the parent repo does.
## Usage
- [Download the latest release here](https://github.com/LostRuins/llamacpp-for-kobold/releases/latest) or clone the repo.
- Windows binaries are provided in the form of **llamacpp-for-kobold.exe**, which is a pyinstaller wrapper for **llamacpp.dll** and **llamacpp_for_kobold.py**. If you feel concerned, you may prefer to rebuild it yourself with the provided makefiles and scripts.
- [Download the latest release here](https://github.com/LostRuins/koboldcpp/releases/latest) or clone the repo.
- Windows binaries are provided in the form of **koboldcpp.exe**, which is a pyinstaller wrapper for **koboldcpp.dll** and **koboldcpp.py**. If you feel concerned, you may prefer to rebuild it yourself with the provided makefiles and scripts.
- Weights are not included; you can use `quantize.exe` to generate them from your official weight files (or download them from other places).
- To run, execute **llamacpp-for-kobold.exe** or drag and drop your quantized `ggml_model.bin` file onto the .exe, and then connect with Kobold or Kobold Lite.
- To run, execute **koboldcpp.exe** or drag and drop your quantized `ggml_model.bin` file onto the .exe, and then connect with Kobold or Kobold Lite.
- By default, you can connect to http://localhost:5001
- You can also run it using the command line `llamacpp-for-kobold.exe [ggml_model.bin] [port]`. For info, please check `llamacpp-for-kobold.exe --help`
- You can also run it using the command line `koboldcpp.exe [ggml_model.bin] [port]`. For info, please check `koboldcpp.exe --help`
- If you are having crashes or issues with OpenBLAS, please try the `--noblas` flag.
## OSX and Linux
- You will have to compile your binaries from source. A makefile is provided; simply run `make`.
- If you want, you can also link your own install of OpenBLAS manually with `make LLAMA_OPENBLAS=1`
- After all binaries are built, you can run the python script with the command `llamacpp_for_kobold.py [ggml_model.bin] [port]`
- After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
## Considerations
- Don't want to use pybind11 due to dependencies on MSVCC
@ -33,7 +33,7 @@ Now has **BACKWARDS COMPATIBILITY** with ALL 3 versions of GGML LLAMA models, al
## License
- The original GGML library and llama.cpp by ggerganov are licensed under the MIT License
- However, Kobold Lite is licensed under the AGPL v3.0 License
- The provided python ctypes bindings in llamacpp.dll are also under the AGPL v3.0 License
- The provided python ctypes bindings in koboldcpp.dll are also under the AGPL v3.0 License
## Notes
- Generation delay scales linearly with original prompt length. See [this discussion](https://github.com/ggerganov/llama.cpp/discussions/229). If OpenBLAS is enabled then prompt ingestion becomes about 2-3x faster. This is automatic on Windows, but will require linking on OSX and Linux.

View file

@ -1,7 +1,7 @@
<!--
An embedded version of Kobold Lite for use in llamacpp-for-kobold
An embedded version of Kobold Lite for use in koboldcpp
Current version: 17
Kobold Lite is under the AGPL v3.0 License for the purposes of llamacpp-for-kobold. Please do not remove this line.
Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
- Concedo
-->

Binary file not shown.

View file

@ -39,13 +39,13 @@ def init_library():
global handle, use_blas
libname = ""
if use_blas:
libname = "llamacpp_blas.dll"
libname = "koboldcpp_blas.dll"
else:
libname = "llamacpp.dll"
libname = "koboldcpp.dll"
print("Initializing dynamic library: " + libname)
dir_path = os.path.dirname(os.path.realpath(__file__))
#OpenBLAS should provide about a 2x speedup on prompt ingestion if compatible.
handle = ctypes.CDLL(os.path.join(dir_path, libname ))
@ -86,7 +86,7 @@ def generate(prompt,max_length=20, max_context_length=512,temperature=0.8,top_k=
### A hacky simple HTTP server simulating a kobold api by Concedo
### we are intentionally NOT using flask, because we want MINIMAL dependencies
#################################################################
friendlymodelname = "concedo/llamacpp" # local kobold api apparently needs a hardcoded known HF model name
friendlymodelname = "concedo/koboldcpp" # local kobold api apparently needs a hardcoded known HF model name
maxctx = 2048
maxlen = 128
modelbusy = False
@ -292,8 +292,8 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
def main(args):
global use_blas
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "llamacpp_blas.dll")):
print("Warning: libopenblas.dll or llamacpp_blas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_blas.dll")):
print("Warning: libopenblas.dll or koboldcpp_blas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
use_blas = False
elif os.name != 'nt':
print("Prebuilt OpenBLAS binaries only available for windows. Please manually build/link libopenblas from makefile with LLAMA_OPENBLAS=1")

View file

@ -1 +1 @@
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./llamacpp.dll;." --add-data "./llamacpp_blas.dll;." --add-data "./libopenblas.dll;." "./llamacpp_for_kobold.py" -n "llamacpp-for-kobold.exe"
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_blas.dll;." --add-data "./libopenblas.dll;." "./koboldcpp.py" -n "koboldcpp.exe"

BIN
niko.ico

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

Before After
Before After