rebrand to koboldcpp
This commit is contained in:
parent
8dd8ab1659
commit
eb5b22dda2
9 changed files with 20 additions and 20 deletions
|
@ -23,4 +23,4 @@ SOFTWARE.
|
|||
===================================
|
||||
|
||||
Note that the above license applies ONLY to the GGML library and llama.cpp by ggerganov which are licensed under the MIT License
|
||||
Kobold Lite by Concedo and the provided python ctypes bindings in llamacpp.dll are licensed under the AGPL v3.0 License
|
||||
Kobold Lite by Concedo and the provided python ctypes bindings in koboldcpp.dll are licensed under the AGPL v3.0 License
|
6
Makefile
6
Makefile
|
@ -119,7 +119,7 @@ endif
|
|||
|
||||
BLAS_BUILD =
|
||||
ifeq ($(OS),Windows_NT)
|
||||
BLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o libopenblas.lib -shared -o llamacpp_blas.dll $(LDFLAGS)
|
||||
BLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o libopenblas.lib -shared -o koboldcpp_blas.dll $(LDFLAGS)
|
||||
else
|
||||
BLAS_BUILD = @echo 'Your OS is $(OS) and does not appear to be Windows. If you want to use openblas, please link it manually with LLAMA_OPENBLAS=1'
|
||||
endif
|
||||
|
@ -170,7 +170,7 @@ gptj_adapter.o:
|
|||
$(CXX) $(CXXFLAGS) -c gptj_adapter.cpp -o gptj_adapter.o
|
||||
|
||||
clean:
|
||||
rm -vf *.o main quantize perplexity embedding main.exe quantize.exe llamacpp.dll llamacpp_blas.dll gptj.exe
|
||||
rm -vf *.o main quantize perplexity embedding main.exe quantize.exe koboldcpp.dll koboldcpp_blas.dll gptj.exe
|
||||
|
||||
main: examples/main/main.cpp ggml.o llama.o common.o
|
||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
|
||||
|
@ -179,7 +179,7 @@ main: examples/main/main.cpp ggml.o llama.o common.o
|
|||
@echo
|
||||
|
||||
llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o
|
||||
$(CXX) $(CXXFLAGS) ggml.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o -shared -o llamacpp.dll $(LDFLAGS)
|
||||
$(CXX) $(CXXFLAGS) ggml.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o -shared -o koboldcpp.dll $(LDFLAGS)
|
||||
|
||||
llamalib_blas: ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gptj_adapter.o
|
||||
$(BLAS_BUILD)
|
||||
|
|
14
README.md
14
README.md
|
@ -1,4 +1,4 @@
|
|||
# llamacpp-for-kobold
|
||||
# koboldcpp (formerly llamacpp-for-kobold)
|
||||
|
||||
A self-contained distributable from Concedo that exposes llama.cpp function bindings, allowing it to be used via a simulated Kobold API endpoint.
|
||||
|
||||
|
@ -9,18 +9,18 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
|
|||
Now has **BACKWARDS COMPATIBILITY** with ALL 3 versions of GGML LLAMA models, all ALPACA.CPP models, all versions of GPT4ALL.CPP models, and can run GGML older GPT-J.CPP models too. I also use a bunch of tricks to make subsequent prompt processing with shared ancestry much faster than the parent repo does.
|
||||
|
||||
## Usage
|
||||
- [Download the latest release here](https://github.com/LostRuins/llamacpp-for-kobold/releases/latest) or clone the repo.
|
||||
- Windows binaries are provided in the form of **llamacpp-for-kobold.exe**, which is a pyinstaller wrapper for **llamacpp.dll** and **llamacpp_for_kobold.py**. If you feel concerned, you may prefer to rebuild it yourself with the provided makefiles and scripts.
|
||||
- [Download the latest release here](https://github.com/LostRuins/koboldcpp/releases/latest) or clone the repo.
|
||||
- Windows binaries are provided in the form of **koboldcpp.exe**, which is a pyinstaller wrapper for **koboldcpp.dll** and **koboldcpp.py**. If you feel concerned, you may prefer to rebuild it yourself with the provided makefiles and scripts.
|
||||
- Weights are not included; you can use `quantize.exe` to generate them from your official weight files (or download them from other places).
|
||||
- To run, execute **llamacpp-for-kobold.exe** or drag and drop your quantized `ggml_model.bin` file onto the .exe, and then connect with Kobold or Kobold Lite.
|
||||
- To run, execute **koboldcpp.exe** or drag and drop your quantized `ggml_model.bin` file onto the .exe, and then connect with Kobold or Kobold Lite.
|
||||
- By default, you can connect to http://localhost:5001
|
||||
- You can also run it using the command line `llamacpp-for-kobold.exe [ggml_model.bin] [port]`. For info, please check `llamacpp-for-kobold.exe --help`
|
||||
- You can also run it using the command line `koboldcpp.exe [ggml_model.bin] [port]`. For info, please check `koboldcpp.exe --help`
|
||||
- If you are having crashes or issues with OpenBLAS, please try the `--noblas` flag.
|
||||
|
||||
## OSX and Linux
|
||||
- You will have to compile your binaries from source. A makefile is provided; simply run `make`
|
||||
- If you want, you can also link your own install of OpenBLAS manually with `make LLAMA_OPENBLAS=1`
|
||||
- After all binaries are built, you can run the python script with the command `llamacpp_for_kobold.py [ggml_model.bin] [port]`
|
||||
- After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
|
||||
|
||||
## Considerations
|
||||
- Don't want to use pybind11 due to dependencies on MSVC
|
||||
|
@ -33,7 +33,7 @@ Now has **BACKWARDS COMPATIBILITY** with ALL 3 versions of GGML LLAMA models, al
|
|||
## License
|
||||
- The original GGML library and llama.cpp by ggerganov are licensed under the MIT License
|
||||
- However, Kobold Lite is licensed under the AGPL v3.0 License
|
||||
- The provided python ctypes bindings in llamacpp.dll are also under the AGPL v3.0 License
|
||||
- The provided python ctypes bindings in koboldcpp.dll are also under the AGPL v3.0 License
|
||||
|
||||
## Notes
|
||||
- Generation delay scales linearly with original prompt length. See [this discussion](https://github.com/ggerganov/llama.cpp/discussions/229). If OpenBLAS is enabled, prompt ingestion becomes about 2-3x faster. This is automatic on Windows, but requires manual linking on OSX and Linux.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<!--
|
||||
An embedded version of Kobold Lite for use in llamacpp-for-kobold
|
||||
An embedded version of Kobold Lite for use in koboldcpp
|
||||
Current version: 17
|
||||
Kobold Lite is under the AGPL v3.0 License for the purposes of llamacpp-for-kobold. Please do not remove this line.
|
||||
Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
|
||||
- Concedo
|
||||
-->
|
||||
|
||||
|
|
Binary file not shown.
|
@ -39,13 +39,13 @@ def init_library():
|
|||
global handle, use_blas
|
||||
libname = ""
|
||||
if use_blas:
|
||||
libname = "llamacpp_blas.dll"
|
||||
libname = "koboldcpp_blas.dll"
|
||||
else:
|
||||
libname = "llamacpp.dll"
|
||||
libname = "koboldcpp.dll"
|
||||
|
||||
print("Initializing dynamic library: " + libname)
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
||||
#OpenBLAS should provide about a 2x speedup on prompt ingestion if compatible.
|
||||
handle = ctypes.CDLL(os.path.join(dir_path, libname ))
|
||||
|
||||
|
@ -86,7 +86,7 @@ def generate(prompt,max_length=20, max_context_length=512,temperature=0.8,top_k=
|
|||
### A hacky simple HTTP server simulating a kobold api by Concedo
|
||||
### we are intentionally NOT using flask, because we want MINIMAL dependencies
|
||||
#################################################################
|
||||
friendlymodelname = "concedo/llamacpp" # local kobold api apparently needs a hardcoded known HF model name
|
||||
friendlymodelname = "concedo/koboldcpp" # local kobold api apparently needs a hardcoded known HF model name
|
||||
maxctx = 2048
|
||||
maxlen = 128
|
||||
modelbusy = False
|
||||
|
@ -292,8 +292,8 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
|
|||
|
||||
def main(args):
|
||||
global use_blas
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "llamacpp_blas.dll")):
|
||||
print("Warning: libopenblas.dll or llamacpp_blas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_blas.dll")):
|
||||
print("Warning: libopenblas.dll or koboldcpp_blas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
|
||||
use_blas = False
|
||||
elif os.name != 'nt':
|
||||
print("Prebuilt OpenBLAS binaries only available for windows. Please manually build/link libopenblas from makefile with LLAMA_OPENBLAS=1")
|
Binary file not shown.
|
@ -1 +1 @@
|
|||
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./llamacpp.dll;." --add-data "./llamacpp_blas.dll;." --add-data "./libopenblas.dll;." "./llamacpp_for_kobold.py" -n "llamacpp-for-kobold.exe"
|
||||
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_blas.dll;." --add-data "./libopenblas.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
|
BIN
niko.ico
BIN
niko.ico
Binary file not shown.
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
Loading…
Add table
Add a link
Reference in a new issue