Merge branch 'master' into concedo

# Conflicts:
#	README.md

Commit bb965cc120
6 changed files with 33 additions and 95 deletions
Makefile (91 lines changed)
@@ -70,95 +70,8 @@ endif
 # TODO: probably these flags need to be tweaked on some architectures
 # feel free to update the Makefile for your architecture and send a pull request or issue
 ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
-    ifeq ($(UNAME_S),Darwin)
-        F16C_M := $(shell sysctl machdep.cpu.features)
-        ifneq (,$(findstring F16C,$(F16C_M)))
-            CFLAGS += -mf16c
-        endif
-        AVX1_M := $(shell sysctl machdep.cpu.features)
-        ifneq (,$(findstring FMA,$(AVX1_M)))
-            CFLAGS += -mfma
-        endif
-        ifneq (,$(findstring AVX1.0,$(AVX1_M)))
-            CFLAGS += -mavx
-        endif
-        AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
-        ifneq (,$(findstring AVX2,$(AVX2_M)))
-            CFLAGS += -mavx2
-        endif
-    else ifeq ($(UNAME_S),Linux)
-        AVX1_M := $(shell grep "avx " /proc/cpuinfo)
-        ifneq (,$(findstring avx,$(AVX1_M)))
-            CFLAGS += -mavx
-        endif
-        AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
-        ifneq (,$(findstring avx2,$(AVX2_M)))
-            CFLAGS += -mavx2
-        endif
-        FMA_M := $(shell grep "fma " /proc/cpuinfo)
-        ifneq (,$(findstring fma,$(FMA_M)))
-            CFLAGS += -mfma
-        endif
-        F16C_M := $(shell grep "f16c " /proc/cpuinfo)
-        ifneq (,$(findstring f16c,$(F16C_M)))
-            CFLAGS += -mf16c
-        endif
-        SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
-        ifneq (,$(findstring sse3,$(SSE3_M)))
-            CFLAGS += -msse3
-        endif
-        AVX512F_M := $(shell grep "avx512f " /proc/cpuinfo)
-        ifneq (,$(findstring avx512f,$(AVX512F_M)))
-            CFLAGS += -mavx512f
-        endif
-        AVX512BW_M := $(shell grep "avx512bw " /proc/cpuinfo)
-        ifneq (,$(findstring avx512bw,$(AVX512BW_M)))
-            CFLAGS += -mavx512bw
-        endif
-        AVX512DQ_M := $(shell grep "avx512dq " /proc/cpuinfo)
-        ifneq (,$(findstring avx512dq,$(AVX512DQ_M)))
-            CFLAGS += -mavx512dq
-        endif
-        AVX512VL_M := $(shell grep "avx512vl " /proc/cpuinfo)
-        ifneq (,$(findstring avx512vl,$(AVX512VL_M)))
-            CFLAGS += -mavx512vl
-        endif
-        AVX512CD_M := $(shell grep "avx512cd " /proc/cpuinfo)
-        ifneq (,$(findstring avx512cd,$(AVX512CD_M)))
-            CFLAGS += -mavx512cd
-        endif
-        AVX512ER_M := $(shell grep "avx512er " /proc/cpuinfo)
-        ifneq (,$(findstring avx512er,$(AVX512ER_M)))
-            CFLAGS += -mavx512er
-        endif
-        AVX512IFMA_M := $(shell grep "avx512ifma " /proc/cpuinfo)
-        ifneq (,$(findstring avx512ifma,$(AVX512IFMA_M)))
-            CFLAGS += -mavx512ifma
-        endif
-        AVX512PF_M := $(shell grep "avx512pf " /proc/cpuinfo)
-        ifneq (,$(findstring avx512pf,$(AVX512PF_M)))
-            CFLAGS += -mavx512pf
-        endif
-    else ifeq ($(UNAME_S),Haiku)
-        AVX1_M := $(shell sysinfo -cpu | grep -w "AVX")
-        ifneq (,$(findstring AVX,$(AVX1_M)))
-            CFLAGS += -mavx
-        endif
-        AVX2_M := $(shell sysinfo -cpu | grep -w "AVX2")
-        ifneq (,$(findstring AVX2,$(AVX2_M)))
-            CFLAGS += -mavx2
-        endif
-        FMA_M := $(shell sysinfo -cpu | grep -w "FMA")
-        ifneq (,$(findstring FMA,$(FMA_M)))
-            CFLAGS += -mfma
-        endif
-        F16C_M := $(shell sysinfo -cpu | grep -w "F16C")
-        ifneq (,$(findstring F16C,$(F16C_M)))
-            CFLAGS += -mf16c
-        endif
-    else
-        CFLAGS += -mfma -mf16c -mavx -mavx2
-    endif
+    # Use all CPU extensions that are available:
+    CFLAGS += -march=native -mtune=native
 endif
 ifneq ($(filter ppc64%,$(UNAME_M)),)
     POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
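For context: the block removed above probed one CPU feature at a time (sysctl on macOS, /proc/cpuinfo on Linux, sysinfo on Haiku) and appended the matching -m flags, while the new `-march=native -mtune=native` line lets the compiler detect the host CPU itself. A minimal sketch, not part of this commit and valid for GCC/Clang on x86 only, that reports the same features at run time:

```cpp
#include <cstdio>

int main() {
    // __builtin_cpu_supports() asks the running CPU about its features,
    // which is the same information the removed sysctl/grep checks gathered
    // by hand, and what -march=native keys off at compile time.
    std::printf("sse3:    %d\n", __builtin_cpu_supports("sse3"));
    std::printf("avx:     %d\n", __builtin_cpu_supports("avx"));
    std::printf("avx2:    %d\n", __builtin_cpu_supports("avx2"));
    std::printf("fma:     %d\n", __builtin_cpu_supports("fma"));
    std::printf("avx512f: %d\n", __builtin_cpu_supports("avx512f"));
    return 0;
}
```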
README.md

@@ -6,6 +6,8 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
 
 
 
+Now has **BACKWARDS COMPATIBILITY** with ALL 3 versions of GGML LLAMA models, all ALPACA.CPP models, all versions of GPT4ALL.CPP models, and can run GGML older GPT-J.CPP models too. I also use a bunch of tricks to make subsequent prompt processing with shared ancestry much faster than the parent repo does.
+
 ## Usage
 - [Download the latest release here](https://github.com/LostRuins/llamacpp-for-kobold/releases/latest) or clone the repo.
 - Windows binaries are provided in the form of **llamacpp-for-kobold.exe**, which is a pyinstaller wrapper for **llamacpp.dll** and **llamacpp_for_kobold.py**. If you feel concerned, you may prefer to rebuild it yourself with the provided makefiles and scripts.

@@ -17,6 +19,7 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
 
 ## OSX and Linux
 - You will have to compile your binaries from source. A makefile is provided, simply run `make`
+- If you want you can also link your own install of OpenBLAS manually with `make LLAMA_OPENBLAS=1`
 - After all binaries are built, you can run the python script with the command `llamacpp_for_kobold.py [ggml_model.bin] [port]`
 
 ## Considerations

@@ -24,7 +27,7 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
 - ZERO or MINIMAL changes as possible to parent repo files - do not move their function declarations elsewhere! We want to be able to update the repo and pull any changes automatically.
 - No dynamic memory allocation! Setup structs with FIXED (known) shapes and sizes for ALL output fields. Python will ALWAYS provide the memory, we just write to it.
 - No external libraries or dependencies. That means no Flask, Pybind and whatever. All You Need Is Python.
-- Since v1.0.6, requires libopenblas, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS. If you want you can also link your own install of OpenBLAS manually with `LLAMA_OPENBLAS=1`
+- Since v1.0.6, requires libopenblas, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS.
 - **I plan to keep backwards compatibility with ALL past llama.cpp AND alpaca.cpp models**. But you are also encouraged to reconvert/update your models if possible for best results.
 
 ## License
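A note on the "No dynamic memory allocation" consideration in the README hunk above: it implies a C-style boundary where every output field has a fixed, known size and the Python side owns the buffers. The names below are hypothetical, a sketch of the idea rather than the actual llamacpp.dll interface:

```cpp
#include <cstring>

extern "C" {

// Fixed-shape output struct: the caller (e.g. Python via ctypes) allocates it,
// and the library only ever writes into it. No allocation happens on this side.
struct generation_output {
    int  status;          // 0 = ok, nonzero = error
    char text[16384];     // fixed-size text buffer
};

void generate_stub(const char * prompt, generation_output * out) {
    out->status = 0;
    // A real build would run the model here; this stub just echoes the prompt.
    std::strncpy(out->text, prompt, sizeof(out->text) - 1);
    out->text[sizeof(out->text) - 1] = '\0';
}

} // extern "C"
```

On the Python side this presumably maps onto a ctypes.Structure with a matching fixed-size char array, which is how the project avoids Flask, pybind, or any other binding layer.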
@@ -254,7 +254,7 @@ def main():
     parser.add_argument(
         "--hf",
         action="store_true",
-        help="Whether to save the model in the huggingface format. (default: False)",
+        help="Whether to save the model in the Hugging Face format. (default: False)",
     )
     parser.add_argument(
         "--chat", "-c", action="store_true", help="Whether to open a chat with the model. (default: False)"
@@ -39,6 +39,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
 
     bool invalid_param = false;
     std::string arg;
+    gpt_params default_params;
+
     for (int i = 1; i < argc; i++) {
         arg = argv[i];
 

@@ -66,6 +68,11 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             std::ifstream file(argv[i]);
+            if (!file) {
+                fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
+                invalid_param = true;
+                break;
+            }
             std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
             if (params.prompt.back() == '\n') {
                 params.prompt.pop_back();

@@ -168,7 +175,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             }
             params.n_parts = std::stoi(argv[i]);
         } else if (arg == "-h" || arg == "--help") {
-            gpt_print_usage(argc, argv, params);
+            gpt_print_usage(argc, argv, default_params);
             exit(0);
         } else if (arg == "--random-prompt") {
             params.random_prompt = true;

@@ -180,13 +187,13 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.input_prefix = argv[i];
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            gpt_print_usage(argc, argv, params);
+            gpt_print_usage(argc, argv, default_params);
             exit(1);
         }
     }
     if (invalid_param) {
         fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
-        gpt_print_usage(argc, argv, params);
+        gpt_print_usage(argc, argv, default_params);
         exit(1);
     }
 
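The new `gpt_params default_params;` copy, together with the `gpt_print_usage(argc, argv, default_params)` calls in the hunks above, presumably exists so that the help and error paths print the original defaults rather than values that earlier flags already wrote into `params`. A standalone sketch of that pattern, with hypothetical option names rather than the real parser code:

```cpp
#include <cstdio>
#include <cstdlib>
#include <string>

struct opts { int threads = 4; std::string model = "model.bin"; };

// Usage text is printed from an untouched copy of the defaults, so the
// "(default: ...)" values are not affected by flags parsed before -h.
static void print_usage(const opts & defaults) {
    std::printf("  -t N     number of threads (default: %d)\n", defaults.threads);
    std::printf("  -m FILE  model path (default: %s)\n", defaults.model.c_str());
}

int main(int argc, char ** argv) {
    opts params;
    const opts defaults;  // mirrors the new `gpt_params default_params;`
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        if (arg == "-t" && i + 1 < argc)      params.threads = std::atoi(argv[++i]);
        else if (arg == "-m" && i + 1 < argc) params.model = argv[++i];
        else if (arg == "-h") { print_usage(defaults); return 0; }
    }
    std::printf("threads=%d model=%s\n", params.threads, params.model.c_str());
    return 0;
}
```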
examples/gpt4all.sh (new executable file, 15 lines)

@@ -0,0 +1,15 @@
+#!/bin/bash
+
+#
+# Temporary script - will be removed in the future
+#
+
+cd `dirname $0`
+cd ..
+
+./main --color --instruct --threads 4 \
+    --model ./models/gpt4all-7B/gpt4all-lora-quantized.bin \
+    --file ./prompts/alpaca.txt \
+    --batch_size 8 --ctx_size 2048 \
+    --repeat_last_n 64 --repeat_penalty 1.3 \
+    --n_predict 128 --temp 0.1 --top_k 40 --top_p 0.95
llama.cpp

@@ -1608,7 +1608,7 @@ struct llama_context * llama_init_from_file(
     }
 
     // reserve memory for context buffers
-    {
+    if (!params.vocab_only) {
         if (!kv_cache_init(ctx->model.hparams, ctx->model.kv_self, memory_type, ctx->model.hparams.n_ctx)) {
             fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
             llama_free(ctx);
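The `vocab_only` change above means the self-attention KV cache is only reserved when the context will actually be used for evaluation. Assuming the llama.h C API of this period (`llama_context_default_params`, `llama_init_from_file`, `llama_n_vocab`), a caller that only needs the tokenizer might look roughly like this sketch, which is not code from this commit:

```cpp
#include <cstdio>
#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        std::fprintf(stderr, "usage: %s <model.bin>\n", argv[0]);
        return 1;
    }

    llama_context_params params = llama_context_default_params();
    params.vocab_only = true;  // load only the vocab/tokenizer; with this change, no KV cache is allocated

    llama_context * ctx = llama_init_from_file(argv[1], params);
    if (ctx == nullptr) {
        std::fprintf(stderr, "failed to load model\n");
        return 1;
    }

    std::fprintf(stderr, "vocab size: %d\n", llama_n_vocab(ctx));
    llama_free(ctx);
    return 0;
}
```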