Merge remote-tracking branch 'temp/concedo' into concedo_experimental

# Conflicts:
#	koboldcpp.py
This commit is contained in:
Concedo 2023-05-02 22:43:34 +08:00
commit 966cd2ce91
4 changed files with 63 additions and 49 deletions

10
.gitignore vendored
View file

@@ -47,3 +47,13 @@ zig-cache/
ppl-*.txt
examples/jeopardy/results.txt
koboldcpp.so
koboldcpp_noavx2.so
koboldcpp_openblas.so
koboldcpp_openblas_noavx2.so
koboldcpp_clblast.so
koboldcpp.dll
koboldcpp_noavx2.dll
koboldcpp_openblas.dll
koboldcpp_openblas_noavx2.dll
koboldcpp_clblast.dll

View file

@@ -11,6 +11,7 @@ struct load_model_inputs
const char * model_filename;
const char * lora_filename;
const bool use_mmap;
const bool use_mlock;
const bool use_smartcontext;
const bool unban_tokens;
const int clblast_info = 0;

View file

@@ -158,7 +158,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
llama_ctx_params.f16_kv = inputs.f16_kv;
llama_ctx_params.logits_all = false;
llama_ctx_params.use_mmap = inputs.use_mmap;
-llama_ctx_params.use_mlock = false;
+llama_ctx_params.use_mlock = inputs.use_mlock;
llama_ctx_v1 = llama_init_from_file(modelname.c_str(), llama_ctx_params);

View file

@@ -18,6 +18,7 @@ class load_model_inputs(ctypes.Structure):
("model_filename", ctypes.c_char_p),
("lora_filename", ctypes.c_char_p),
("use_mmap", ctypes.c_bool),
("use_mlock", ctypes.c_bool),
("use_smartcontext", ctypes.c_bool),
("unban_tokens", ctypes.c_bool),
("clblast_info", ctypes.c_int),
@@ -134,6 +135,7 @@ def load_model(model_filename):
inputs.threads = args.threads
inputs.f16_kv = True
inputs.use_mmap = (not args.nommap)
inputs.use_mlock = args.usemlock
if args.lora and args.lora!="":
inputs.use_mmap = False
inputs.use_smartcontext = args.smartcontext
@@ -608,6 +610,7 @@ if __name__ == '__main__':
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
parser.add_argument("--unbantokens", help="Normally, KoboldAI prevents certain tokens such as EOS and Square Brackets. This flag unbans them.", action='store_true')
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true')
parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true')
parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_true')
parser.add_argument("--skiplauncher", help="Doesn't display or use the new GUI launcher.", action='store_true')