Merge remote-tracking branch 'temp/concedo' into concedo_experimental
# Conflicts:
#	koboldcpp.py
This commit is contained in:
commit
966cd2ce91
4 changed files with 63 additions and 49 deletions
10
.gitignore
vendored
10
.gitignore
vendored
|
@ -47,3 +47,13 @@ zig-cache/
|
|||
ppl-*.txt
|
||||
|
||||
examples/jeopardy/results.txt
|
||||
koboldcpp.so
|
||||
koboldcpp_noavx2.so
|
||||
koboldcpp_openblas.so
|
||||
koboldcpp_openblas_noavx2.so
|
||||
koboldcpp_clblast.so
|
||||
koboldcpp.dll
|
||||
koboldcpp_noavx2.dll
|
||||
koboldcpp_openblas.dll
|
||||
koboldcpp_openblas_noavx2.dll
|
||||
koboldcpp_clblast.dll
|
1
expose.h
1
expose.h
|
@ -11,6 +11,7 @@ struct load_model_inputs
|
|||
const char * model_filename;
|
||||
const char * lora_filename;
|
||||
const bool use_mmap;
|
||||
const bool use_mlock;
|
||||
const bool use_smartcontext;
|
||||
const bool unban_tokens;
|
||||
const int clblast_info = 0;
|
||||
|
|
|
@ -158,7 +158,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
llama_ctx_params.f16_kv = inputs.f16_kv;
|
||||
llama_ctx_params.logits_all = false;
|
||||
llama_ctx_params.use_mmap = inputs.use_mmap;
|
||||
llama_ctx_params.use_mlock = false;
|
||||
llama_ctx_params.use_mlock = inputs.use_mlock;
|
||||
|
||||
llama_ctx_v1 = llama_init_from_file(modelname.c_str(), llama_ctx_params);
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ class load_model_inputs(ctypes.Structure):
|
|||
("model_filename", ctypes.c_char_p),
|
||||
("lora_filename", ctypes.c_char_p),
|
||||
("use_mmap", ctypes.c_bool),
|
||||
("use_mlock", ctypes.c_bool),
|
||||
("use_smartcontext", ctypes.c_bool),
|
||||
("unban_tokens", ctypes.c_bool),
|
||||
("clblast_info", ctypes.c_int),
|
||||
|
@ -134,6 +135,7 @@ def load_model(model_filename):
|
|||
inputs.threads = args.threads
|
||||
inputs.f16_kv = True
|
||||
inputs.use_mmap = (not args.nommap)
|
||||
inputs.use_mlock = args.usemlock
|
||||
if args.lora and args.lora!="":
|
||||
inputs.use_mmap = False
|
||||
inputs.use_smartcontext = args.smartcontext
|
||||
|
@ -608,6 +610,7 @@ if __name__ == '__main__':
|
|||
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
|
||||
parser.add_argument("--unbantokens", help="Normally, KoboldAI prevents certain tokens such as EOS and Square Brackets. This flag unbans them.", action='store_true')
|
||||
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
|
||||
parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true')
|
||||
parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true')
|
||||
parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_true')
|
||||
parser.add_argument("--skiplauncher", help="Doesn't display or use the new GUI launcher.", action='store_true')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue