Merge remote-tracking branch 'temp/concedo' into concedo_experimental

# Conflicts:
#	koboldcpp.py
2023-05-02 22:43:34 +08:00 · 2023-05-02 22:43:34 +08:00 · 966cd2ce91
commit 966cd2ce91
parent 58f25dce86 069b3d4c37
4 changed files with 63 additions and 49 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -47,3 +47,13 @@ zig-cache/
 ppl-*.txt

 examples/jeopardy/results.txt
+koboldcpp.so
+koboldcpp_noavx2.so
+koboldcpp_openblas.so
+koboldcpp_openblas_noavx2.so
+koboldcpp_clblast.so
+koboldcpp.dll
+koboldcpp_noavx2.dll
+koboldcpp_openblas.dll
+koboldcpp_openblas_noavx2.dll
+koboldcpp_clblast.dll
--- a/expose.h
+++ b/expose.h
@@ -11,6 +11,7 @@ struct load_model_inputs
    const char * model_filename;
    const char * lora_filename;
    const bool use_mmap;
+    const bool use_mlock;
    const bool use_smartcontext;
    const bool unban_tokens;
    const int clblast_info = 0;
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -158,7 +158,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
        llama_ctx_params.f16_kv = inputs.f16_kv;
        llama_ctx_params.logits_all = false;
        llama_ctx_params.use_mmap = inputs.use_mmap;
-        llama_ctx_params.use_mlock = false;
+        llama_ctx_params.use_mlock = inputs.use_mlock;
        
        llama_ctx_v1 = llama_init_from_file(modelname.c_str(), llama_ctx_params);
        
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -18,6 +18,7 @@ class load_model_inputs(ctypes.Structure):
                ("model_filename", ctypes.c_char_p),
                ("lora_filename", ctypes.c_char_p),
                ("use_mmap", ctypes.c_bool),
+                ("use_mlock", ctypes.c_bool),
                ("use_smartcontext", ctypes.c_bool),
                ("unban_tokens", ctypes.c_bool),
                ("clblast_info", ctypes.c_int),
@@ -134,6 +135,7 @@ def load_model(model_filename):
    inputs.threads = args.threads
    inputs.f16_kv = True
    inputs.use_mmap = (not args.nommap)
+    inputs.use_mlock = args.usemlock
    if args.lora and args.lora!="":
        inputs.use_mmap = False
    inputs.use_smartcontext = args.smartcontext
@@ -608,6 +610,7 @@ if __name__ == '__main__':
    parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
    parser.add_argument("--unbantokens", help="Normally, KoboldAI prevents certain tokens such as EOS and Square Brackets. This flag unbans them.", action='store_true')
    parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
+    parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true')
    parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true')
    parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_true')
    parser.add_argument("--skiplauncher", help="Doesn't display or use the new GUI launcher.", action='store_true')