Fixed segfault with CLBlast by reverting the commit referenced in issue https://github.com/ggerganov/llama.cpp/issues/4296

2023-12-03 00:56:00 +08:00 · 2023-12-03 00:56:00 +08:00 · c142c5634a
commit c142c5634a
parent a8e66ef31c
2 changed files with 7 additions and 14 deletions
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -99,7 +99,7 @@ static std::mutex concat_output_mtx;
 static std::string concat_output = "";
 static std::string concat_output_reader_copy = "";

-const size_t extra_context_handle_fragmentation = 80;
+const int extra_context_handle_fragmentation = 80;

 inline bool IsNanCheck(float f)
 {
@@ -888,6 +888,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
        llama_model_params model_params = llama_model_default_params();
        llama_context_params llama_ctx_params = llama_context_default_params();
        llama_ctx_params.n_ctx = clamped_max_context_length;
+        if(useContextShift)
+        {
+           llama_ctx_params.n_ctx += extra_context_handle_fragmentation;
+        }
+
        //llama_ctx_paran_parts = -1;
        llama_ctx_params.seed = -1;
        llama_ctx_params.f16_kv = inputs.f16_kv;
@@ -1447,18 +1452,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
    params.n_threads_batch = n_blasthreads;
    bool stream_sse = inputs.stream_sse;

-    if(params.n_ctx >= 256 && useContextShift && (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON))
-    {
-        if(params.n_ctx + extra_context_handle_fragmentation >= max_context_limit_at_load)
-        {
-            params.n_ctx -= extra_context_handle_fragmentation; //add some additional buffer to handle KV fragmentation
-            if(debugmode==1)
-            {
-                printf("\nTrue max context permitted: %d\n",params.n_ctx);
-            }
-        }
-    }
-
    bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;

    generation_finished = false; // Set current generation status
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -392,7 +392,7 @@ maxhordelen = 256
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.51"
+KcppVersion = "1.51.1"
 showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True