From c142c5634ac2b41d2e5eb07724765abc1467313a Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 3 Dec 2023 00:56:00 +0800
Subject: [PATCH] Fix segfault with CLBlast by reverting the commit identified
 in issue https://github.com/ggerganov/llama.cpp/issues/4296

---
 gpttype_adapter.cpp | 19 ++++++-------------
 koboldcpp.py        |  2 +-
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index e1830c733..fb12bb4f6 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -99,7 +99,7 @@ static std::mutex concat_output_mtx;
 static std::string concat_output = "";
 static std::string concat_output_reader_copy = "";
 
-const size_t extra_context_handle_fragmentation = 80;
+const int extra_context_handle_fragmentation = 80;
 
 inline bool IsNanCheck(float f)
 {
@@ -888,6 +888,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         llama_model_params model_params = llama_model_default_params();
         llama_context_params llama_ctx_params = llama_context_default_params();
         llama_ctx_params.n_ctx = clamped_max_context_length;
+        if(useContextShift)
+        {
+           llama_ctx_params.n_ctx += extra_context_handle_fragmentation;
+        }
+
         //llama_ctx_paran_parts = -1;
         llama_ctx_params.seed = -1;
         llama_ctx_params.f16_kv = inputs.f16_kv;
@@ -1447,18 +1452,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     params.n_threads_batch = n_blasthreads;
     bool stream_sse = inputs.stream_sse;
 
-    if(params.n_ctx >= 256 && useContextShift && (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON))
-    {
-        if(params.n_ctx + extra_context_handle_fragmentation >= max_context_limit_at_load)
-        {
-            params.n_ctx -= extra_context_handle_fragmentation; //add some additional buffer to handle KV fragmentation
-            if(debugmode==1)
-            {
-                printf("\nTrue max context permitted: %d\n",params.n_ctx);
-            }
-        }
-    }
-
     bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;
 
     generation_finished = false; // Set current generation status
diff --git a/koboldcpp.py b/koboldcpp.py
index e2843a4e3..2471103cc 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -392,7 +392,7 @@ maxhordelen = 256
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.51"
+KcppVersion = "1.51.1"
 showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True