From c142c5634ac2b41d2e5eb07724765abc1467313a Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 3 Dec 2023 00:56:00 +0800 Subject: [PATCH] fixed segfault with clblast by reversing commit in issue https://github.com/ggerganov/llama.cpp/issues/4296 --- gpttype_adapter.cpp | 19 ++++++------------- koboldcpp.py | 2 +- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index e1830c733..fb12bb4f6 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -99,7 +99,7 @@ static std::mutex concat_output_mtx; static std::string concat_output = ""; static std::string concat_output_reader_copy = ""; -const size_t extra_context_handle_fragmentation = 80; +const int extra_context_handle_fragmentation = 80; inline bool IsNanCheck(float f) { @@ -888,6 +888,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in llama_model_params model_params = llama_model_default_params(); llama_context_params llama_ctx_params = llama_context_default_params(); llama_ctx_params.n_ctx = clamped_max_context_length; + if(useContextShift) + { + llama_ctx_params.n_ctx += extra_context_handle_fragmentation; + } + //llama_ctx_paran_parts = -1; llama_ctx_params.seed = -1; llama_ctx_params.f16_kv = inputs.f16_kv; @@ -1447,18 +1452,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o params.n_threads_batch = n_blasthreads; bool stream_sse = inputs.stream_sse; - if(params.n_ctx >= 256 && useContextShift && (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)) - { - if(params.n_ctx + extra_context_handle_fragmentation >= max_context_limit_at_load) - { - params.n_ctx -= extra_context_handle_fragmentation; //add some additional buffer to handle KV fragmentation - if(debugmode==1) - { - printf("\nTrue max context permitted: %d\n",params.n_ctx); - } - } - } - bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1; generation_finished = false; // Set current generation status diff --git a/koboldcpp.py b/koboldcpp.py index e2843a4e3..2471103cc 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -392,7 +392,7 @@ maxhordelen = 256 modelbusy = threading.Lock() requestsinqueue = 0 defaultport = 5001 -KcppVersion = "1.51" +KcppVersion = "1.51.1" showdebug = True showsamplerwarning = True showmaxctxwarning = True