diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 7d78d6910..7cf1824f9 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -435,11 +435,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         {
             //approximate NTK aware ctx
             auto effectivenctx = params.n_ctx;
-            if((file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON) && llama_ctx_v4->model.hparams.n_ctx_train>2048)
-            {
-                float factor = llama_ctx_v4->model.hparams.n_ctx_train/2048;
-                effectivenctx = effectivenctx/factor;
-            }
+            // if((file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON) && llama_ctx_v4->model.hparams.n_ctx_train>2048)
+            // {
+            //     float factor = llama_ctx_v4->model.hparams.n_ctx_train/2048;
+            //     effectivenctx = effectivenctx/factor;
+            // }
             rope_freq_base = (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : 200000.0f)))));
         }
diff --git a/koboldcpp.py b/koboldcpp.py
index 481a20e49..403fe44d3 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -324,7 +324,7 @@ maxhordectx = 1024
 maxhordelen = 256
 modelbusy = threading.Lock()
 defaultport = 5001
-KcppVersion = "1.42"
+KcppVersion = "1.42.1"
 showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True