diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 96f51a04c..856ebf1c1 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -4332,7 +4332,7 @@ static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) { } void * ptr; - size_t look_ahead_size = (size_t) (1.02 * size); + size_t look_ahead_size = (size_t) (1.05 * size); look_ahead_size = 256 * ((look_ahead_size + 255)/256); CUDA_CHECK(cudaMalloc((void **) &ptr, look_ahead_size)); *actual_size = look_ahead_size; diff --git a/koboldcpp.py b/koboldcpp.py index 34a619cd7..40441cffa 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -303,7 +303,7 @@ maxhordectx = 1024 maxhordelen = 256 modelbusy = threading.Lock() defaultport = 5001 -KcppVersion = "1.39.1" +KcppVersion = "1.39.2" showdebug = True showsamplerwarning = True showmaxctxwarning = True