diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 46ac296be..1df0c578e 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -4563,7 +4563,6 @@ struct cuda_buffer { static cuda_buffer g_cuda_buffer_pool[GGML_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS]; static std::atomic_flag g_cuda_pool_lock = ATOMIC_FLAG_INIT; -static bool g_mul_mat_q = false; static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) { scoped_spin_lock lock(g_cuda_pool_lock); diff --git a/koboldcpp.py b/koboldcpp.py index c4ce973fc..ab3ce85ec 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -303,7 +303,7 @@ maxhordectx = 1024 maxhordelen = 256 modelbusy = threading.Lock() defaultport = 5001 -KcppVersion = "1.40" +KcppVersion = "1.40.1" showdebug = True showsamplerwarning = True showmaxctxwarning = True