diff --git a/ggml.h b/ggml.h index 1b26da3ad..780aab464 100644 --- a/ggml.h +++ b/ggml.h @@ -194,7 +194,7 @@ #define GGML_QNT_VERSION_FACTOR 1000 // do not change this #define GGML_MAX_DIMS 4 -#define GGML_MAX_NODES 4096 +#define GGML_MAX_NODES 8192 #define GGML_MAX_PARAMS 256 #define GGML_MAX_CONTEXTS 64 #define GGML_MAX_OPT 4 diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 5ff9d0451..8307e7ec5 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -479,7 +479,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in } else { - n_batch = 10; //use sequence mode to speedup + n_batch = 8; //use sequence mode to speedup //setup buffers for rwkv state auto padding = 512u; diff --git a/koboldcpp.py b/koboldcpp.py index aeba36434..4f65d5b3e 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -224,7 +224,7 @@ maxctx = 2048 maxlen = 256 modelbusy = False defaultport = 5001 -KcppVersion = "1.30" +KcppVersion = "1.30.1" class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): sys_version = ""