diff --git a/koboldcpp.py b/koboldcpp.py index a8f37b857..812141d22 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -486,7 +486,7 @@ if __name__ == '__main__': default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1)) parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads) parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true') - parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[64,128,256,512,1024], default=512) + parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[32,64,128,256,512,1024], default=512) parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true') parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true') parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true') diff --git a/otherarch/gpt2_v2.cpp b/otherarch/gpt2_v2.cpp index 8accd8fb7..6a9cf5205 100644 --- a/otherarch/gpt2_v2.cpp +++ b/otherarch/gpt2_v2.cpp @@ -371,7 +371,7 @@ bool gpt2_eval( const int n_vocab = hparams.n_vocab; //todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now - static size_t buf_size = 1600u*1024*1024; + static size_t buf_size = 512u*1024*1024; static void * buf = malloc(buf_size); if (mem_per_token > 0 && mem_per_token*N*1.6 > buf_size) { diff --git a/otherarch/gptj_v2.cpp b/otherarch/gptj_v2.cpp index 7aa1b65ab..06752ff9a 100644 --- a/otherarch/gptj_v2.cpp +++ b/otherarch/gptj_v2.cpp @@ -382,7 +382,7 @@ bool gptj_eval( const int d_key = n_embd/n_head; //todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now - static size_t buf_size = 1600u*1024*1024; + static size_t buf_size = 512u*1024*1024; static void * buf = malloc(buf_size); if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {