diff --git a/koboldcpp.py b/koboldcpp.py
index a8f37b857..812141d22 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -486,7 +486,7 @@ if __name__ == '__main__':
     default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
     parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
     parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
-    parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[64,128,256,512,1024], default=512)
+    parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[32,64,128,256,512,1024], default=512)
     parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
     parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
     parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
diff --git a/otherarch/gpt2_v2.cpp b/otherarch/gpt2_v2.cpp
index 8accd8fb7..6a9cf5205 100644
--- a/otherarch/gpt2_v2.cpp
+++ b/otherarch/gpt2_v2.cpp
@@ -371,7 +371,7 @@ bool gpt2_eval(
     const int n_vocab = hparams.n_vocab;
 
     //todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now  
-    static size_t buf_size = 1600u*1024*1024;
+    static size_t buf_size = 512u*1024*1024;
     static void * buf = malloc(buf_size);
 
     if (mem_per_token > 0 && mem_per_token*N*1.6 > buf_size) {
diff --git a/otherarch/gptj_v2.cpp b/otherarch/gptj_v2.cpp
index 7aa1b65ab..06752ff9a 100644
--- a/otherarch/gptj_v2.cpp
+++ b/otherarch/gptj_v2.cpp
@@ -382,7 +382,7 @@ bool gptj_eval(
     const int d_key = n_embd/n_head;
 
     //todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now  
-    static size_t buf_size = 1600u*1024*1024;
+    static size_t buf_size = 512u*1024*1024;
     static void * buf = malloc(buf_size);
 
     if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {