Reduced the initial buf memory allocation (from 1600 MiB to 512 MiB) now that realloc seems to work

2023-04-20 17:51:13 +08:00 · 2023-04-20 17:51:13 +08:00 · 49697d86d8
commit 49697d86d8
parent 4605074245
3 changed files with 3 additions and 3 deletions
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -486,7 +486,7 @@ if __name__ == '__main__':
    default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
    parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
    parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
-    parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[64,128,256,512,1024], default=512)
+    parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[32,64,128,256,512,1024], default=512)
    parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
    parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
    parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
--- a/otherarch/gpt2_v2.cpp
+++ b/otherarch/gpt2_v2.cpp
@@ -371,7 +371,7 @@ bool gpt2_eval(
    const int n_vocab = hparams.n_vocab;
    //todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now  
-    static size_t buf_size = 1600u*1024*1024;
+    static size_t buf_size = 512u*1024*1024;
    static void * buf = malloc(buf_size);
    if (mem_per_token > 0 && mem_per_token*N*1.6 > buf_size) {
--- a/otherarch/gptj_v2.cpp
+++ b/otherarch/gptj_v2.cpp
@@ -382,7 +382,7 @@ bool gptj_eval(
    const int d_key = n_embd/n_head;
    //todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now  
-    static size_t buf_size = 1600u*1024*1024;
+    static size_t buf_size = 512u*1024*1024;
    static void * buf = malloc(buf_size);
    if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {