From d41490c27b5743daa98423fd1d85ea99c7f439b1 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 23 Apr 2023 00:35:42 +0800
Subject: [PATCH] Revert eval buffer sizing to the last working commit

---
 otherarch/gpt2_v2.cpp | 5 ++---
 otherarch/gptj_v2.cpp | 7 +++----
 otherarch/neox.cpp    | 7 +++----
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/otherarch/gpt2_v2.cpp b/otherarch/gpt2_v2.cpp
index f86750656..2f4c0b0be 100644
--- a/otherarch/gpt2_v2.cpp
+++ b/otherarch/gpt2_v2.cpp
@@ -371,12 +371,11 @@ bool gpt2_eval(
     const int n_vocab = hparams.n_vocab;
 
     //todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now  
-    const size_t extra_buf = 64u*1024*1024;
-    static size_t buf_size = 256u*1024*1024 + extra_buf;    
+    static size_t buf_size = 512u*1024*1024;    
     static void * buf = malloc(buf_size);
 
     if (mem_per_token > 0 && mem_per_token*N*1.6 > buf_size) {
-        const size_t buf_size_new = extra_buf + 2*(mem_per_token*N); // add 10% to account for ggml object overhead
+        const size_t buf_size_new = 2*(mem_per_token*N); // 2x mem_per_token*N, leaving headroom for ggml object overhead
         //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
 
         // reallocate
diff --git a/otherarch/gptj_v2.cpp b/otherarch/gptj_v2.cpp
index a81053e62..040be8478 100644
--- a/otherarch/gptj_v2.cpp
+++ b/otherarch/gptj_v2.cpp
@@ -382,12 +382,11 @@ bool gptj_eval(
     const int d_key = n_embd/n_head;
 
     //todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now  
-    const size_t extra_buf = 64u*1024*1024;
-    static size_t buf_size = 256u*1024*1024 + extra_buf;    
+    static size_t buf_size = 512u*1024*1024;    
     static void * buf = malloc(buf_size);
 
-    if (mem_per_token > 0 && mem_per_token*N*1.5 > buf_size) {
-        const size_t buf_size_new = extra_buf + 1.6*(mem_per_token*N); // add 10% to account for ggml object overhead
+    if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {
+        const size_t buf_size_new = 1.6*(mem_per_token*N); // 1.6x mem_per_token*N, leaving headroom for ggml object overhead
         //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
 
         // reallocate
diff --git a/otherarch/neox.cpp b/otherarch/neox.cpp
index 51450c620..1bcfad61b 100644
--- a/otherarch/neox.cpp
+++ b/otherarch/neox.cpp
@@ -364,12 +364,11 @@ bool stablelm_eval(
     const int n_vocab = hparams.n_vocab;
     const int n_rot   = hparams.n_rot;
 
-    const size_t extra_buf = 64u*1024*1024;
-    static size_t buf_size = 256u*1024*1024 + extra_buf;    
+    static size_t buf_size = 512u*1024*1024;    
     static void * buf = malloc(buf_size);
 
-    if (mem_per_token > 0 && mem_per_token*N*1.5 > buf_size) {
-        const size_t buf_size_new = extra_buf + 1.6*(mem_per_token*N); // add 10% to account for ggml object overhead
+    if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {
+        const size_t buf_size_new = 1.6*(mem_per_token*N); // 1.6x mem_per_token*N, leaving headroom for ggml object overhead
         //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
 
         // reallocate