From 9aa2d8535b7e8a27e5a017769eefcd5b5c7505e6 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 4 Jun 2023 21:47:17 +0800 Subject: [PATCH] hide gpu input box when dropdown not selected, minor memory fix for neox and gptj --- koboldcpp.py | 10 +++++++++- otherarch/gptj_v3.cpp | 2 +- otherarch/neox_v3.cpp | 20 ++++++++++++-------- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 84b1486dd..09d439dd2 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -459,7 +459,14 @@ def show_gui(): opts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"] runchoice = tk.StringVar() runchoice.set("Use OpenBLAS") - tk.OptionMenu( root , runchoice , *opts ).grid(row=2,column=0) + def onDropdownChange(event): + sel = runchoice.get() + if sel==opts[1] or sel==opts[2] or sel==opts[3]: + frm1.grid(row=4,column=0,pady=4) + else: + frm1.grid_forget() + pass + tk.OptionMenu( root , runchoice , command = onDropdownChange ,*opts ).grid(row=2,column=0) frm2 = tk.Frame(root) @@ -479,6 +486,7 @@ def show_gui(): gpu_lbl.grid(row=0,column=0) gpu_layers_input.grid(row=0,column=1) frm1.grid(row=4,column=0,pady=4) + onDropdownChange(None) stream = tk.IntVar() smartcontext = tk.IntVar() diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp index 894e7fb4d..0e4a83148 100644 --- a/otherarch/gptj_v3.cpp +++ b/otherarch/gptj_v3.cpp @@ -368,7 +368,7 @@ bool gptj_eval( static void * buf = malloc(buf_size); if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) { - const size_t buf_size_new = 320u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead + const size_t buf_size_new = 320u*1024*1024 + 1.7*(mem_per_token*N); // add 10% to account for ggml object overhead //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new); // 
reallocate diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp index 65458f609..885976712 100644 --- a/otherarch/neox_v3.cpp +++ b/otherarch/neox_v3.cpp @@ -138,8 +138,8 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model & ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype)); // c_mlp_proj_w ctx_size += n_layer*( n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b - ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_k - ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_v + ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_k + ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_v ctx_size += (6 + 16*n_layer)*512; // object overhead @@ -410,15 +410,19 @@ bool gpt_neox_eval( static void * buf = malloc(buf_size); if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) { - const size_t buf_size_new = 360u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead + const size_t buf_size_new = 360u*1024*1024 + 1.7*(mem_per_token*N); // base + 1.7x the per-token estimate, as headroom for ggml object overhead //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new); // reallocate - buf_size = buf_size_new; - buf = realloc(buf, buf_size); - if (buf == nullptr) { - fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); - return false; + if (buf_size_new > buf_size) + { + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) + { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } } }