From 9aa2d8535b7e8a27e5a017769eefcd5b5c7505e6 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 4 Jun 2023 21:47:17 +0800 Subject: [PATCH] hide gpu input box when dropdown not selected, minor memory fix for neox and gptj --- koboldcpp.py | 10 +++++++++- otherarch/gptj_v3.cpp | 2 +- otherarch/neox_v3.cpp | 20 ++++++++++++-------- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 84b1486dd..09d439dd2 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -459,7 +459,14 @@ def show_gui(): opts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"] runchoice = tk.StringVar() runchoice.set("Use OpenBLAS") - tk.OptionMenu( root , runchoice , *opts ).grid(row=2,column=0) + def onDropdownChange(event): + sel = runchoice.get() + if sel==opts[1] or sel==opts[2] or sel==opts[3]: + frm1.grid(row=4,column=0,pady=4) + else: + frm1.grid_forget() + pass + tk.OptionMenu( root , runchoice , command = onDropdownChange ,*opts ).grid(row=2,column=0) frm2 = tk.Frame(root) @@ -479,6 +486,7 @@ def show_gui(): gpu_lbl.grid(row=0,column=0) gpu_layers_input.grid(row=0,column=1) frm1.grid(row=4,column=0,pady=4) + onDropdownChange(None) stream = tk.IntVar() smartcontext = tk.IntVar() diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp index 894e7fb4d..0e4a83148 100644 --- a/otherarch/gptj_v3.cpp +++ b/otherarch/gptj_v3.cpp @@ -368,7 +368,7 @@ bool gptj_eval( static void * buf = malloc(buf_size); if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) { - const size_t buf_size_new = 320u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead + const size_t buf_size_new = 320u*1024*1024 + 1.7*(mem_per_token*N); // add 10% to account for ggml object overhead //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new); // 
reallocate diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp index 65458f609..885976712 100644 --- a/otherarch/neox_v3.cpp +++ b/otherarch/neox_v3.cpp @@ -138,8 +138,8 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model & ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype)); // c_mlp_proj_w ctx_size += n_layer*( n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b - ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_k - ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_v + ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_k + ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_v ctx_size += (6 + 16*n_layer)*512; // object overhead @@ -410,15 +410,19 @@ bool gpt_neox_eval( static void * buf = malloc(buf_size); if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) { - const size_t buf_size_new = 360u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead + const size_t buf_size_new = 360u*1024*1024 + 1.7*(mem_per_token*N); // base + 1.7x the per-token estimate, as headroom for ggml object overhead //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new); // reallocate - buf_size = buf_size_new; - buf = realloc(buf, buf_size); - if (buf == nullptr) { - fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); - return false; + if (buf_size_new > buf_size) + { + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) + { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } } }