hide gpu input box when dropdown not selected, minor memory fix for neox and gptj

Concedo 2023-06-04 21:47:17 +08:00
parent 1ddbb9acd9
commit 9aa2d8535b
3 changed files with 22 additions and 10 deletions


@@ -459,7 +459,14 @@ def show_gui():
     opts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
     runchoice = tk.StringVar()
     runchoice.set("Use OpenBLAS")
-    tk.OptionMenu( root , runchoice , *opts ).grid(row=2,column=0)
+    def onDropdownChange(event):
+        sel = runchoice.get()
+        if sel==opts[1] or sel==opts[2] or sel==opts[3]:
+            frm1.grid(row=4,column=0,pady=4)
+        else:
+            frm1.grid_forget()
+        pass
+    tk.OptionMenu( root , runchoice , command = onDropdownChange ,*opts ).grid(row=2,column=0)
 
     frm2 = tk.Frame(root)
@@ -479,6 +486,7 @@ def show_gui():
     gpu_lbl.grid(row=0,column=0)
     gpu_layers_input.grid(row=0,column=1)
     frm1.grid(row=4,column=0,pady=4)
+    onDropdownChange(None)
 
     stream = tk.IntVar()
     smartcontext = tk.IntVar()
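
Note: this is the standard Tkinter show/hide pattern via grid()/grid_forget(), with the callback invoked once at startup so the initial state matches the default selection. A minimal standalone sketch (hypothetical widget names, not the actual koboldcpp GUI):

import tkinter as tk

root = tk.Tk()
opts = ["CPU", "GPU #1", "GPU #2"]
choice = tk.StringVar(value=opts[0])

# Frame holding the GPU-only inputs; shown only when a GPU option is active.
gpu_frame = tk.Frame(root)
tk.Label(gpu_frame, text="GPU Layers").grid(row=0, column=0)
tk.Entry(gpu_frame, width=8).grid(row=0, column=1)

def on_dropdown_change(event):
    # grid() re-shows the frame; grid_forget() hides it without destroying it.
    if choice.get() in opts[1:]:
        gpu_frame.grid(row=1, column=0, pady=4)
    else:
        gpu_frame.grid_forget()

tk.OptionMenu(root, choice, *opts, command=on_dropdown_change).grid(row=0, column=0)
on_dropdown_change(None)  # as in the commit: apply visibility for the default choice

root.mainloop()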


@@ -368,7 +368,7 @@ bool gptj_eval(
     static void * buf = malloc(buf_size);
 
     if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) {
-        const size_t buf_size_new = 320u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead
+        const size_t buf_size_new = 320u*1024*1024 + 1.7*(mem_per_token*N); // add 10% to account for ggml object overhead
         //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
 
         // reallocate
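
The only change here is the growth estimate dropping from 2x to 1.7x of the projected per-token usage. A rough illustration with made-up numbers (mem_per_token is measured at runtime, not a fixed value):

# Illustrative only; mem_per_token and N are hypothetical values.
mem_per_token = 2_400_000            # bytes per token, as measured on a first pass
N = 1024                             # tokens in the current batch

old_estimate = 320 * 1024 * 1024 + 2.0 * mem_per_token * N
new_estimate = 320 * 1024 * 1024 + 1.7 * mem_per_token * N
print(f"old: {old_estimate / 2**20:.0f} MiB, new: {new_estimate / 2**20:.0f} MiB")
# old: 5008 MiB, new: 4304 MiB -- about 700 MiB less headroom held in reserve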


@@ -138,8 +138,8 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
         ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype)); // c_mlp_proj_w
         ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b
 
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_k
-        ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_v
+        ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_k
+        ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_v
 
         ctx_size += (6 + 16*n_layer)*512; // object overhead
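
This accounting change reflects the KV cache being stored as F16 instead of F32, which halves its contribution to ctx_size. A quick sketch of the magnitude, assuming GPT-NeoX-20B-like shapes (illustrative, not taken from the commit):

# KV cache bytes = n_ctx * n_layer * n_embd * element_size, for each of K and V.
n_ctx, n_layer, n_embd = 2048, 44, 6144    # assumed GPT-NeoX-20B-like dimensions

kv_f32 = 2 * n_ctx * n_layer * n_embd * 4  # memory_k + memory_v at 4 bytes/element
kv_f16 = 2 * n_ctx * n_layer * n_embd * 2  # memory_k + memory_v at 2 bytes/element
print(f"F32 KV cache: {kv_f32 / 2**30:.2f} GiB")   # ~4.12 GiB
print(f"F16 KV cache: {kv_f16 / 2**30:.2f} GiB")   # ~2.06 GiB, exactly half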
@@ -410,15 +410,19 @@ bool gpt_neox_eval(
     static void * buf = malloc(buf_size);
 
     if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) {
-        const size_t buf_size_new = 360u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead
+        const size_t buf_size_new = 360u*1024*1024 + 1.7*(mem_per_token*N); // add 10% to account for ggml object overhead
         //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
 
         // reallocate
-        buf_size = buf_size_new;
-        buf = realloc(buf, buf_size);
-        if (buf == nullptr) {
-            fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
-            return false;
+        if (buf_size_new > buf_size)
+        {
+            buf_size = buf_size_new;
+            buf = realloc(buf, buf_size);
+            if (buf == nullptr)
+            {
+                fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
+                return false;
+            }
         }
     }
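
The substantive fix is the new buf_size_new > buf_size guard: with the multiplier reduced to 1.7, a later call could otherwise compute a smaller estimate and shrink a buffer the surrounding code assumes only ever grows. The same grow-only pattern in Python, with a bytearray standing in for the malloc'd scratch buffer (a sketch, not the actual implementation):

buf = bytearray(64 * 1024 * 1024)    # stand-in for the initial malloc'd buffer

def ensure_capacity(needed: int) -> None:
    """Grow the scratch buffer to at least `needed` bytes; never shrink it."""
    global buf
    if needed > len(buf):            # the guard this commit adds around realloc
        buf = bytearray(needed)      # stand-in for realloc; contents are scratch

ensure_capacity(360 * 1024 * 1024)   # grows the buffer
ensure_capacity(128 * 1024 * 1024)   # no-op: smaller estimate, keep existing buffer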