Hide the GPU input box unless a CLBlast GPU option is selected in the dropdown; minor memory fix for NeoX and GPT-J
This commit is contained in:
parent
1ddbb9acd9
commit
9aa2d8535b
3 changed files with 22 additions and 10 deletions
10
koboldcpp.py
10
koboldcpp.py
|
@ -459,7 +459,14 @@ def show_gui():
|
|||
opts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
|
||||
runchoice = tk.StringVar()
|
||||
runchoice.set("Use OpenBLAS")
|
||||
tk.OptionMenu( root , runchoice , *opts ).grid(row=2,column=0)
|
||||
def onDropdownChange(event):
    """Show ``frm1`` only while one of the CLBlast GPU entries is selected.

    Passed as the ``command`` callback of the backend ``OptionMenu`` and
    also invoked once by hand with ``None``; ``event`` is accepted for
    that interface but is never read.
    """
    choice = runchoice.get()
    # opts[1]..opts[3] are the three "Use CLBLast GPU #n" entries.
    if choice in (opts[1], opts[2], opts[3]):
        frm1.grid(row=4, column=0, pady=4)
        return
    # Any other backend: take the frame out of the grid layout.
    # NOTE(review): presumably frm1 holds the GPU layers widgets — confirm.
    frm1.grid_forget()
|
||||
tk.OptionMenu( root , runchoice , command = onDropdownChange ,*opts ).grid(row=2,column=0)
|
||||
|
||||
|
||||
frm2 = tk.Frame(root)
|
||||
|
@ -479,6 +486,7 @@ def show_gui():
|
|||
gpu_lbl.grid(row=0,column=0)
|
||||
gpu_layers_input.grid(row=0,column=1)
|
||||
frm1.grid(row=4,column=0,pady=4)
|
||||
onDropdownChange(None)
|
||||
|
||||
stream = tk.IntVar()
|
||||
smartcontext = tk.IntVar()
|
||||
|
|
|
@ -368,7 +368,7 @@ bool gptj_eval(
|
|||
static void * buf = malloc(buf_size);
|
||||
|
||||
if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) {
|
||||
const size_t buf_size_new = 320u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead
|
||||
const size_t buf_size_new = 320u*1024*1024 + 1.7*(mem_per_token*N); // add 10% to account for ggml object overhead
|
||||
//printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
|
||||
|
||||
// reallocate
|
||||
|
|
|
@ -138,8 +138,8 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
|
|||
ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype)); // c_mlp_proj_w
|
||||
ctx_size += n_layer*( n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b
|
||||
|
||||
ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_k
|
||||
ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_v
|
||||
ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_k
|
||||
ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_v
|
||||
|
||||
ctx_size += (6 + 16*n_layer)*512; // object overhead
|
||||
|
||||
|
@ -410,17 +410,21 @@ bool gpt_neox_eval(
|
|||
static void * buf = malloc(buf_size);
|
||||
|
||||
if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) {
|
||||
const size_t buf_size_new = 360u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead
|
||||
const size_t buf_size_new = 360u*1024*1024 + 1.7*(mem_per_token*N); // add 10% to account for ggml object overhead
|
||||
//printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
|
||||
|
||||
// reallocate
|
||||
if (buf_size_new > buf_size)
|
||||
{
|
||||
buf_size = buf_size_new;
|
||||
buf = realloc(buf, buf_size);
|
||||
if (buf == nullptr) {
|
||||
if (buf == nullptr)
|
||||
{
|
||||
fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = buf_size;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue