Hide the GPU layers input box unless a CLBlast GPU option is selected in the dropdown; minor memory fix for GPT-NeoX and GPT-J
This commit is contained in:
parent
1ddbb9acd9
commit
9aa2d8535b
3 changed files with 22 additions and 10 deletions
10
koboldcpp.py
10
koboldcpp.py
|
@ -459,7 +459,14 @@ def show_gui():
|
||||||
opts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
|
opts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
|
||||||
runchoice = tk.StringVar()
|
runchoice = tk.StringVar()
|
||||||
runchoice.set("Use OpenBLAS")
|
runchoice.set("Use OpenBLAS")
|
||||||
tk.OptionMenu( root , runchoice , *opts ).grid(row=2,column=0)
|
def onDropdownChange(event):
|
||||||
|
sel = runchoice.get()
|
||||||
|
if sel==opts[1] or sel==opts[2] or sel==opts[3]:
|
||||||
|
frm1.grid(row=4,column=0,pady=4)
|
||||||
|
else:
|
||||||
|
frm1.grid_forget()
|
||||||
|
pass
|
||||||
|
tk.OptionMenu( root , runchoice , command = onDropdownChange ,*opts ).grid(row=2,column=0)
|
||||||
|
|
||||||
|
|
||||||
frm2 = tk.Frame(root)
|
frm2 = tk.Frame(root)
|
||||||
|
@ -479,6 +486,7 @@ def show_gui():
|
||||||
gpu_lbl.grid(row=0,column=0)
|
gpu_lbl.grid(row=0,column=0)
|
||||||
gpu_layers_input.grid(row=0,column=1)
|
gpu_layers_input.grid(row=0,column=1)
|
||||||
frm1.grid(row=4,column=0,pady=4)
|
frm1.grid(row=4,column=0,pady=4)
|
||||||
|
onDropdownChange(None)
|
||||||
|
|
||||||
stream = tk.IntVar()
|
stream = tk.IntVar()
|
||||||
smartcontext = tk.IntVar()
|
smartcontext = tk.IntVar()
|
||||||
|
|
|
@ -368,7 +368,7 @@ bool gptj_eval(
|
||||||
static void * buf = malloc(buf_size);
|
static void * buf = malloc(buf_size);
|
||||||
|
|
||||||
if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) {
|
if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) {
|
||||||
const size_t buf_size_new = 320u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead
|
const size_t buf_size_new = 320u*1024*1024 + 1.7*(mem_per_token*N); // add 10% to account for ggml object overhead
|
||||||
//printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
|
//printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
|
||||||
|
|
||||||
// reallocate
|
// reallocate
|
||||||
|
|
|
@ -138,8 +138,8 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
|
||||||
ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype)); // c_mlp_proj_w
|
ctx_size += n_layer*(4*n_embd*n_embd*ggml_type_sizef(wtype)); // c_mlp_proj_w
|
||||||
ctx_size += n_layer*( n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b
|
ctx_size += n_layer*( n_embd*ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b
|
||||||
|
|
||||||
ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_k
|
ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_k
|
||||||
ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_v
|
ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F16); // memory_v
|
||||||
|
|
||||||
ctx_size += (6 + 16*n_layer)*512; // object overhead
|
ctx_size += (6 + 16*n_layer)*512; // object overhead
|
||||||
|
|
||||||
|
@ -410,15 +410,19 @@ bool gpt_neox_eval(
|
||||||
static void * buf = malloc(buf_size);
|
static void * buf = malloc(buf_size);
|
||||||
|
|
||||||
if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) {
|
if (mem_per_token > 0 && (mem_per_token*N*2 + 64u*1024*1024) > buf_size) {
|
||||||
const size_t buf_size_new = 360u*1024*1024 + 2*(mem_per_token*N); // add 10% to account for ggml object overhead
|
const size_t buf_size_new = 360u*1024*1024 + 1.7*(mem_per_token*N); // add 10% to account for ggml object overhead
|
||||||
//printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
|
//printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
|
||||||
|
|
||||||
// reallocate
|
// reallocate
|
||||||
buf_size = buf_size_new;
|
if (buf_size_new > buf_size)
|
||||||
buf = realloc(buf, buf_size);
|
{
|
||||||
if (buf == nullptr) {
|
buf_size = buf_size_new;
|
||||||
fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
|
buf = realloc(buf, buf_size);
|
||||||
return false;
|
if (buf == nullptr)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue