add option for 4th gpu, also fixed missing case in auto rope scaling
This commit is contained in: parent 2dc96687eb, commit 6667fdcec8
2 changed files with 14 additions and 9 deletions
@@ -443,7 +443,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
             float factor = file_format_meta.n_ctx_train/2048;
             effectivenctx = effectivenctx/factor;
         }
-        rope_freq_base = (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : 200000.0f)))));
+        rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : 200000.0f))))));
     }
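This is the rope-scaling fix named in the commit title: an effectivenctx of 2048 or less previously matched the first branch of the old ternary chain (26000.0f, intended for contexts up to 3072) instead of keeping the stock base of 10000. A quick restatement of the new ladder, in Python purely for illustration:

# Illustrative Python restatement of the C++ ternary ladder above
# (a sketch, not the project's code).
def auto_rope_freq_base(effectivenctx):
    for limit, base in [(2048, 10000.0), (3072, 26000.0), (4096, 32000.0),
                        (6144, 54000.0), (8192, 82684.0), (12288, 140000.0)]:
        if effectivenctx <= limit:
            return base
    return 200000.0

assert auto_rope_freq_base(2048) == 10000.0   # the case this commit adds
assert auto_rope_freq_base(4096) == 32000.0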
koboldcpp.py (21 changed lines)
@@ -237,6 +237,9 @@ def load_model(model_filename):
     elif (args.usecublas and "2" in args.usecublas):
         os.environ["CUDA_VISIBLE_DEVICES"] = "2"
         os.environ["HIP_VISIBLE_DEVICES"] = "2"
+    elif (args.usecublas and "3" in args.usecublas):
+        os.environ["CUDA_VISIBLE_DEVICES"] = "3"
+        os.environ["HIP_VISIBLE_DEVICES"] = "3"
     else:
         if (args.usecublas and "0" in args.usecublas):
             inputs.cublas_info = 0
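Setting CUDA_VISIBLE_DEVICES (or HIP_VISIBLE_DEVICES under ROCm) to a single index masks all other GPUs from the process, so the chosen card appears to the backend as device 0. The mask only works if it is set before the CUDA/HIP runtime initializes. A minimal sketch of the same pattern:

import os

# Must run before any CUDA/HIP library is loaded in this process.
# "3" selects the 4th physical GPU; inside the process it becomes device 0.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
os.environ["HIP_VISIBLE_DEVICES"] = "3"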
@@ -244,6 +247,8 @@ def load_model(model_filename):
             inputs.cublas_info = 1
         elif (args.usecublas and "2" in args.usecublas):
             inputs.cublas_info = 2
+        elif (args.usecublas and "3" in args.usecublas):
+            inputs.cublas_info = 3

     inputs.executable_path = (getdirpath()+"/").encode("UTF-8")
     inputs.debugmode = args.debugmode
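cublas_info carries the chosen main-GPU index into the native library. One way to read the elif ladder is as a lookup of the first numeric token in the --usecublas list; a hypothetical condensed form (not the project's code, with the surrounding else/default handling omitted):

# Hypothetical condensed equivalent of the elif ladder above.
gpu_token = next((t for t in args.usecublas if t in ("0", "1", "2", "3")), None)
if gpu_token is not None:
    inputs.cublas_info = int(gpu_token)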
@@ -960,8 +965,8 @@ def show_new_gui():

     # gpu options
     quick_gpu_selector_label = makelabel(quick_tab, "GPU ID:", 3)
-    quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
-    CUDA_quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3","All"], width=60, variable=gpu_choice_var, state="readonly")
+    quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3","4"], width=60, variable=gpu_choice_var, state="readonly")
+    CUDA_quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3","4","All"], width=60, variable=gpu_choice_var, state="readonly")
     quick_gpu_layers_entry,quick_gpu_layers_label = makelabelentry(quick_tab,"GPU Layers:", gpulayers_var, 5, 50)
     quick_lowvram_box = makecheckbox(quick_tab, "Low VRAM", lowvram_var, 4,0)
     quick_mmq_box = makecheckbox(quick_tab, "Use QuantMatMul (mmq)", mmq_var, 4,1)
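Both selector boxes bind the same gpu_choice_var, so the selection survives switching between the CLBlast and CuBLAS/hipBLAS views; only the CUDA variant offers an "All" entry. A standalone sketch of the widget pattern (assumes customtkinter is installed; the window and grid placement here are illustrative):

import customtkinter as ctk

root = ctk.CTk()
gpu_choice_var = ctk.StringVar(value="1")   # shared by both selector boxes
box = ctk.CTkComboBox(root, values=["1","2","3","4","All"], width=60,
                      variable=gpu_choice_var, state="readonly")
box.grid(row=3, column=1, padx=8, pady=8)
root.mainloop()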
@@ -996,8 +1001,8 @@ def show_new_gui():

     # gpu options
     gpu_selector_label = makelabel(hardware_tab, "GPU ID:", 3)
-    gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
-    CUDA_gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3", "All"], width=60, variable=gpu_choice_var, state="readonly")
+    gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3","4"], width=60, variable=gpu_choice_var, state="readonly")
+    CUDA_gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3","4", "All"], width=60, variable=gpu_choice_var, state="readonly")
     gpu_layers_entry,gpu_layers_label = makelabelentry(hardware_tab,"GPU Layers:", gpulayers_var, 5, 50)
     lowvram_box = makecheckbox(hardware_tab, "Low VRAM", lowvram_var, 4,0)
     mmq_box = makecheckbox(hardware_tab, "Use QuantMatMul (mmq)", mmq_var, 4,1)
@@ -1131,7 +1136,7 @@ def show_new_gui():
         if gpu_choice_var.get()!="All":
             gpuchoiceidx = int(gpu_choice_var.get())-1
         if runopts_var.get() == "Use CLBlast":
-            args.useclblast = [[0,0], [1,0], [0,1]][gpuchoiceidx]
+            args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
         if runopts_var.get() == "Use CuBLAS/hipBLAS":
             if gpu_choice_var.get()=="All":
                 args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"]
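Each CLBlast entry is an OpenCL [platform ID, device ID] pair, so the new fourth slot maps GUI GPU 4 to platform 1, device 1. How the GUI index walks the table, as a sketch:

# Sketch: GUI GPU IDs 1..4 mapped onto OpenCL (platform, device) pairs.
clblast_targets = [[0,0], [1,0], [0,1], [1,1]]
for gui_id in range(1, 5):
    platform_id, device_id = clblast_targets[gui_id - 1]
    print(f"GPU {gui_id} -> --useclblast {platform_id} {device_id}")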
@@ -1187,14 +1192,14 @@ def show_new_gui():
         if "useclblast" in dict and dict["useclblast"]:
             if clblast_option is not None:
                 runopts_var.set(clblast_option)
-                gpu_choice_var.set(str(["0 0", "1 0", "0 1"].index(str(dict["useclblast"][0]) + " " + str(dict["useclblast"][1])) + 1))
+                gpu_choice_var.set(str(["0 0", "1 0", "0 1", "1 1"].index(str(dict["useclblast"][0]) + " " + str(dict["useclblast"][1])) + 1))
         elif "usecublas" in dict and dict["usecublas"]:
             if cublas_option is not None:
                 runopts_var.set(cublas_option)
                 lowvram_var.set(1 if "lowvram" in dict["usecublas"] else 0)
                 mmq_var.set(1 if "mmq" in dict["usecublas"] else 0)
                 gpu_choice_var.set("All")
-                for g in range(3):
+                for g in range(4):
                     if str(g) in dict["usecublas"]:
                         gpu_choice_var.set(str(g+1))
                         break
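This is the config-reload path, the inverse of the mapping above: the stored [platform, device] pair is turned back into a GUI GPU ID via index(), and the range(4) scan recovers a single-GPU CuBLAS selection from the saved flags. A round-trip check, as a sketch:

# Sketch: recovering the GUI GPU ID from a saved useclblast pair.
pairs = ["0 0", "1 0", "0 1", "1 1"]
saved = [1, 1]                                   # e.g. dict["useclblast"]
gui_id = pairs.index(str(saved[0]) + " " + str(saved[1])) + 1
assert gui_id == 4                               # the newly reachable 4th GPU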
@@ -1800,7 +1805,7 @@ if __name__ == '__main__':
     compatgroup = parser.add_mutually_exclusive_group()
     compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
     compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
-    compatgroup.add_argument("--usecublas", help="Use CuBLAS/hipBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq]'), choices=['normal', 'lowvram', '0', '1', '2', 'mmq'])
+    compatgroup.add_argument("--usecublas", help="Use CuBLAS/hipBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq'])
     parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
     parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
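Because nargs='*' makes argparse validate every token of --usecublas against the choices list, '3' has to be whitelisted before a 4th-GPU selection can parse at all. A self-contained check:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--usecublas", nargs='*',
                    choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq'])
# Parses now; before this commit "3" was rejected as an invalid choice.
print(parser.parse_args(["--usecublas", "lowvram", "3", "mmq"]).usecublas)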