Add Multi-GPU CuBLAS support in the new GUI
This commit is contained in:
parent
ddaa4f2a26
commit
242f01e983
1 changed files with 20 additions and 6 deletions
26
koboldcpp.py
26
koboldcpp.py
|
@ -9,6 +9,7 @@ import ctypes
|
||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
import json, sys, http.server, time, asyncio, socket, threading
|
import json, sys, http.server, time, asyncio, socket, threading
|
||||||
|
import re
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
stop_token_max = 10
|
stop_token_max = 10
|
||||||
|
@ -764,21 +765,27 @@ def show_new_gui():
|
||||||
quick_gpu_layers_entry,quick_gpu_layers_label = makelabelentry(quick_tab,"GPU Layers:", gpulayers_var, 4, 50)
|
quick_gpu_layers_entry,quick_gpu_layers_label = makelabelentry(quick_tab,"GPU Layers:", gpulayers_var, 4, 50)
|
||||||
quick_gpu_selector_label = makelabel(quick_tab, "GPU ID:", 3)
|
quick_gpu_selector_label = makelabel(quick_tab, "GPU ID:", 3)
|
||||||
quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
|
quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
|
||||||
|
CUDA_quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3","All"], width=60, variable=gpu_choice_var, state="readonly")
|
||||||
quick_lowvram_box = makecheckbox(quick_tab, "Low VRAM", lowvram_var, 5)
|
quick_lowvram_box = makecheckbox(quick_tab, "Low VRAM", lowvram_var, 5)
|
||||||
|
|
||||||
# hides gpu options when CLBlast is not chosen
|
|
||||||
def changerunmode(a,b,c):
|
def changerunmode(a,b,c):
|
||||||
index = runopts_var.get()
|
index = runopts_var.get()
|
||||||
if index == "Use CLBlast" or index == "Use CuBLAS":
|
if index == "Use CLBlast" or index == "Use CuBLAS":
|
||||||
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||||
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
|
||||||
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||||
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
if index == "Use CLBlast":
|
||||||
|
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||||
|
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||||
|
elif index == "Use CuBLAS":
|
||||||
|
CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||||
|
CUDA_quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||||
else:
|
else:
|
||||||
gpu_selector_label.grid_forget()
|
gpu_selector_label.grid_forget()
|
||||||
gpu_selector_box.grid_forget()
|
gpu_selector_box.grid_forget()
|
||||||
|
CUDA_gpu_selector_box.grid_forget()
|
||||||
quick_gpu_selector_label.grid_forget()
|
quick_gpu_selector_label.grid_forget()
|
||||||
quick_gpu_selector_box.grid_forget()
|
quick_gpu_selector_box.grid_forget()
|
||||||
|
CUDA_quick_gpu_selector_box.grid_forget()
|
||||||
|
|
||||||
if index == "Use CuBLAS":
|
if index == "Use CuBLAS":
|
||||||
lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
|
lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
|
||||||
|
@ -829,6 +836,7 @@ def show_new_gui():
|
||||||
gpu_layers_entry,gpu_layers_label = makelabelentry(hardware_tab,"GPU Layers:", gpulayers_var, 4, 50)
|
gpu_layers_entry,gpu_layers_label = makelabelentry(hardware_tab,"GPU Layers:", gpulayers_var, 4, 50)
|
||||||
gpu_selector_label = makelabel(hardware_tab, "GPU ID:", 3)
|
gpu_selector_label = makelabel(hardware_tab, "GPU ID:", 3)
|
||||||
gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
|
gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3"], width=60, variable=gpu_choice_var, state="readonly")
|
||||||
|
CUDA_gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3", "All"], width=60, variable=gpu_choice_var, state="readonly")
|
||||||
lowvram_box = makecheckbox(hardware_tab, "Low VRAM", lowvram_var, 5)
|
lowvram_box = makecheckbox(hardware_tab, "Low VRAM", lowvram_var, 5)
|
||||||
|
|
||||||
# presets selector
|
# presets selector
|
||||||
|
@ -958,12 +966,18 @@ def show_new_gui():
|
||||||
args.stream = stream.get()==1
|
args.stream = stream.get()==1
|
||||||
args.smartcontext = smartcontext.get()==1
|
args.smartcontext = smartcontext.get()==1
|
||||||
args.unbantokens = unbantokens.get()==1
|
args.unbantokens = unbantokens.get()==1
|
||||||
|
gpu_choice_str = gpu_choice_var.get()
|
||||||
|
if gpu_choice_str.isdigit():
|
||||||
|
gpuchoiceidx = int(gpu_choice_var.get())-1
|
||||||
|
|
||||||
gpuchoiceidx = int(gpu_choice_var.get())-1
|
|
||||||
if runopts_var.get() == runopts[1]:
|
if runopts_var.get() == runopts[1]:
|
||||||
args.useclblast = [[0,0], [1,0], [0,1]][gpuchoiceidx]
|
args.useclblast = [[0,0], [1,0], [0,1]][gpuchoiceidx]
|
||||||
if runopts_var.get() == runopts[2]:
|
if runopts_var.get() == runopts[2]:
|
||||||
args.usecublas = ["lowvram",str(gpuchoiceidx)] if lowvram_var.get() == 1 else ["normal",str(gpuchoiceidx)]
|
if gpu_choice_str.lower() == "all":
|
||||||
|
args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"]
|
||||||
|
else:
|
||||||
|
args.usecublas = ["lowvram",str(gpuchoiceidx)] if lowvram_var.get() == 1 else ["normal",str(gpuchoiceidx)]
|
||||||
|
|
||||||
if gpulayers_var.get():
|
if gpulayers_var.get():
|
||||||
args.gpulayers = int(gpulayers_var.get())
|
args.gpulayers = int(gpulayers_var.get())
|
||||||
if runopts_var.get()==runopts[3]:
|
if runopts_var.get()==runopts[3]:
|
||||||
|
@ -1329,7 +1343,7 @@ if __name__ == '__main__':
|
||||||
compatgroup = parser.add_mutually_exclusive_group()
|
compatgroup = parser.add_mutually_exclusive_group()
|
||||||
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
|
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
|
||||||
compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
|
compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
|
||||||
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires Nvidia GPU. Select lowvram to not allocate VRAM scratch buffer. Enter a number after to select a different main GPU.", nargs='*',metavar=('[lowvram|normal] [main GPU ID]'), choices=['normal', 'lowvram', '0', '1', '2'])
|
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID]'), choices=['normal', 'lowvram', '0', '1', '2'])
|
||||||
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
|
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(args)
|
main(args)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue