wip on embedded horde worker
This commit is contained in:
parent
06c08576f7
commit
75064b4ada
1 changed files with 115 additions and 6 deletions
121
koboldcpp.py
121
koboldcpp.py
|
@ -286,6 +286,7 @@ defaultport = 5001
|
|||
KcppVersion = "1.36"
|
||||
showdebug = True
|
||||
showsamplerwarning = True
|
||||
exited = False
|
||||
|
||||
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||
sys_version = ""
|
||||
|
@ -581,6 +582,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
|
||||
|
||||
def RunServerMultiThreaded(addr, port, embedded_kailite = None):
|
||||
global exited
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
sock.bind((addr, port))
|
||||
|
@ -601,12 +603,15 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
|
|||
self.httpd.server_bind = self.server_close = lambda self: None
|
||||
self.httpd.serve_forever()
|
||||
except (KeyboardInterrupt,SystemExit):
|
||||
exited = True
|
||||
self.httpd.server_close()
|
||||
sys.exit(0)
|
||||
finally:
|
||||
exited = True
|
||||
self.httpd.server_close()
|
||||
sys.exit(0)
|
||||
def stop(self):
|
||||
exited = True
|
||||
self.httpd.server_close()
|
||||
|
||||
numThreads = 6
|
||||
|
@ -617,6 +622,7 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
|
|||
try:
|
||||
time.sleep(10)
|
||||
except KeyboardInterrupt:
|
||||
exited = True
|
||||
for i in range(numThreads):
|
||||
threadArr[i].stop()
|
||||
sys.exit(0)
|
||||
|
@ -777,6 +783,8 @@ def show_new_gui():
|
|||
horde_name_var = ctk.StringVar(value="koboldcpp")
|
||||
horde_gen_var = ctk.StringVar(value=maxhordelen)
|
||||
horde_context_var = ctk.StringVar(value=maxhordectx)
|
||||
horde_apikey_var = ctk.StringVar(value="")
|
||||
horde_workername_var = ctk.StringVar(value="")
|
||||
usehorde_var = ctk.IntVar()
|
||||
|
||||
# Quick Launch Tab
|
||||
|
@ -938,13 +946,15 @@ def show_new_gui():
|
|||
# horde
|
||||
makelabel(network_tab, "Horde:", 3).grid(pady=10)
|
||||
|
||||
horde_name_entry, horde_name_label = makelabelentry(network_tab, "Horde Name:", horde_name_var, 5, 200)
|
||||
horde_name_entry, horde_name_label = makelabelentry(network_tab, "Horde Model Name:", horde_name_var, 5, 180)
|
||||
horde_gen_entry, horde_gen_label = makelabelentry(network_tab, "Gen. Length:", horde_gen_var, 6, 50)
|
||||
horde_context_entry, horde_context_label = makelabelentry(network_tab, "Max Context:",horde_context_var, 7, 50)
|
||||
horde_apikey_entry, horde_apikey_label = makelabelentry(network_tab, "API Key (If Embedded Worker):",horde_apikey_var, 8, 180)
|
||||
horde_workername_entry, horde_workername_label = makelabelentry(network_tab, "Horde Worker Name:",horde_workername_var, 9, 180)
|
||||
|
||||
def togglehorde(a,b,c):
|
||||
labels = [horde_name_label, horde_gen_label, horde_context_label]
|
||||
for idx, item in enumerate([horde_name_entry, horde_gen_entry, horde_context_entry]):
|
||||
labels = [horde_name_label, horde_gen_label, horde_context_label, horde_apikey_label, horde_workername_label]
|
||||
for idx, item in enumerate([horde_name_entry, horde_gen_entry, horde_context_entry, horde_apikey_entry, horde_workername_entry]):
|
||||
if usehorde_var.get() == 1:
|
||||
item.grid(row=5 + idx, column = 1, padx=8, pady=1, stick="nw")
|
||||
labels[idx].grid(row=5 + idx, padx=8, pady=1, stick="nw")
|
||||
|
@ -1022,7 +1032,10 @@ def show_new_gui():
|
|||
args.port_param = defaultport if port_var.get()=="" else int(port_var.get())
|
||||
args.host = host_var.get()
|
||||
|
||||
args.hordeconfig = None if usehorde_var.get() == 0 else [horde_name_var.get(), horde_gen_var.get(), horde_context_var.get()]
|
||||
if horde_apikey_var.get()=="" or horde_workername_var.get()=="":
|
||||
args.hordeconfig = None if usehorde_var.get() == 0 else [horde_name_var.get(), horde_gen_var.get(), horde_context_var.get()]
|
||||
else:
|
||||
args.hordeconfig = None if usehorde_var.get() == 0 else [horde_name_var.get(), horde_gen_var.get(), horde_context_var.get(), horde_apikey_var.get(), horde_workername_var.get()]
|
||||
|
||||
def import_vars(dict):
|
||||
threads_var.set(dict["threads"])
|
||||
|
@ -1106,6 +1119,9 @@ def show_new_gui():
|
|||
horde_name_var.set(dict["hordeconfig"][0])
|
||||
horde_gen_var.set(dict["hordeconfig"][1])
|
||||
horde_context_var.set(dict["hordeconfig"][2])
|
||||
if len(dict["hordeconfig"]) > 4:
|
||||
horde_apikey_var.set(dict["hordeconfig"][3])
|
||||
horde_workername_var.set(dict["hordeconfig"][4])
|
||||
|
||||
def save_config():
|
||||
file_type = [("KoboldCpp Settings", "*.kcpps")]
|
||||
|
@ -1316,8 +1332,96 @@ def show_old_gui():
|
|||
time.sleep(2)
|
||||
sys.exit(2)
|
||||
|
||||
def main(args):
|
||||
#A very simple and stripped down embedded horde worker with no dependencies
|
||||
def run_horde_worker(args, api_key, worker_name):
|
||||
import urllib.request
|
||||
global friendlymodelname, maxhordectx, maxhordelen, exited
|
||||
|
||||
def make_url_request(url, data, method='POST'):
|
||||
try:
|
||||
request = None
|
||||
headers = {"apikey": api_key,'User-Agent':'KoboldCpp Embedded Worker v1'}
|
||||
if method=='POST':
|
||||
json_payload = json.dumps(data).encode('utf-8')
|
||||
request = urllib.request.Request(url, data=json_payload, headers=headers, method=method)
|
||||
request.add_header('Content-Type', 'application/json')
|
||||
else:
|
||||
request = urllib.request.Request(url, headers=headers, method=method)
|
||||
response_data = ""
|
||||
with urllib.request.urlopen(request) as response:
|
||||
response_data = response.read().decode('utf-8')
|
||||
json_response = json.loads(response_data)
|
||||
return json_response
|
||||
except urllib.error.HTTPError as e:
|
||||
try:
|
||||
errmsg = e.read().decode('utf-8')
|
||||
print(f"Error: {e} - {errmsg}, Make sure your Horde API key and worker name is valid.")
|
||||
except Exception as e:
|
||||
print(f"Error: {e}, Make sure your Horde API key and worker name is valid.")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"Error: {e} - {response_data}, Make sure your Horde API key and worker name is valid.")
|
||||
return None
|
||||
|
||||
current_id = None
|
||||
current_payload = None
|
||||
print("Embedded Horde Worker '"+worker_name+"' Starting...")
|
||||
BRIDGE_AGENT = f"KoboldCppEmbedWorker:1:https://github.com/LostRuins/koboldcpp"
|
||||
cluster = "https://aihorde.net"
|
||||
while not exited:
|
||||
time.sleep(2)
|
||||
readygo = make_url_request(f'http://localhost:{args.port}/api/v1/info/version', None,'GET')
|
||||
if readygo:
|
||||
print("Embedded Horde Worker is started.")
|
||||
break
|
||||
|
||||
while not exited:
|
||||
#pop new request
|
||||
gen_dict = {
|
||||
"name": worker_name,
|
||||
"models": [friendlymodelname],
|
||||
"max_length": maxhordelen,
|
||||
"max_context_length": maxhordectx,
|
||||
"priority_usernames": [],
|
||||
"softprompts": [],
|
||||
"bridge_agent": BRIDGE_AGENT,
|
||||
}
|
||||
pop = make_url_request(f'{cluster}/api/v2/generate/text/pop',gen_dict)
|
||||
if not pop:
|
||||
print(f"Failed to fetch job from {cluster}. Waiting 5 seconds...")
|
||||
time.sleep(5)
|
||||
continue
|
||||
if not pop["id"]:
|
||||
print(f"Server {cluster} has no valid generations to do for us.")
|
||||
time.sleep(3)
|
||||
continue
|
||||
current_id = pop['id']
|
||||
current_payload = pop['payload']
|
||||
print(f"Job received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
|
||||
|
||||
#do gen
|
||||
while not exited:
|
||||
current_generation = make_url_request(f'http://localhost:{args.port}/api/v1/generate', current_payload)
|
||||
if current_generation:
|
||||
break
|
||||
print("Server Busy - Not ready to generate...")
|
||||
time.sleep(5)
|
||||
|
||||
#submit reply
|
||||
submit_dict = {
|
||||
"id": current_id,
|
||||
"generation": current_generation,
|
||||
}
|
||||
reply = make_url_request(cluster + '/api/v2/generate/text/submit', submit_dict)
|
||||
if not reply:
|
||||
print("Error: Job submit failed.")
|
||||
else:
|
||||
print(f'Submitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
|
||||
current_id = None
|
||||
current_payload = None
|
||||
time.sleep(5)
|
||||
|
||||
def main(args):
|
||||
embedded_kailite = None
|
||||
if not args.model_param:
|
||||
args.model_param = args.model
|
||||
|
@ -1441,6 +1545,11 @@ def main(args):
|
|||
wb.open(epurl)
|
||||
except:
|
||||
print("--launch was set, but could not launch web browser automatically.")
|
||||
|
||||
if len(args.hordeconfig)>4:
|
||||
horde_thread = threading.Thread(target=run_horde_worker,args=(args,args.hordeconfig[3],args.hordeconfig[4]))
|
||||
horde_thread.start()
|
||||
|
||||
print(f"Please connect to custom endpoint at {epurl}")
|
||||
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
|
||||
|
||||
|
@ -1479,7 +1588,7 @@ if __name__ == '__main__':
|
|||
parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true')
|
||||
parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_const', const=1, default=0)
|
||||
parser.add_argument("--skiplauncher", help="Doesn't display or use the new GUI launcher.", action='store_true')
|
||||
parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength and max ctxlen.",metavar=('[hordename]', '[hordelength] [hordectx]'), nargs='+')
|
||||
parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength, max ctxlen, API key and worker name.",metavar=('[hordemodelname]', '[hordelength] [hordemaxctx] [hordeapikey] [hordeworkername]'), nargs='+')
|
||||
compatgroup = parser.add_mutually_exclusive_group()
|
||||
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
|
||||
compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue