From da8a09ba107678f1894db1fbffd6bd05e9aee585 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 5 Oct 2023 22:24:20 +0800
Subject: [PATCH] use filename as default model name

---
 README.md    |  1 +
 koboldcpp.py | 30 +++++++++++++++++++++---------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index a475853fc..8c8f0f872 100644
--- a/README.md
+++ b/README.md
@@ -77,6 +77,7 @@ For more information, be sure to run the program with the `--help` flag.
 ## Notes
 - Generation delay scales linearly with original prompt length. If OpenBLAS is enabled then prompt ingestion becomes about 2-3x faster. This is automatic on windows, but will require linking on OSX and Linux. CLBlast speeds this up even further, and `--gpulayers` + `--useclblast` or `--usecublas` more so.
 - I have heard of someone claiming a false AV positive report. The exe is a simple pyinstaller bundle that includes the necessary python scripts and dlls to run. If this still concerns you, you might wish to rebuild everything from source code using the makefile, and you can rebuild the exe yourself with pyinstaller by using `make_pyinstaller.bat`
+- API documentation available at `/api` and https://lite.koboldai.net/koboldcpp_api
 - Supported GGML models (Includes backward compatibility for older versions/legacy GGML models, though some newer features might be unavailable):
   - LLAMA and LLAMA2 (LLaMA / Alpaca / GPT4All / Vicuna / Koala / Pygmalion 7B / Metharme 7B / WizardLM and many more)
   - GPT-2 / Cerebras
diff --git a/koboldcpp.py b/koboldcpp.py
index 9f0470ed2..aa7990aab 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -662,6 +662,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         if self.path.endswith('/api/extra/abort'):
             if requestsinqueue==0:
                 ag = handle.abort_generate()
+                time.sleep(0.3) #short delay before replying
                 self.send_response(200)
                 self.end_headers()
                 self.wfile.write(json.dumps({"success": ("true" if ag else "false")}).encode())
@@ -702,7 +703,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                 }}).encode())
                 return
         if reqblocking:
-            requestsinqueue = (requestsinqueue - 1) if requestsinqueue>0 else 0
+            requestsinqueue = (requestsinqueue - 1) if requestsinqueue > 0 else 0
 
         try:
             sse_stream_flag = False
@@ -727,7 +728,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                 api_format = 4
                 force_json = True
 
-            if api_format>0:
+            if api_format > 0:
                 genparams = None
                 try:
                     genparams = json.loads(body)
@@ -755,7 +756,6 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                         self.wfile.write(json.dumps(gen).encode())
                 except:
                     print("Generate: The response could not be sent, maybe connection was terminated?")
-
                 return
             finally:
                 modelbusy.release()
@@ -950,10 +950,10 @@ def show_new_gui():
 
         return entry, label
 
-    def makefileentry(parent, text, searchtext, var, row=0, width=250):
+    def makefileentry(parent, text, searchtext, var, row=0, width=250, filetypes=[]):
         makelabel(parent, text, row)
         def getfilename(var, text):
-            var.set(askopenfilename(title=text))
+            var.set(askopenfilename(title=text,filetypes=filetypes))
         entry = ctk.CTkEntry(parent, width, textvariable=var)
         entry.grid(row=row+1, column=0, padx=8, stick="nw")
         button = ctk.CTkButton(parent, 50, text="Browse", command= lambda a=var,b=searchtext:getfilename(a,b))
@@ -1106,7 +1106,7 @@ def show_new_gui():
     makeslider(quick_tab, "Context Size:", contextsize_text, context_var, 0, len(contextsize_text)-1, 30, set=2)
 
     # load model
-    makefileentry(quick_tab, "Model:", "Select GGML Model File", model_var, 40, 170)
+    makefileentry(quick_tab, "Model:", "Select GGML Model File", model_var, 40, 170,filetypes=[("GGML Model Files", "*.gguf;*.bin;*.ggml")])
 
     # Hardware Tab
     hardware_tab = tabcontent["Hardware"]
@@ -1173,7 +1173,7 @@ def show_new_gui():
 
     # Model Tab
     model_tab = tabcontent["Model"]
-    makefileentry(model_tab, "Model:", "Select GGML Model File", model_var, 1)
+    makefileentry(model_tab, "Model:", "Select GGML Model File", model_var, 1, filetypes=[("GGML Model Files", "*.gguf;*.bin;*.ggml")])
     makefileentry(model_tab, "Lora:", "Select Lora File",lora_var, 3)
     makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5)
 
@@ -1646,8 +1646,14 @@ def loadconfigfile(filename):
         for key, value in config.items():
             setattr(args, key, value)
 
+def sanitize_string(input_string):
+    # alphanumeric characters, dots, dashes, and underscores
+    import re
+    sanitized_string = re.sub( r'[^\w\d\.\-_]', '', input_string)
+    return sanitized_string
+
 def main(launch_args,start_server=True):
-    global args
+    global args, friendlymodelname
     args = launch_args
     embedded_kailite = None
     embedded_kcpp_docs = None
@@ -1678,8 +1684,14 @@ def main(launch_args,start_server=True):
             time.sleep(3)
             sys.exit(2)
 
+    # sanitize and replace the default vanity name. remember me....
+    if args.model_param!="":
+        newmdldisplayname = os.path.basename(args.model_param)
+        newmdldisplayname = os.path.splitext(newmdldisplayname)[0]
+        friendlymodelname = "koboldcpp/" + sanitize_string(newmdldisplayname)
+
     if args.hordeconfig and args.hordeconfig[0]!="":
-        global friendlymodelname, maxhordelen, maxhordectx, showdebug
+        global maxhordelen, maxhordectx, showdebug
         friendlymodelname = args.hordeconfig[0]
         if not friendlymodelname.startswith("koboldcpp/"):
             friendlymodelname = "koboldcpp/" + friendlymodelname
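
For reference, below is a minimal standalone sketch of the default-naming behaviour this patch introduces: take the model filename, drop the directory and extension, sanitize it, and prepend the "koboldcpp/" prefix. The helper `sanitize_string` mirrors the patch's regex, while `default_friendly_name` is a hypothetical wrapper for illustration only and is not part of koboldcpp.py; as in the patch, an explicit `--hordeconfig` name would still override this default.

```python
import os
import re

def sanitize_string(input_string):
    # keep only word characters, digits, dots, dashes, and underscores (same regex as the patch)
    return re.sub(r'[^\w\d\.\-_]', '', input_string)

def default_friendly_name(model_path):
    # strip directory and extension, sanitize the remainder, then add the vanity prefix
    base = os.path.splitext(os.path.basename(model_path))[0]
    return "koboldcpp/" + sanitize_string(base)

if __name__ == "__main__":
    # e.g. "/models/My Model v1.3.Q4_K_M.gguf" -> "koboldcpp/MyModelv1.3.Q4_K_M"
    print(default_friendly_name("/models/My Model v1.3.Q4_K_M.gguf"))
```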