From 191de1e8a339df41007849a912df24d23c7e4c24 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 30 Sep 2023 19:35:03 +0800
Subject: [PATCH] allow launching with kcpps files

---
 koboldcpp.py | 47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 797414c7b..a650811eb 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -279,7 +279,7 @@ def load_model(model_filename):
     return ret
 
 def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=True, stream_sse=False, grammar='', genkey=''):
-    global maxctx, args, currentusergenkey
+    global maxctx, args, currentusergenkey, totalgens
     inputs = generation_inputs()
     outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
     inputs.prompt = prompt.encode("UTF-8")
@@ -330,6 +330,7 @@ def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_
         else:
             inputs.stop_sequence[n] = stop_sequence[n].encode("UTF-8")
     currentusergenkey = genkey
+    totalgens += 1
     ret = handle.generate(inputs,outputs)
     if(ret.status==1):
         return ret.text.decode("UTF-8","ignore")
@@ -366,6 +367,7 @@ showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True
 exitcounter = 0
+totalgens = 0
 currentusergenkey = "" #store a special key so polled streaming works even in multiuser
 args = None #global args
 
@@ -515,7 +517,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
 
 
     def do_GET(self):
-        global maxctx, maxhordelen, friendlymodelname, KcppVersion, streamLock
+        global maxctx, maxhordelen, friendlymodelname, KcppVersion, totalgens
         self.path = self.path.rstrip('/')
         response_body = None
 
@@ -570,7 +572,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
 
         elif self.path.endswith('/api/extra/generate/check'):
             pendtxtStr = ""
-            if requestsinqueue==0:
+            if requestsinqueue==0 and totalgens>0:
                 pendtxt = handle.get_pending_output()
                 pendtxtStr = ctypes.string_at(pendtxt).decode("UTF-8","ignore")
             response_body = (json.dumps({"results": [{"text": pendtxtStr}]}).encode())
@@ -595,7 +597,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         return
 
     def do_POST(self):
-        global modelbusy, requestsinqueue, currentusergenkey
+        global modelbusy, requestsinqueue, currentusergenkey, totalgens
         content_length = int(self.headers['Content-Length'])
         body = self.rfile.read(content_length)
         self.path = self.path.rstrip('/')
@@ -637,9 +639,10 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                 multiuserkey = ""
                 pass
 
-            if (multiuserkey!="" and multiuserkey==currentusergenkey) or requestsinqueue==0:
-                pendtxt = handle.get_pending_output()
-                pendtxtStr = ctypes.string_at(pendtxt).decode("UTF-8","ignore")
+            if totalgens>0:
+                if (multiuserkey!="" and multiuserkey==currentusergenkey) or requestsinqueue==0:
+                    pendtxt = handle.get_pending_output()
+                    pendtxtStr = ctypes.string_at(pendtxt).decode("UTF-8","ignore")
             self.send_response(200)
             self.end_headers()
             self.wfile.write(json.dumps({"results": [{"text": pendtxtStr}]}).encode())
@@ -790,10 +793,13 @@ def show_new_gui():
         import tkinter as tk
         root = tk.Tk() #we dont want the useless window to be visible, but we want it in taskbar
         root.attributes("-alpha", 0)
-        args.model_param = askopenfilename(title="Select ggml model .bin or .gguf files")
+        args.model_param = askopenfilename(title="Select ggml model .bin or .gguf file or .kcpps config")
         root.destroy()
+        if args.model_param and args.model_param!="" and args.model_param.lower().endswith('.kcpps'):
+            print("\nLoading configuration...")
+            loadconfigfile(args.model_param)
         if not args.model_param:
-            print("\nNo ggml model file was selected. Exiting.")
+            print("\nNo ggml model or kcpps file was selected. Exiting.")
             time.sleep(3)
             sys.exit(2)
         return
@@ -1180,7 +1186,7 @@ def show_new_gui():
     # launch
     def guilaunch():
         if model_var.get() == "":
-            tmp = askopenfilename(title="Select ggml model .bin or .gguf files")
+            tmp = askopenfilename(title="Select ggml model .bin or .gguf file")
             model_var.set(tmp)
         nonlocal nextstate
         nextstate = 1
@@ -1556,7 +1562,7 @@ def show_old_gui():
 
         root = tk.Tk()
         root.attributes("-alpha", 0)
-        args.model_param = askopenfilename(title="Select ggml model .bin or .gguf files")
+        args.model_param = askopenfilename(title="Select ggml model .bin or .gguf file")
         root.destroy()
         if not args.model_param:
             print("\nNo ggml model file was selected. Exiting.")
@@ -1566,7 +1572,7 @@ def show_old_gui():
     else:
         root = tk.Tk() #we dont want the useless window to be visible, but we want it in taskbar
         root.attributes("-alpha", 0)
-        args.model_param = askopenfilename(title="Select ggml model .bin or .gguf files")
+        args.model_param = askopenfilename(title="Select ggml model .bin or .gguf file")
         root.destroy()
         if not args.model_param:
             print("\nNo ggml model file was selected. Exiting.")
@@ -1785,16 +1791,19 @@ def unload_libs():
         del handle
         handle = None
 
+def loadconfigfile(filename):
+    """Parse the JSON file at `filename` and set each top-level key as an attribute on the global `args`."""
+    with open(filename, 'r') as cfgfile:
+        for name, val in json.load(cfgfile).items():
+            setattr(args, name, val)
+
 def main(launch_args,start_server=True):
     global args
     args = launch_args
     embedded_kailite = None
-    if args.config:
-        if isinstance(args.config, str) and os.path.exists(args.config):
-            with open(args.config, 'r') as f:
-                config = json.load(f)
-            for key, value in config.items():
-                setattr(args, key, value)
+    if args.config and len(args.config)==1:
+        if isinstance(args.config[0], str) and os.path.exists(args.config[0]):
+           loadconfigfile(args.config[0])
         else:
             print("Specified kcpp config file invalid or not found.")
             time.sleep(3)
@@ -1955,7 +1964,7 @@ if __name__ == '__main__':
     parser.add_argument("--host", help="Host IP to listen on. If empty, all routable interfaces are accepted.", default="")
     parser.add_argument("--launch", help="Launches a web browser when load is completed.", action='store_true')
     parser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
-    parser.add_argument("--config", help="Load settings from a .kcpps file. Other arguments will be ignored", type=str, nargs='?')
+    parser.add_argument("--config", help="Load settings from a .kcpps file. Other arguments will be ignored", type=str, nargs=1)
     physical_core_limit = 1
     if os.cpu_count()!=None and os.cpu_count()>1:
         physical_core_limit = int(os.cpu_count()/2)