From 8ecf505d5de3262caeb2e000f862c30eb005763a Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 24 Sep 2023 01:20:09 +0800
Subject: [PATCH] improved embedded horde worker (+2 squashed commit)

Squashed commit:

[99234379] improved embedded horde worker

[ebcd1968] update lite
---
 klite.embd   | 49 ++++++++++++++++++++++++++++++++++---------------
 koboldcpp.py | 40 +++++++++++++++++++++++++---------------
 2 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/klite.embd b/klite.embd
index 1294ddf28..6cf222e3e 100644
--- a/klite.embd
+++ b/klite.embd
@@ -6,7 +6,7 @@
 It requires no dependencies, installation or setup.
 Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
-Current version: 68
+Current version: 69
 -Concedo
 -->
@@ -2940,7 +2940,7 @@ Current version: 68
 		last_selected_preset: 0,
 		gui_type_chat: 1, //0=standard, 1=messenger, 2=aesthetic
 		gui_type_instruct: 0, //0=standard, 1=messenger, 2=aesthetic
-		multiline_replies: false,
+		multiline_replies: true,
 		allow_continue_chat: false,
 		idle_responses: 0,
 		idle_duration: 60,
@@ -4229,8 +4229,8 @@ Current version: 68
 		let cdef = data.definition?data.definition.replace("END_OF_DIALOG","").trim():"";
 		let cdesc = data.description?data.description:"";
 		let greeting = data.greeting?data.greeting:"";
-		let previewtxt = replaceAll(cdesc,"{{char}}",botname);
-		previewtxt = replaceAll(previewtxt,"{{user}}","You");
+		let previewtxt = replaceAll(cdesc,"{{char}}",botname,true);
+		previewtxt = replaceAll(previewtxt,"{{user}}","You",true);

 		temp_scenario = {
 			"title":data.title?data.title:"",
@@ -5983,6 +5983,10 @@ Current version: 68
 				document.getElementById('instruct_starttag').value = "[INST] ";
 				document.getElementById('instruct_endtag').value = " [/INST]";
 				break;
+			case "5": //Q & A
+				document.getElementById('instruct_starttag').value = "\\nQuestion: ";
+				document.getElementById('instruct_endtag').value = "\\nAnswer: ";
+				break;
 			default:
 				break;
 			}
@@ -6268,8 +6272,8 @@ Current version: 68
 		//only do this for chat and instruct modes
 		if(localsettings.opmode==3||localsettings.opmode==4)
 		{
-			inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You");
-			inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent);
+			inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true);
+			inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true);
 			inputtxt = replaceAll(inputtxt,instructstartplaceholder,get_instruct_starttag(false));
 			inputtxt = replaceAll(inputtxt,instructendplaceholder,get_instruct_endtag(false));
 			//failsafe to handle removing newline tags
@@ -6616,7 +6620,9 @@ Current version: 68
 			{
 				let recenttext = gametext_arr[gametext_arr.length-1].toLowerCase();
 				let spokennames = coarr.filter(x=>(recenttext.includes(x.toLowerCase())));
-				if(spokennames.length>0)
+				let selfname = localsettings.chatname + "\: ";
+				let wasself = (recenttext.includes(selfname.toLowerCase()));
+				if(wasself && spokennames.length>0)
 				{
 					co = spokennames[Math.floor(Math.random()*spokennames.length)];
 				}
@@ -8848,15 +8854,15 @@ Current version: 68
 			this.bubbleColor_you = '#29343a';
 			this.bubbleColor_AI = 'rgba(20, 20, 40, 1)';

-			this.background_margin = [10, 10, 5, 0];
-			this.background_padding = [25, 25, 10, 10];
-			this.background_minHeight = 100;
+			this.background_margin = [5, 5, 5, 0];
+			this.background_padding = [15, 15, 10, 10];
+			this.background_minHeight = 80;
 			this.centerHorizontally = false;

 			this.border_style = 'Rounded';
-			this.portrait_width_AI = 100;
+			this.portrait_width_AI = 80;
 			this.portrait_ratio_AI = 1.0;
-			this.portrait_width_you = 100;
+			this.portrait_width_you = 80;
 			this.portrait_ratio_you = 1.0;

 			this.show_chat_names = true;
@@ -9174,7 +9180,9 @@ Current version: 68
 		let newbodystr = noSystemPrompt ? input : style('sys') + input; // First, create the string we'll transform. Style system bubble if we should.
 		if (newbodystr.endsWith(bot)) { newbodystr = newbodystr.slice(0, -bot.length); } // Remove the last chat bubble if prompt ends with `end_sequence`.
 		newbodystr = transformInputToAestheticStyle(newbodystr); // Transform input to aesthetic style, reduce any unnecessary spaces or newlines, and trim empty replies if they exist.
-		if (synchro_pending_stream != "") { newbodystr += getStreamingText(); } // Add the pending stream if it's needed. This will add any streamed text to a new bubble for the AI.
+		if (synchro_pending_stream != "") {
+			newbodystr += getStreamingText();
+		} // Add the pending stream if it's needed. This will add any streamed text to a new bubble for the AI.
 		newbodystr += contextDict.closeTag + '</div>'; // Lastly, append the closing div so our body's raw form is completed.
 		if (aestheticInstructUISettings.use_markdown) { // If markdown is enabled, style the content of each bubble as well.
 			let internalHTMLparts = []; // We'll cache the embedded HTML parts here to keep them intact.
@@ -9219,9 +9227,19 @@ Current version: 68
 	function transformInputToAestheticStyle(bodyStr) { // Trim unnecessary empty space and new lines, and append * or " to each bubble if start/end sequence ends with * or ", to preserve styling.
 		bodyStr = bodyStr.replaceAll(you + '\n', you).replaceAll(you + ' ', you).replaceAll(you, style('you') + `${you.endsWith('*') ? '*' : ''}` + `${you.endsWith('"') ? '"' : ''}`);
 		bodyStr = bodyStr.replaceAll(bot + '\n', bot).replaceAll(bot + ' ', bot).replaceAll(bot, style('AI') + `${bot.endsWith('*') ? '*' : ''}` + `${bot.endsWith('"') ? '"' : ''}`);
-		return bodyStr.replaceAll('"', '&quot;');
+		if(gametext_arr.length==0)
+		{
+			return bodyStr; //to allow html in the welcome text
+		}
+		else
+		{
+			return bodyStr.replaceAll('"', '&quot;');
+		}
+	}
+	function getStreamingText() {
+		let isChatBotReply = (localsettings.opmode==3 && pending_context_preinjection.startsWith("\n") && pending_context_preinjection.endsWith(":"));
+		return `${(input.endsWith(bot) || isChatBotReply) ? style('AI') + `${bot.endsWith('*') ? '*' : ''}` + `${bot.endsWith('"') ? '"' : ''}` : ''}` + escapeHtml(pending_context_preinjection) + escapeHtml(synchro_pending_stream);
 	}
-	function getStreamingText() { return `${input.endsWith(bot) ? style('AI') + `${bot.endsWith('*') ? '*' : ''}` + `${bot.endsWith('"') ? '"' : ''}` : ''}` + escapeHtml(pending_context_preinjection) + escapeHtml(synchro_pending_stream); }
 }

 function updateTextPreview() {
@@ -9813,6 +9831,7 @@ Current version: 68
+
diff --git a/koboldcpp.py b/koboldcpp.py
index 4fb6d7410..e312612bd 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1542,11 +1542,16 @@ def show_old_gui():
 #A very simple and stripped down embedded horde worker with no dependencies
 def run_horde_worker(args, api_key, worker_name):
     import urllib.request
+    from datetime import datetime
     global friendlymodelname, maxhordectx, maxhordelen, exitcounter, modelbusy
     epurl = f"http://localhost:{args.port}"
     if args.host!="":
         epurl = f"http://{args.host}:{args.port}"

+    def print_with_time(txt):
+        print(f"{datetime.now().strftime('[%H:%M:%S]')} " + txt)
+
+
     def make_url_request(url, data, method='POST'):
         try:
             request = None
@@ -1565,12 +1570,12 @@ def run_horde_worker(args, api_key, worker_name):
         except urllib.error.HTTPError as e:
             try:
                 errmsg = e.read().decode('utf-8')
-                print(f"Error: {e} - {errmsg}, Make sure your Horde API key and worker name is valid.")
+                print_with_time(f"Error: {e} - {errmsg}, Make sure your Horde API key and worker name is valid.")
             except Exception as e:
-                print(f"Error: {e}, Make sure your Horde API key and worker name is valid.")
+                print_with_time(f"Error: {e}, Make sure your Horde API key and worker name is valid.")
             return None
         except Exception as e:
-            print(f"Error: {e} - {response_data}, Make sure your Horde API key and worker name is valid.")
+            print_with_time(f"Error: {e} - {response_data}, Make sure your Horde API key and worker name is valid.")
             return None

     current_id = None
@@ -1584,7 +1589,7 @@ def run_horde_worker(args, api_key, worker_name):
         time.sleep(3)
         readygo = make_url_request(f'{epurl}/api/v1/info/version', None,'GET')
         if readygo:
-            print("Embedded Horde Worker is started.")
+            print_with_time(f"Embedded Horde Worker is started.")
             break

     while exitcounter < 10:
@@ -1593,7 +1598,7 @@ def run_horde_worker(args, api_key, worker_name):

         #first, make sure we are not generating
         if modelbusy.locked():
-            time.sleep(0.5)
+            time.sleep(0.3)
             continue

         #pop new request
@@ -1609,20 +1614,23 @@ def run_horde_worker(args, api_key, worker_name):
         pop = make_url_request(f'{cluster}/api/v2/generate/text/pop',gen_dict)
         if not pop:
             exitcounter += 1
-            print(f"Failed to fetch job from {cluster}. Waiting 5 seconds...")
+            print_with_time(f"Failed to fetch job from {cluster}. Waiting 5 seconds...")
             time.sleep(5)
             continue
         if not pop["id"]:
-            slp = (2 if sleepy_counter<10 else (3 if sleepy_counter<20 else 4))
+            slp = (1 if sleepy_counter<10 else (2 if sleepy_counter<25 else 3))
             #print(f"Server {cluster} has no valid generations for us. Sleep for {slp}s")
             time.sleep(slp)
             sleepy_counter += 1
+            if sleepy_counter==20:
+                print_with_time(f"No recent jobs, entering low power mode...")
             continue

         sleepy_counter = 0
         current_id = pop['id']
         current_payload = pop['payload']
-        print(f"\nJob received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
+        print(f"") #empty newline
+        print_with_time(f"Job received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")

         #do gen
         while exitcounter < 10:
@@ -1634,10 +1642,11 @@ def run_horde_worker(args, api_key, worker_name):
                 currentjob_attempts += 1
                 if currentjob_attempts>5:
                     break
-                print("Server Busy - Not ready to generate...")
+                print_with_time("Server Busy - Not ready to generate...")
                 time.sleep(5)

         #submit reply
+        print(f"") #empty newline
         if current_generation:
             submit_dict = {
                 "id": current_id,
@@ -1647,19 +1656,20 @@ def run_horde_worker(args, api_key, worker_name):
             reply = make_url_request(cluster + '/api/v2/generate/text/submit', submit_dict)
             if not reply:
                 exitcounter += 1
-                print("\nError: Job submit failed.")
+                print_with_time("Error: Job submit failed.")
             else:
-                print(f'\nSubmitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
+                print_with_time(f'Submitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
         else:
-            print("\nError: Abandoned current job due to errors. Getting new job.")
+            print_with_time("Error: Abandoned current job due to errors. Getting new job.")
         current_id = None
         current_payload = None
-        time.sleep(1)
+        time.sleep(0.2)
+

     if exitcounter<100:
-        print("Horde Worker Shutdown - Too many errors.")
+        print_with_time("Horde Worker Shutdown - Too many errors.")
         time.sleep(3)
     else:
-        print("Horde Worker Shutdown - Server Closing.")
+        print_with_time("Horde Worker Shutdown - Server Closing.")
         time.sleep(2)
     sys.exit(2)
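
Note on the reworked polling loop: after this patch the embedded horde worker polls the AI Horde text/pop endpoint with a shorter busy-wait, backs off gradually when no jobs arrive (1s for the first 10 empty polls, 2s up to 25, then 3s, announcing "low power mode" at 20), and prefixes every log line with a timestamp via print_with_time(). The standalone Python sketch below approximates that backoff behaviour only; poll_once() is a hypothetical placeholder for the real make_url_request() pop call, so treat this as an illustration of the idea rather than the shipped code.

import time
from datetime import datetime

def print_with_time(txt):
    # Same helper the patch adds: prefix each log line with a [HH:MM:SS] timestamp.
    print(f"{datetime.now().strftime('[%H:%M:%S]')} " + txt)

def idle_sleep_seconds(sleepy_counter):
    # Backoff schedule taken from the patch: 1s for the first 10 empty polls,
    # 2s until 25, then 3s once the worker has gone quiet.
    return 1 if sleepy_counter < 10 else (2 if sleepy_counter < 25 else 3)

def poll_loop(poll_once):
    # poll_once() is a hypothetical stand-in for the worker's pop request; it
    # should return a job dict, or None when the Horde has no work to hand out.
    sleepy_counter = 0
    while True:
        job = poll_once()
        if not job:
            time.sleep(idle_sleep_seconds(sleepy_counter))
            sleepy_counter += 1
            if sleepy_counter == 20:
                print_with_time("No recent jobs, entering low power mode...")
            continue
        sleepy_counter = 0
        print_with_time(f"Job received: {job}")
        # ...generate and submit the result here, as the real worker does...

The shorter initial sleeps keep a busy worker responsive, while the longer idle sleeps reduce pointless HTTP traffic when the Horde queue is empty.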