From 8ecf505d5de3262caeb2e000f862c30eb005763a Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 24 Sep 2023 01:20:09 +0800
Subject: [PATCH] improved embedded horde worker (+2 squashed commit)

Squashed commit:

[99234379] improved embedded horde worker

[ebcd1968] update lite
---
 klite.embd   | 49 ++++++++++++++++++++++++++++++++++---------------
 koboldcpp.py | 40 +++++++++++++++++++++++++---------------
 2 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/klite.embd b/klite.embd
index 1294ddf28..6cf222e3e 100644
--- a/klite.embd
+++ b/klite.embd
@@ -6,7 +6,7 @@
 It requires no dependencies, installation or setup.
 Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
-Current version: 68
+Current version: 69
 -Concedo
 -->
@@ -2940,7 +2940,7 @@ Current version: 68
 		last_selected_preset: 0,
 		gui_type_chat: 1, //0=standard, 1=messenger, 2=aesthetic
 		gui_type_instruct: 0, //0=standard, 1=messenger, 2=aesthetic
-		multiline_replies: false,
+		multiline_replies: true,
 		allow_continue_chat: false,
 		idle_responses: 0,
 		idle_duration: 60,
@@ -4229,8 +4229,8 @@ Current version: 68
 		let cdef = data.definition?data.definition.replace("END_OF_DIALOG","").trim():"";
 		let cdesc = data.description?data.description:"";
 		let greeting = data.greeting?data.greeting:"";
-		let previewtxt = replaceAll(cdesc,"{{char}}",botname);
-		previewtxt = replaceAll(previewtxt,"{{user}}","You");
+		let previewtxt = replaceAll(cdesc,"{{char}}",botname,true);
+		previewtxt = replaceAll(previewtxt,"{{user}}","You",true);

 		temp_scenario = {
 			"title":data.title?data.title:"",
@@ -5983,6 +5983,10 @@ Current version: 68
 				document.getElementById('instruct_starttag').value = "[INST] ";
 				document.getElementById('instruct_endtag').value = " [/INST]";
 				break;
+			case "5": //Q & A
+				document.getElementById('instruct_starttag').value = "\\nQuestion: ";
+				document.getElementById('instruct_endtag').value = "\\nAnswer: ";
+				break;
 			default:
 				break;
 			}
@@ -6268,8 +6272,8 @@ Current version: 68
 		//only do this for chat and instruct modes
 		if(localsettings.opmode==3||localsettings.opmode==4)
 		{
-			inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You");
-			inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent);
+			inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true);
+			inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true);
 			inputtxt = replaceAll(inputtxt,instructstartplaceholder,get_instruct_starttag(false));
 			inputtxt = replaceAll(inputtxt,instructendplaceholder,get_instruct_endtag(false));
 			//failsafe to handle removing newline tags
@@ -6616,7 +6620,9 @@ Current version: 68
 			{
 				let recenttext = gametext_arr[gametext_arr.length-1].toLowerCase();
 				let spokennames = coarr.filter(x=>(recenttext.includes(x.toLowerCase())));
-				if(spokennames.length>0)
+				let selfname = localsettings.chatname + "\: ";
+				let wasself = (recenttext.includes(selfname.toLowerCase()));
+				if(wasself && spokennames.length>0)
 				{
 					co = spokennames[Math.floor(Math.random()*spokennames.length)];
 				}
@@ -8848,15 +8854,15 @@ Current version: 68
 			this.bubbleColor_you = '#29343a';
 			this.bubbleColor_AI = 'rgba(20, 20, 40, 1)';

-			this.background_margin = [10, 10, 5, 0];
-			this.background_padding = [25, 25, 10, 10];
-			this.background_minHeight = 100;
+			this.background_margin = [5, 5, 5, 0];
+			this.background_padding = [15, 15, 10, 10];
+			this.background_minHeight = 80;
 			this.centerHorizontally = false;

 			this.border_style = 'Rounded';
-			this.portrait_width_AI = 100;
+			this.portrait_width_AI = 80;
 			this.portrait_ratio_AI = 1.0;
-			this.portrait_width_you = 100;
+			this.portrait_width_you = 80;
 			this.portrait_ratio_you = 1.0;

 			this.show_chat_names = true;
@@ -9174,7 +9180,9 @@ Current version: 68
 		let newbodystr = noSystemPrompt ? input : style('sys') + input; // First, create the string we'll transform. Style system bubble if we should.
 		if (newbodystr.endsWith(bot)) { newbodystr = newbodystr.slice(0, -bot.length); } // Remove the last chat bubble if prompt ends with `end_sequence`.
 		newbodystr = transformInputToAestheticStyle(newbodystr); // Transform input to aesthetic style, reduce any unnecessary spaces or newlines, and trim empty replies if they exist.
-		if (synchro_pending_stream != "") { newbodystr += getStreamingText(); } // Add the pending stream if it's needed. This will add any streamed text to a new bubble for the AI.
+		if (synchro_pending_stream != "") {
+			newbodystr += getStreamingText();
+		} // Add the pending stream if it's needed. This will add any streamed text to a new bubble for the AI.
 		newbodystr += contextDict.closeTag + '</div>'; // Lastly, append the closing div so our body's raw form is completed.
 		if (aestheticInstructUISettings.use_markdown) { // If markdown is enabled, style the content of each bubble as well.
 			let internalHTMLparts = []; // We'll cache the embedded HTML parts here to keep them intact.
@@ -9219,9 +9227,19 @@ Current version: 68
 	function transformInputToAestheticStyle(bodyStr) { // Trim unnecessary empty space and new lines, and append * or " to each bubble if start/end sequence ends with * or ", to preserve styling.
 		bodyStr = bodyStr.replaceAll(you + '\n', you).replaceAll(you + ' ', you).replaceAll(you, style('you') + `${you.endsWith('*') ? '*' : ''}` + `${you.endsWith('"') ? '"' : ''}`);
 		bodyStr = bodyStr.replaceAll(bot + '\n', bot).replaceAll(bot + ' ', bot).replaceAll(bot, style('AI') + `${bot.endsWith('*') ? '*' : ''}` + `${bot.endsWith('"') ? '"' : ''}`);
-		return bodyStr.replaceAll('"', '&quot;');
+		if(gametext_arr.length==0)
+		{
+			return bodyStr; //to allow html in the welcome text
+		}
+		else
+		{
+			return bodyStr.replaceAll('"', '&quot;');
+		}
+	}
+	function getStreamingText() {
+		let isChatBotReply = (localsettings.opmode==3 && pending_context_preinjection.startsWith("\n") && pending_context_preinjection.endsWith(":"));
+		return `${(input.endsWith(bot) || isChatBotReply) ? style('AI') + `${bot.endsWith('*') ? '*' : ''}` + `${bot.endsWith('"') ? '"' : ''}` : ''}` + escapeHtml(pending_context_preinjection) + escapeHtml(synchro_pending_stream);
 	}
-	function getStreamingText() { return `${input.endsWith(bot) ? style('AI') + `${bot.endsWith('*') ? '*' : ''}` + `${bot.endsWith('"') ? '"' : ''}` : ''}` + escapeHtml(pending_context_preinjection) + escapeHtml(synchro_pending_stream); }
 }

 function updateTextPreview() {
@@ -9813,6 +9831,7 @@ Current version: 68
+
diff --git a/koboldcpp.py b/koboldcpp.py
index 4fb6d7410..e312612bd 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1542,11 +1542,16 @@ def show_old_gui():
 #A very simple and stripped down embedded horde worker with no dependencies
 def run_horde_worker(args, api_key, worker_name):
     import urllib.request
+    from datetime import datetime
     global friendlymodelname, maxhordectx, maxhordelen, exitcounter, modelbusy
     epurl = f"http://localhost:{args.port}"
     if args.host!="":
         epurl = f"http://{args.host}:{args.port}"

+    def print_with_time(txt):
+        print(f"{datetime.now().strftime('[%H:%M:%S]')} " + txt)
+
+
     def make_url_request(url, data, method='POST'):
         try:
             request = None
@@ -1565,12 +1570,12 @@ def run_horde_worker(args, api_key, worker_name):
         except urllib.error.HTTPError as e:
             try:
                 errmsg = e.read().decode('utf-8')
-                print(f"Error: {e} - {errmsg}, Make sure your Horde API key and worker name is valid.")
+                print_with_time(f"Error: {e} - {errmsg}, Make sure your Horde API key and worker name is valid.")
             except Exception as e:
-                print(f"Error: {e}, Make sure your Horde API key and worker name is valid.")
+                print_with_time(f"Error: {e}, Make sure your Horde API key and worker name is valid.")
             return None
         except Exception as e:
-            print(f"Error: {e} - {response_data}, Make sure your Horde API key and worker name is valid.")
+            print_with_time(f"Error: {e} - {response_data}, Make sure your Horde API key and worker name is valid.")
             return None

     current_id = None
@@ -1584,7 +1589,7 @@ def run_horde_worker(args, api_key, worker_name):
         time.sleep(3)
         readygo = make_url_request(f'{epurl}/api/v1/info/version', None,'GET')
         if readygo:
-            print("Embedded Horde Worker is started.")
+            print_with_time(f"Embedded Horde Worker is started.")
             break

     while exitcounter < 10:
@@ -1593,7 +1598,7 @@ def run_horde_worker(args, api_key, worker_name):

         #first, make sure we are not generating
         if modelbusy.locked():
-            time.sleep(0.5)
+            time.sleep(0.3)
             continue

         #pop new request
@@ -1609,20 +1614,23 @@ def run_horde_worker(args, api_key, worker_name):
         pop = make_url_request(f'{cluster}/api/v2/generate/text/pop',gen_dict)
         if not pop:
             exitcounter += 1
-            print(f"Failed to fetch job from {cluster}. Waiting 5 seconds...")
+            print_with_time(f"Failed to fetch job from {cluster}. Waiting 5 seconds...")
             time.sleep(5)
             continue
         if not pop["id"]:
-            slp = (2 if sleepy_counter<10 else (3 if sleepy_counter<20 else 4))
+            slp = (1 if sleepy_counter<10 else (2 if sleepy_counter<25 else 3))
             #print(f"Server {cluster} has no valid generations for us. Sleep for {slp}s")
             time.sleep(slp)
             sleepy_counter += 1
+            if sleepy_counter==20:
+                print_with_time(f"No recent jobs, entering low power mode...")
             continue

         sleepy_counter = 0
         current_id = pop['id']
         current_payload = pop['payload']
-        print(f"\nJob received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
+        print(f"") #empty newline
+        print_with_time(f"Job received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")

         #do gen
         while exitcounter < 10:
@@ -1634,10 +1642,11 @@ def run_horde_worker(args, api_key, worker_name):
                 currentjob_attempts += 1
                 if currentjob_attempts>5:
                     break
-                print("Server Busy - Not ready to generate...")
+                print_with_time("Server Busy - Not ready to generate...")
                 time.sleep(5)

         #submit reply
+        print(f"") #empty newline
         if current_generation:
             submit_dict = {
                 "id": current_id,
@@ -1647,19 +1656,20 @@ def run_horde_worker(args, api_key, worker_name):
             reply = make_url_request(cluster + '/api/v2/generate/text/submit', submit_dict)
             if not reply:
                 exitcounter += 1
-                print("\nError: Job submit failed.")
+                print_with_time("Error: Job submit failed.")
             else:
-                print(f'\nSubmitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
+                print_with_time(f'Submitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
         else:
-            print("\nError: Abandoned current job due to errors. Getting new job.")
+            print_with_time("Error: Abandoned current job due to errors. Getting new job.")
         current_id = None
         current_payload = None
-        time.sleep(1)
+        time.sleep(0.2)
+

     if exitcounter<100:
-        print("Horde Worker Shutdown - Too many errors.")
+        print_with_time("Horde Worker Shutdown - Too many errors.")
         time.sleep(3)
     else:
-        print("Horde Worker Shutdown - Server Closing.")
+        print_with_time("Horde Worker Shutdown - Server Closing.")
         time.sleep(2)
     sys.exit(2)
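
Note on the reworked polling loop: after this patch the embedded horde worker polls the AI Horde text/pop endpoint with a shorter busy-wait, backs off gradually when no jobs arrive (1s for the first 10 empty polls, 2s up to 25, then 3s, announcing "low power mode" at 20), and prefixes every log line with a timestamp via print_with_time(). The standalone Python sketch below approximates that backoff behaviour only; poll_once() is a hypothetical placeholder for the real make_url_request() pop call, so treat this as an illustration of the idea rather than the shipped code.

import time
from datetime import datetime

def print_with_time(txt):
    # Same helper the patch adds: prefix each log line with a [HH:MM:SS] timestamp.
    print(f"{datetime.now().strftime('[%H:%M:%S]')} " + txt)

def idle_sleep_seconds(sleepy_counter):
    # Backoff schedule taken from the patch: 1s for the first 10 empty polls,
    # 2s until 25, then 3s once the worker has gone quiet.
    return 1 if sleepy_counter < 10 else (2 if sleepy_counter < 25 else 3)

def poll_loop(poll_once):
    # poll_once() is a hypothetical stand-in for the worker's pop request; it
    # should return a job dict, or None when the Horde has no work to hand out.
    sleepy_counter = 0
    while True:
        job = poll_once()
        if not job:
            time.sleep(idle_sleep_seconds(sleepy_counter))
            sleepy_counter += 1
            if sleepy_counter == 20:
                print_with_time("No recent jobs, entering low power mode...")
            continue
        sleepy_counter = 0
        print_with_time(f"Job received: {job}")
        # ...generate and submit the result here, as the real worker does...

The shorter initial sleeps keep a busy worker responsive, while the longer idle sleeps reduce pointless HTTP traffic when the Horde queue is empty.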