improved embedded horde worker (+2 squashed commits)

Squashed commit:

[99234379] improved embedded horde worker

[ebcd1968] update lite
Concedo 2023-09-24 01:20:09 +08:00
parent 32cf02487e
commit 8ecf505d5d
2 changed files with 59 additions and 30 deletions


@@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
 Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
-Current version: 68
+Current version: 69
 -Concedo
 -->
@@ -2940,7 +2940,7 @@ Current version: 68
 last_selected_preset: 0,
 gui_type_chat: 1, //0=standard, 1=messenger, 2=aesthetic
 gui_type_instruct: 0, //0=standard, 1=messenger, 2=aesthetic
-multiline_replies: false,
+multiline_replies: true,
 allow_continue_chat: false,
 idle_responses: 0,
 idle_duration: 60,
@@ -4229,8 +4229,8 @@ Current version: 68
 let cdef = data.definition?data.definition.replace("END_OF_DIALOG","").trim():"";
 let cdesc = data.description?data.description:"";
 let greeting = data.greeting?data.greeting:"";
-let previewtxt = replaceAll(cdesc,"{{char}}",botname);
-previewtxt = replaceAll(previewtxt,"{{user}}","You");
+let previewtxt = replaceAll(cdesc,"{{char}}",botname,true);
+previewtxt = replaceAll(previewtxt,"{{user}}","You",true);
 temp_scenario =
 {
 "title":data.title?data.title:"",
@@ -5983,6 +5983,10 @@ Current version: 68
 document.getElementById('instruct_starttag').value = "[INST] ";
 document.getElementById('instruct_endtag').value = " [/INST]";
 break;
+case "5": //Q & A
+document.getElementById('instruct_starttag').value = "\\nQuestion: ";
+document.getElementById('instruct_endtag').value = "\\nAnswer: ";
+break;
 default:
 break;
 }
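
Note on the new preset: option 5 simply frames each turn with "\nQuestion: " and "\nAnswer: " tags. A minimal Python sketch of how such tags wrap a single turn, assuming the escaped \n stored in the settings field resolves to a real newline at generation time (the sample question is illustrative, not from the commit):

start_tag = "\nQuestion: "
end_tag = "\nAnswer: "
user_text = "What does the embedded horde worker do?"  # illustrative input
prompt = start_tag + user_text + end_tag
print(repr(prompt))  # '\nQuestion: What does the embedded horde worker do?\nAnswer: '
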
@@ -6268,8 +6272,8 @@ Current version: 68
 //only do this for chat and instruct modes
 if(localsettings.opmode==3||localsettings.opmode==4)
 {
-inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You");
-inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent);
+inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true);
+inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true);
 inputtxt = replaceAll(inputtxt,instructstartplaceholder,get_instruct_starttag(false));
 inputtxt = replaceAll(inputtxt,instructendplaceholder,get_instruct_endtag(false));
 //failsafe to handle removing newline tags
@@ -6616,7 +6620,9 @@ Current version: 68
 {
 let recenttext = gametext_arr[gametext_arr.length-1].toLowerCase();
 let spokennames = coarr.filter(x=>(recenttext.includes(x.toLowerCase())));
-if(spokennames.length>0)
+let selfname = localsettings.chatname + "\: ";
+let wasself = (recenttext.includes(selfname.toLowerCase()));
+if(wasself && spokennames.length>0)
 {
 co = spokennames[Math.floor(Math.random()*spokennames.length)];
 }
@@ -8848,15 +8854,15 @@ Current version: 68
 this.bubbleColor_you = '#29343a';
 this.bubbleColor_AI = 'rgba(20, 20, 40, 1)';
-this.background_margin = [10, 10, 5, 0];
-this.background_padding = [25, 25, 10, 10];
-this.background_minHeight = 100;
+this.background_margin = [5, 5, 5, 0];
+this.background_padding = [15, 15, 10, 10];
+this.background_minHeight = 80;
 this.centerHorizontally = false;
 this.border_style = 'Rounded';
-this.portrait_width_AI = 100;
+this.portrait_width_AI = 80;
 this.portrait_ratio_AI = 1.0;
-this.portrait_width_you = 100;
+this.portrait_width_you = 80;
 this.portrait_ratio_you = 1.0;
 this.show_chat_names = true;
@@ -9174,7 +9180,9 @@ Current version: 68
 let newbodystr = noSystemPrompt ? input : style('sys') + input; // First, create the string we'll transform. Style system bubble if we should.
 if (newbodystr.endsWith(bot)) { newbodystr = newbodystr.slice(0, -bot.length); } // Remove the last chat bubble if prompt ends with `end_sequence`.
 newbodystr = transformInputToAestheticStyle(newbodystr); // Transform input to aesthetic style, reduce any unnecessary spaces or newlines, and trim empty replies if they exist.
-if (synchro_pending_stream != "") { newbodystr += getStreamingText(); } // Add the pending stream if it's needed. This will add any streamed text to a new bubble for the AI.
+if (synchro_pending_stream != "") {
+newbodystr += getStreamingText();
+} // Add the pending stream if it's needed. This will add any streamed text to a new bubble for the AI.
 newbodystr += contextDict.closeTag + '</p></div></div>'; // Lastly, append the closing div so our body's raw form is completed.
 if (aestheticInstructUISettings.use_markdown) { // If markdown is enabled, style the content of each bubble as well.
 let internalHTMLparts = []; // We'll cache the embedded HTML parts here to keep them intact.
@@ -9219,9 +9227,19 @@ Current version: 68
 function transformInputToAestheticStyle(bodyStr) { // Trim unnecessary empty space and new lines, and append * or " to each bubble if start/end sequence ends with * or ", to preserve styling.
 bodyStr = bodyStr.replaceAll(you + '\n', you).replaceAll(you + ' ', you).replaceAll(you, style('you') + `${you.endsWith('*') ? '*' : ''}` + `${you.endsWith('"') ? '"' : ''}`);
 bodyStr = bodyStr.replaceAll(bot + '\n', bot).replaceAll(bot + ' ', bot).replaceAll(bot, style('AI') + `${bot.endsWith('*') ? '*' : ''}` + `${bot.endsWith('"') ? '"' : ''}`);
+if(gametext_arr.length==0)
+{
+return bodyStr; //to allow html in the welcome text
+}
+else
+{
 return bodyStr.replaceAll('"', '&quot;');
+}
 }
-function getStreamingText() { return `${input.endsWith(bot) ? style('AI') + `${bot.endsWith('*') ? '*' : ''}` + `${bot.endsWith('"') ? '"' : ''}` : ''}` + escapeHtml(pending_context_preinjection) + escapeHtml(synchro_pending_stream); }
+function getStreamingText() {
+let isChatBotReply = (localsettings.opmode==3 && pending_context_preinjection.startsWith("\n") && pending_context_preinjection.endsWith(":"));
+return `${(input.endsWith(bot) || isChatBotReply) ? style('AI') + `${bot.endsWith('*') ? '*' : ''}` + `${bot.endsWith('"') ? '"' : ''}` : ''}` + escapeHtml(pending_context_preinjection) + escapeHtml(synchro_pending_stream);
+}
 }
 function updateTextPreview() {
@@ -9813,6 +9831,7 @@ Current version: 68
 <option value="2">Vicuna</option>
 <option value="3">Metharme</option>
 <option value="4">Llama 2 Chat</option>
+<option value="5">Q & A</option>
 </select>
 <table class="settingsmall text-center" style="border-spacing: 4px 2px; border-collapse: separate;">
 <tr>


@@ -1542,11 +1542,16 @@ def show_old_gui():
 #A very simple and stripped down embedded horde worker with no dependencies
 def run_horde_worker(args, api_key, worker_name):
 import urllib.request
+from datetime import datetime
 global friendlymodelname, maxhordectx, maxhordelen, exitcounter, modelbusy
 epurl = f"http://localhost:{args.port}"
 if args.host!="":
 epurl = f"http://{args.host}:{args.port}"
+def print_with_time(txt):
+print(f"{datetime.now().strftime('[%H:%M:%S]')} " + txt)
 def make_url_request(url, data, method='POST'):
 try:
 request = None
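
The rest of the worker's log lines switch to this helper so every message carries a timestamp. A self-contained sketch of the same helper, runnable on its own (the sample call is illustrative):

from datetime import datetime

def print_with_time(txt):
    # Prefix each worker log line with a [HH:MM:SS] timestamp.
    print(f"{datetime.now().strftime('[%H:%M:%S]')} " + txt)

print_with_time("Embedded Horde Worker is started.")  # e.g. [01:20:09] Embedded Horde Worker is started.
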
@@ -1565,12 +1570,12 @@ def run_horde_worker(args, api_key, worker_name):
 except urllib.error.HTTPError as e:
 try:
 errmsg = e.read().decode('utf-8')
-print(f"Error: {e} - {errmsg}, Make sure your Horde API key and worker name is valid.")
+print_with_time(f"Error: {e} - {errmsg}, Make sure your Horde API key and worker name is valid.")
 except Exception as e:
-print(f"Error: {e}, Make sure your Horde API key and worker name is valid.")
+print_with_time(f"Error: {e}, Make sure your Horde API key and worker name is valid.")
 return None
 except Exception as e:
-print(f"Error: {e} - {response_data}, Make sure your Horde API key and worker name is valid.")
+print_with_time(f"Error: {e} - {response_data}, Make sure your Horde API key and worker name is valid.")
 return None
 current_id = None
@@ -1584,7 +1589,7 @@ def run_horde_worker(args, api_key, worker_name):
 time.sleep(3)
 readygo = make_url_request(f'{epurl}/api/v1/info/version', None,'GET')
 if readygo:
-print("Embedded Horde Worker is started.")
+print_with_time(f"Embedded Horde Worker is started.")
 break
 while exitcounter < 10:
@@ -1593,7 +1598,7 @@ def run_horde_worker(args, api_key, worker_name):
 #first, make sure we are not generating
 if modelbusy.locked():
-time.sleep(0.5)
+time.sleep(0.3)
 continue
 #pop new request
@@ -1609,20 +1614,23 @@ def run_horde_worker(args, api_key, worker_name):
 pop = make_url_request(f'{cluster}/api/v2/generate/text/pop',gen_dict)
 if not pop:
 exitcounter += 1
-print(f"Failed to fetch job from {cluster}. Waiting 5 seconds...")
+print_with_time(f"Failed to fetch job from {cluster}. Waiting 5 seconds...")
 time.sleep(5)
 continue
 if not pop["id"]:
-slp = (2 if sleepy_counter<10 else (3 if sleepy_counter<20 else 4))
+slp = (1 if sleepy_counter<10 else (2 if sleepy_counter<25 else 3))
 #print(f"Server {cluster} has no valid generations for us. Sleep for {slp}s")
 time.sleep(slp)
 sleepy_counter += 1
+if sleepy_counter==20:
+print_with_time(f"No recent jobs, entering low power mode...")
 continue
 sleepy_counter = 0
 current_id = pop['id']
 current_payload = pop['payload']
-print(f"\nJob received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
+print(f"") #empty newline
+print_with_time(f"Job received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
 #do gen
 while exitcounter < 10:
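
The idle path now polls more aggressively at first and backs off in tiers, announcing low power mode after 20 consecutive empty polls. A small sketch of that schedule using the thresholds from the hunk above (the demo loop is illustrative):

def idle_sleep_seconds(sleepy_counter):
    # 1s for the first 10 empty polls, 2s up to 25, then 3s thereafter.
    return 1 if sleepy_counter < 10 else (2 if sleepy_counter < 25 else 3)

for polls in (0, 9, 10, 19, 20, 24, 25, 40):
    note = "  <- 'low power mode' message printed here" if polls == 20 else ""
    print(f"empty polls={polls:>2} sleep={idle_sleep_seconds(polls)}s{note}")
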
@@ -1634,10 +1642,11 @@ def run_horde_worker(args, api_key, worker_name):
 currentjob_attempts += 1
 if currentjob_attempts>5:
 break
-print("Server Busy - Not ready to generate...")
+print_with_time("Server Busy - Not ready to generate...")
 time.sleep(5)
 #submit reply
+print(f"") #empty newline
 if current_generation:
 submit_dict = {
 "id": current_id,
@@ -1647,19 +1656,20 @@ def run_horde_worker(args, api_key, worker_name):
 reply = make_url_request(cluster + '/api/v2/generate/text/submit', submit_dict)
 if not reply:
 exitcounter += 1
-print("\nError: Job submit failed.")
+print_with_time("Error: Job submit failed.")
 else:
-print(f'\nSubmitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
+print_with_time(f'Submitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
 else:
-print("\nError: Abandoned current job due to errors. Getting new job.")
+print_with_time("Error: Abandoned current job due to errors. Getting new job.")
 current_id = None
 current_payload = None
-time.sleep(1)
+time.sleep(0.2)
 if exitcounter<100:
-print("Horde Worker Shutdown - Too many errors.")
+print_with_time("Horde Worker Shutdown - Too many errors.")
 time.sleep(3)
 else:
-print("Horde Worker Shutdown - Server Closing.")
+print_with_time("Horde Worker Shutdown - Server Closing.")
 time.sleep(2)
 sys.exit(2)
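
For context, the worker loop runs while exitcounter < 10 and bumps the counter on every failed Horde call; once the loop exits, a counter below 100 is treated as an error shutdown, while a larger value (presumably set elsewhere when the server itself is closing, which this diff does not show) is treated as a clean one. A hedged sketch of that exit branch, with an illustrative counter value:

import sys
import time

exitcounter = 10  # illustrative: ten failed Horde requests accumulated

if exitcounter < 100:
    print("Horde Worker Shutdown - Too many errors.")
    time.sleep(3)
else:
    print("Horde Worker Shutdown - Server Closing.")
    time.sleep(2)
sys.exit(2)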