improved SSE streaming

This commit is contained in:
Concedo 2023-09-28 17:33:34 +08:00
parent 38d4c6cedd
commit 6a821b268a
2 changed files with 48 additions and 27 deletions

View file

@ -2959,7 +2959,7 @@ Current version: 71
instruct_starttag: "\\n### Instruction:\\n", instruct_starttag: "\\n### Instruction:\\n",
instruct_endtag: "\\n### Response:\\n", instruct_endtag: "\\n### Response:\\n",
instruct_has_markdown: false, instruct_has_markdown: false,
raw_instruct_tags: false, placeholder_tags: true,
persist_session: true, persist_session: true,
speech_synth: 0, //0 is disabled speech_synth: 0, //0 is disabled
beep_on: false, beep_on: false,
@ -5144,8 +5144,11 @@ Current version: 71
{ {
desired_oai_ep = desired_oai_ep.slice(0, -1); desired_oai_ep = desired_oai_ep.slice(0, -1);
} }
if(desired_oai_ep!="" && desired_oai_ep.length > 4 && !desired_oai_ep.slice(-4).toLowerCase().includes("/v")) { if (document.getElementById("oaiaddversion").checked)
desired_oai_ep = desired_oai_ep + "/v1"; {
if(desired_oai_ep!="" && desired_oai_ep.length > 4 && !desired_oai_ep.slice(-4).toLowerCase().includes("/v")) {
desired_oai_ep = desired_oai_ep + "/v1";
}
} }
if(desired_oai_key!="" && desired_oai_ep!="") if(desired_oai_key!="" && desired_oai_ep!="")
{ {
@ -5288,8 +5291,11 @@ Current version: 71
{ {
desired_claude_ep = desired_claude_ep.slice(0, -1); desired_claude_ep = desired_claude_ep.slice(0, -1);
} }
if(desired_claude_ep!="" && desired_claude_ep.length > 4 && !desired_claude_ep.slice(-4).toLowerCase().includes("/v")) { if (document.getElementById("claudeaddversion").checked)
desired_claude_ep = desired_claude_ep + "/v1"; {
if (desired_claude_ep != "" && desired_claude_ep.length > 4 && !desired_claude_ep.slice(-4).toLowerCase().includes("/v")) {
desired_claude_ep = desired_claude_ep + "/v1";
}
} }
if(desired_claude_key!="" && desired_claude_ep!="") if(desired_claude_key!="" && desired_claude_ep!="")
{ {
@ -5815,7 +5821,7 @@ Current version: 71
document.getElementById("idle_duration").value = localsettings.idle_duration; document.getElementById("idle_duration").value = localsettings.idle_duration;
document.getElementById("adventure_context_mod").checked = localsettings.adventure_context_mod; document.getElementById("adventure_context_mod").checked = localsettings.adventure_context_mod;
document.getElementById("instruct_has_markdown").checked = localsettings.instruct_has_markdown; document.getElementById("instruct_has_markdown").checked = localsettings.instruct_has_markdown;
document.getElementById("raw_instruct_tags").checked = localsettings.raw_instruct_tags; document.getElementById("placeholder_tags").checked = localsettings.placeholder_tags;
document.getElementById("auto_ctxlen").checked = localsettings.auto_ctxlen; document.getElementById("auto_ctxlen").checked = localsettings.auto_ctxlen;
document.getElementById("auto_genamt").checked = localsettings.auto_genamt; document.getElementById("auto_genamt").checked = localsettings.auto_genamt;
if(localflag) if(localflag)
@ -5994,7 +6000,7 @@ Current version: 71
localsettings.idle_duration = document.getElementById("idle_duration").value; localsettings.idle_duration = document.getElementById("idle_duration").value;
localsettings.adventure_context_mod = (document.getElementById("adventure_context_mod").checked ? true : false); localsettings.adventure_context_mod = (document.getElementById("adventure_context_mod").checked ? true : false);
localsettings.instruct_has_markdown = (document.getElementById("instruct_has_markdown").checked ? true : false); localsettings.instruct_has_markdown = (document.getElementById("instruct_has_markdown").checked ? true : false);
localsettings.raw_instruct_tags = (document.getElementById("raw_instruct_tags").checked ? true : false); localsettings.placeholder_tags = (document.getElementById("placeholder_tags").checked ? true : false);
localsettings.generate_images = document.getElementById("generate_images").value; localsettings.generate_images = document.getElementById("generate_images").value;
localsettings.opmode = document.getElementById("opmode").value; localsettings.opmode = document.getElementById("opmode").value;
localsettings.chatname = document.getElementById("chatname").value; localsettings.chatname = document.getElementById("chatname").value;
@ -6382,7 +6388,7 @@ Current version: 71
function replace_placeholders(inputtxt) function replace_placeholders(inputtxt)
{ {
//only do this for chat and instruct modes //only do this for chat and instruct modes
if(localsettings.opmode==3||localsettings.opmode==4) if(localsettings.placeholder_tags)
{ {
inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true); inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true);
inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true); inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true);
@ -6583,7 +6589,7 @@ Current version: 71
if(newgen != "") if(newgen != "")
{ {
//append instruction for instruct mode //append instruction for instruct mode
if (localsettings.raw_instruct_tags) { if (!localsettings.placeholder_tags) {
newgen = get_instruct_starttag(false) + newgen + get_instruct_endtag(false); newgen = get_instruct_starttag(false) + newgen + get_instruct_endtag(false);
} }
else { else {
@ -6593,7 +6599,7 @@ Current version: 71
else //may be continuting existing instruction OR starting a brand new session. check if first action else //may be continuting existing instruction OR starting a brand new session. check if first action
{ {
if (gametext_arr.length == 0) { if (gametext_arr.length == 0) {
if (localsettings.raw_instruct_tags) { if (!localsettings.placeholder_tags) {
newgen = get_instruct_endtag(false); //bot response as first msg newgen = get_instruct_endtag(false); //bot response as first msg
} else { } else {
newgen = instructendplaceholder; newgen = instructendplaceholder;
@ -7560,12 +7566,6 @@ Current version: 71
//handle stopping tokens if they got missed (eg. horde) //handle stopping tokens if they got missed (eg. horde)
gentxt = trim_extra_stop_seqs(gentxt,true); gentxt = trim_extra_stop_seqs(gentxt,true);
//trim trailing whitespace, and multiple newlines
if (localsettings.trimwhitespace) {
gentxt = gentxt.replace(/[\t\r\n ]+$/, '');
gentxt = gentxt.replace(/[\r\n]+/g, '\n');
}
//always trim incomplete sentences for adventure and chat (if not multiline) //always trim incomplete sentences for adventure and chat (if not multiline)
if (localsettings.opmode == 2 || (localsettings.opmode == 3 && !localsettings.allow_continue_chat) || localsettings.trimsentences == true) { if (localsettings.opmode == 2 || (localsettings.opmode == 3 && !localsettings.allow_continue_chat) || localsettings.trimsentences == true) {
gentxt = end_trim_to_sentence(gentxt,true); gentxt = end_trim_to_sentence(gentxt,true);
@ -7574,6 +7574,12 @@ Current version: 71
//do a second pass, this time removing the actual stop token //do a second pass, this time removing the actual stop token
gentxt = trim_extra_stop_seqs(gentxt,false); gentxt = trim_extra_stop_seqs(gentxt,false);
//trim trailing whitespace, and multiple newlines
if (localsettings.trimwhitespace) {
gentxt = gentxt.replace(/[\t\r\n ]+$/, '');
gentxt = gentxt.replace(/[\r\n]+/g, '\n');
}
//if we are in adventure mode, truncate to action if it appears //if we are in adventure mode, truncate to action if it appears
if (localsettings.opmode == 2) if (localsettings.opmode == 2)
{ {
@ -9753,6 +9759,8 @@ Current version: 71
<option value="gpt-4">gpt-4</option> <option value="gpt-4">gpt-4</option>
<option value="gpt-4-32k">gpt-4-32k</option> <option value="gpt-4-32k">gpt-4-32k</option>
</select> </select>
<input type="checkbox" id="oaiaddversion" onchange="" checked>
<div class="box-label" title="Add endpoint version">Add Endpoint Version</div>
<input type="checkbox" id="jailbreakprompt" onchange="togglejailbreak()"> <input type="checkbox" id="jailbreakprompt" onchange="togglejailbreak()">
<div class="box-label" title="Adds extra text to improve AI response">Improve Prompt (System Message Injection)</div> <div class="box-label" title="Adds extra text to improve AI response">Improve Prompt (System Message Injection)</div>
<input class="form-control hidden" type="text" id="jailbreakprompttext" placeholder="(Enter System Message)" <input class="form-control hidden" type="text" id="jailbreakprompttext" placeholder="(Enter System Message)"
@ -9784,6 +9792,8 @@ Current version: 71
<option value="claude-2">claude-2</option> <option value="claude-2">claude-2</option>
<option value="claude-2.0">claude-2.0</option> <option value="claude-2.0">claude-2.0</option>
</select> </select>
<input type="checkbox" id="claudeaddversion" onchange="" checked>
<div class="box-label" title="Add endpoint version">Add Endpoint Version</div>
<input type="checkbox" id="clauderenamecompat" onchange=""> <input type="checkbox" id="clauderenamecompat" onchange="">
<div class="box-label" title="Rename User and Bot tags to work with claude, force inject them otherwise">Claude Compatibility Rename Fix</div> <div class="box-label" title="Rename User and Bot tags to work with claude, force inject them otherwise">Claude Compatibility Rename Fix</div>
</div> </div>
@ -10032,9 +10042,7 @@ Current version: 71
<td><input class="settinglabel miniinput" type="text" placeholder="\\n### Response:\\n" value="" id="instruct_endtag" onchange="edit_instruct_tag_format()" title="The sequence to end an instruction prompt"></td> <td><input class="settinglabel miniinput" type="text" placeholder="\\n### Response:\\n" value="" id="instruct_endtag" onchange="edit_instruct_tag_format()" title="The sequence to end an instruction prompt"></td>
</tr> </tr>
</table> </table>
<div class="justifyleft settingsmall">Raw Instruct Tags <span class="helpicon">?<span
class="helptext">If enabled, uses instruct tags verbatim. If disabled, uses universal instruct placeholders that get swapped on submit.</span></span> </div>
<input type="checkbox" id="raw_instruct_tags" style="margin:0px 0 0;">
<div class="justifyleft settingsmall">Enable Markdown <span class="helpicon">?<span <div class="justifyleft settingsmall">Enable Markdown <span class="helpicon">?<span
class="helptext">Allows the UI to use markdown formatting such as quotes and code blocks.</span></span> </div> class="helptext">Allows the UI to use markdown formatting such as quotes and code blocks.</span></span> </div>
<input type="checkbox" id="instruct_has_markdown" style="margin:0px 0 0;"> <input type="checkbox" id="instruct_has_markdown" style="margin:0px 0 0;">
@ -10213,6 +10221,11 @@ Current version: 71
<div class="justifyleft settingsmall" title="Allow the EOS token and others to be used">Unban EOS Tokens </div> <div class="justifyleft settingsmall" title="Allow the EOS token and others to be used">Unban EOS Tokens </div>
<input type="checkbox" id="unban_tokens" style="margin:0px 0 0;"> <input type="checkbox" id="unban_tokens" style="margin:0px 0 0;">
</div> </div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Placeholder Tags <span class="helpicon">?<span
class="helptext">If enabled, uses universal {{user}} and {{[INPUT]}} placeholders that get swapped on submit. If disabled, uses plaintext chat or instruct tags verbatim.</span></span></div>
<input type="checkbox" id="placeholder_tags" style="margin:0px 0 0;">
</div>
</div> </div>

View file

@ -461,7 +461,9 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
current_token = 0 current_token = 0
incomplete_token_buffer = bytearray() incomplete_token_buffer = bytearray()
while not handle.has_finished(): while True:
streamDone = handle.has_finished() #exit next loop on done
tokenStr = ""
streamcount = handle.get_stream_count() streamcount = handle.get_stream_count()
while current_token < streamcount: while current_token < streamcount:
token = handle.new_token(current_token) token = handle.new_token(current_token)
@ -470,17 +472,23 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
break break
current_token += 1 current_token += 1
newbyte = ctypes.string_at(token) newbyte = ctypes.string_at(token)
incomplete_token_buffer += bytearray(newbyte) incomplete_token_buffer += bytearray(newbyte)
tokenStr = incomplete_token_buffer.decode("UTF-8","ignore") tokenSeg = incomplete_token_buffer.decode("UTF-8","ignore")
if tokenStr!="": if tokenSeg!="":
incomplete_token_buffer.clear() incomplete_token_buffer.clear()
event_data = {"token": tokenStr} tokenStr += tokenSeg
event_str = json.dumps(event_data)
await self.send_sse_event("message", event_str)
await asyncio.sleep(0.02) #this should keep things responsive if tokenStr!="":
event_data = {"token": tokenStr}
event_str = json.dumps(event_data)
tokenStr = ""
await self.send_sse_event("message", event_str)
else:
await asyncio.sleep(0.02) #this should keep things responsive
if streamDone:
break
# flush buffers, sleep a bit to make sure all data sent, and then force close the connection # flush buffers, sleep a bit to make sure all data sent, and then force close the connection
self.wfile.flush() self.wfile.flush()