Deprecate some launcher arguments.

This commit is contained in:
Concedo 2023-10-01 22:30:48 +08:00
parent b49a5bc546
commit dffc6bee74
2 changed files with 95 additions and 33 deletions

View file

@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
Current version: 73
Current version: 74
-Concedo
-->
@ -2898,6 +2898,7 @@ Current version: 73
var horde_poll_nearly_completed = false; //if true, increase polling rate
var prev_hl_chunk = null; //will store the last highlighted element
var pending_context_preinjection = ""; //this will be injected before the AI's next RESPONSE
var last_reply_was_empty = false; //set to true if last reply is empty
var current_memory = ""; //stored memory
var current_anote = ""; //stored author note
var current_anotetemplate = "[Author\'s note: <|>]";
@ -2951,7 +2952,7 @@ Current version: 73
autoscroll: true, //automatically scroll to bottom on render
trimsentences: true, //trim to last punctuation
trimwhitespace: false, //trim trailing whitespace
unban_tokens: false, //allow the EOS token when using locally
eos_ban_mode: 0, //allow the EOS token when using locally 0=auto,1=unban,2=ban
opmode: 1, //what mode are we in? 1=story, 2=adventure, 3=chat, 4=instruct
adventure_is_action: false, //in adventure mode, determine story or action
adventure_context_mod: true, //extra injection for adventure mode
@ -2966,6 +2967,7 @@ Current version: 73
beep_on: false,
image_styles: "",
grammar:"",
tokenstreaming: (localflag?true:false),
generate_images: (localflag?"":"stable_diffusion"), //"" is disabled and "*" is all, anything else is the model name pulled from stable horde
img_autogen: false,
img_allownsfw: true,
@ -3105,12 +3107,6 @@ Current version: 73
}
}
const tokenstreaming = urlParams.get('streaming');
if(tokenstreaming)
{
document.getElementById("tokenstreaming").checked = true;
}
const fromfile = ( window.location.protocol == 'file:' );
if(!dbgmode && !fromfile){
if(!window.console) window.console = {};
@ -3145,6 +3141,12 @@ Current version: 73
console.log("Discarded invalid local save: " + e);
}
const tokenstreaming = urlParams.get('streaming');
if(tokenstreaming)
{
localsettings.tokenstreaming = true;
}
//toggle genimg btn
if (localsettings.generate_images) {
document.getElementById("btn_genimg").classList.remove("hidden");
@ -3483,7 +3485,7 @@ Current version: 73
//0 is none, 1 is pseudostreaming, 2 is true streaming
function determine_streaming_type()
{
let streamtype = (document.getElementById("tokenstreaming").checked ? 1 : 0);
let streamtype = (localsettings.tokenstreaming ? 1 : 0);
let pstreamamount = urlParams.get('streamamount');
if(streamtype==1 && is_using_kcpp_with_streaming() && (pstreamamount == null || pstreamamount <= 0))
{
@ -3497,6 +3499,27 @@ Current version: 73
return streamtype;
}
// Decide whether the EOS token should be banned for the next generation.
// Returns true to ban EOS (i.e. use_default_badwordsids), false to allow it.
// Honors localsettings.eos_ban_mode: 0=auto, 1=always unban, 2=always ban.
// NOTE: loose equality (==) is deliberate — eos_ban_mode may arrive as a
// string from the <select> element's .value.
function determine_if_ban_eos(input_was_empty) {
    // Explicit override modes short-circuit the heuristics entirely.
    if (localsettings.eos_ban_mode != 0) {
        return (localsettings.eos_ban_mode == 2);
    }
    // Auto mode: decide from the operating mode and the user's input.
    if (localsettings.opmode == 1) {
        return true; // story mode: always ban EOS
    }
    if (localsettings.opmode == 3 && !localsettings.allow_continue_chat) {
        return false; // chat mode: always unban, unless continuing chat is allowed
    }
    if (!input_was_empty) {
        return false; // non-empty user input: ALWAYS unban EOS
    }
    // Empty input: ban EOS only if the previous reply was also empty,
    // nudging the model to actually produce text instead of stopping again.
    return last_reply_was_empty;
}
function is_using_web_lite()
{
return (window.location.hostname.includes("koboldai.net") || window.location.hostname.includes("kaihordewebui.github.io"));
@ -4703,6 +4726,15 @@ Current version: 73
}
}
}
// Render an uptime given in whole seconds as a compact "Xd Yh Zm" string.
function format_uptime(seconds)
{
    const SECS_PER_DAY = 3600 * 24;
    const d = Math.floor(seconds / SECS_PER_DAY);
    const h = Math.floor((seconds % SECS_PER_DAY) / 3600);
    const m = Math.floor((seconds % 3600) / 60);
    return `${d}d ${h}h ${m}m`;
}
function show_workers() {
document.getElementById("workercontainer").classList.remove("hidden");
@ -4729,7 +4761,7 @@ Current version: 73
allmdls += escapeHtml(elem.models[n].substring(0, 32));
}
str += "<tr id='workertablerow_"+i+"'><td>" + workerNameHtml + "</td><td>" + allmdls + "</td><td>" + elem.max_length + " / " + elem.max_context_length + "<br>(" + tokenspersec + " T/s)</td><td "+brokenstyle+">" + elem.uptime + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+clustertag+"</td></tr>";
str += "<tr id='workertablerow_"+i+"'><td>" + workerNameHtml + "</td><td>" + allmdls + "</td><td>" + elem.max_length + " / " + elem.max_context_length + "<br>(" + tokenspersec + " T/s)</td><td "+brokenstyle+">" + format_uptime(elem.uptime) + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+clustertag+"</td></tr>";
}
document.getElementById("workertable").innerHTML = str;
document.getElementById("worktitlecount").innerText = "Worker List - Total " + worker_data_showonly.length;
@ -4757,7 +4789,7 @@ Current version: 73
let brokenstyle = (elem.maintenance_mode ? "style=\"color:#ee4444;\"" : "");
let workerNameHtml = escapeHtml(elem.name.substring(0, 32));
let eleminfo = ((elem.info && elem.info!="")?elem.info:"");
str += "<tr><td>" + workerNameHtml + "</td><td><input class='' style='color:#000000;' id='mwc_desc_"+i+"' placeholder='Worker Description' value='"+eleminfo+"''></td><td "+brokenstyle+">" + elem.uptime + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+(elem.online?"Online":"Offline")+"</td><td><input type='checkbox' id='mwc_maint_"+i+"' "+(elem.maintenance_mode?"checked":"")+"></td></tr>";
str += "<tr><td>" + workerNameHtml + "</td><td><input class='' style='color:#000000;' id='mwc_desc_"+i+"' placeholder='Worker Description' value='"+eleminfo+"''></td><td "+brokenstyle+">" + format_uptime(elem.uptime) + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+(elem.online?"Online":"Offline")+"</td><td><input type='checkbox' id='mwc_maint_"+i+"' "+(elem.maintenance_mode?"checked":"")+"></td></tr>";
}
document.getElementById("myownworkertable").innerHTML = str;
@ -5819,7 +5851,7 @@ Current version: 73
document.getElementById("invert_colors").checked = localsettings.invert_colors;
document.getElementById("trimsentences").checked = localsettings.trimsentences;
document.getElementById("trimwhitespace").checked = localsettings.trimwhitespace;
document.getElementById("unban_tokens").checked = localsettings.unban_tokens;
document.getElementById("eos_ban_mode").value = localsettings.eos_ban_mode;
document.getElementById("persist_session").checked = localsettings.persist_session;
document.getElementById("opmode").value = localsettings.opmode;
document.getElementById("chatname").value = localsettings.chatname;
@ -5912,6 +5944,7 @@ Current version: 73
sdmodelshtml += "<option value=\"" + stablemodels[i].name + " (" + stablemodels[i].count + ")\">";
}
document.getElementById("sdmodels").innerHTML = sdmodelshtml;
document.getElementById("tokenstreaming").checked = localsettings.tokenstreaming;
document.getElementById("img_autogen").checked = localsettings.img_autogen;
document.getElementById("save_images").checked = localsettings.save_images;
document.getElementById("prompt_for_savename").checked = localsettings.prompt_for_savename;
@ -6026,7 +6059,7 @@ Current version: 73
localsettings.invert_colors = (document.getElementById("invert_colors").checked ? true : false);
localsettings.trimsentences = (document.getElementById("trimsentences").checked ? true : false);
localsettings.trimwhitespace = (document.getElementById("trimwhitespace").checked ? true : false);
localsettings.unban_tokens = (document.getElementById("unban_tokens").checked ? true : false);
localsettings.eos_ban_mode = document.getElementById("eos_ban_mode").value;
localsettings.persist_session = (document.getElementById("persist_session").checked ? true : false);
if(document.getElementById("opmode").value==3)
{
@ -6073,6 +6106,7 @@ Current version: 73
localsettings.image_styles = pendingstyle;
localsettings.grammar = pendinggrammar;
localsettings.tokenstreaming = (document.getElementById("tokenstreaming").checked ? true : false);
localsettings.img_autogen = (document.getElementById("img_autogen").checked ? true : false);
localsettings.save_images = (document.getElementById("save_images").checked ? true : false);
localsettings.prompt_for_savename = (document.getElementById("prompt_for_savename").checked ? true : false);
@ -6317,7 +6351,7 @@ Current version: 73
//v2 api specific fields
submit_payload.workers = selected_workers.map((m) => { return m.id });
dispatch_submit_generation(submit_payload);
dispatch_submit_generation(submit_payload,false);
render_gametext();
document.getElementById("memorytext").value = "[<|Generating summary, do not close window...|>]"
};
@ -6403,6 +6437,7 @@ Current version: 73
synchro_polled_response = null;
synchro_pending_stream = "";
waiting_for_autosummary = false;
last_reply_was_empty = false;
current_memory = "";
current_anote = "";
current_wi = [];
@ -6640,9 +6675,10 @@ Current version: 73
function submit_generation() {
let newgen = document.getElementById("input_text").value;
const user_input_empty = (newgen.trim()=="");
let doNotGenerate = false;
if (newgen.trim() != "" || gametext_arr.length > 0 || current_memory != "" || current_anote != "")
if (!user_input_empty || gametext_arr.length > 0 || current_memory != "" || current_anote != "")
{
waiting_for_autosummary = false;
idle_timer = 0;
@ -7045,7 +7081,7 @@ Current version: 73
if (!doNotGenerate)
{
dispatch_submit_generation(submit_payload);
dispatch_submit_generation(submit_payload, user_input_empty);
}
else
{
@ -7056,7 +7092,7 @@ Current version: 73
}
}
function dispatch_submit_generation(submit_payload)
function dispatch_submit_generation(submit_payload, input_was_empty) //if input is not empty, always unban eos
{
console.log(submit_payload);
last_request_str = JSON.stringify(submit_payload);
@ -7132,7 +7168,7 @@ Current version: 73
//version 1.2.4 and later supports unban tokens
if (kobold_endpoint_version && kobold_endpoint_version != "" && compare_version_str(kobold_endpoint_version, "1.2.3") > 0)
{
submit_payload.use_default_badwordsids = (localsettings.unban_tokens?false:true);
submit_payload.use_default_badwordsids = determine_if_ban_eos(input_was_empty);
}
let pseudostreaming = (determine_streaming_type()==1);
@ -7411,7 +7447,7 @@ Current version: 73
}
//horde supports unban tokens
submit_payload.use_default_badwordsids = (localsettings.unban_tokens?false:true);
submit_payload.use_default_badwordsids = determine_if_ban_eos(input_was_empty);
fetch(selectedhorde.submit_endpoint, {
method: 'POST', // or 'PUT'
@ -7632,7 +7668,6 @@ Current version: 73
}
function handle_incoming_text(gentxt, genworker, genmdl, genkudos) {
//handle stopping tokens if they got missed (eg. horde)
gentxt = trim_extra_stop_seqs(gentxt,true);
@ -7984,6 +8019,7 @@ Current version: 73
pending_response_id = "";
poll_in_progress = false;
let resp = synchro_polled_response;
last_reply_was_empty = (resp=="" || resp.trim()=="");
if (resp != null && resp != "") {
let gentxt = resp;
let genworker = "Custom Endpoint";
@ -8051,6 +8087,7 @@ Current version: 73
handle_incoming_autosummary(gentxt);
}
else {
last_reply_was_empty = (gentxt=="" || gentxt.trim()=="");
handle_incoming_text(gentxt, genworker, genmdl, genkudos);
}
}
@ -8138,6 +8175,7 @@ Current version: 73
if (oldInnerText != edited) {
gametext_arr = [];
redo_arr = [];
last_reply_was_empty = false;
retry_prev_text = "";
redo_prev_text = "";
@ -8991,7 +9029,7 @@ Current version: 73
console.log("Clear story");
if (pending_response_id == "" && gametext_arr.length > 0) {
last_reply_was_empty = false;
while(gametext_arr.length > 0)
{
if(retry_prev_text!="")
@ -9017,6 +9055,7 @@ Current version: 73
}
function btn_back() {
if (pending_response_id == "" && gametext_arr.length > 0) {
last_reply_was_empty = false;
if(retry_prev_text!="")
{
redo_prev_text = gametext_arr.pop();
@ -9039,6 +9078,7 @@ Current version: 73
console.log("Redo All story");
if (pending_response_id == "" && redo_arr.length > 0) {
last_reply_was_empty = false;
while(redo_arr.length > 0)
{
retry_prev_text = "";
@ -9058,11 +9098,13 @@ Current version: 73
function btn_redo() {
if (pending_response_id == "") {
if (redo_arr.length > 0) {
last_reply_was_empty = false;
retry_prev_text = "";
let popped = redo_arr.pop();
gametext_arr.push(popped);
render_gametext();
}else if (redo_prev_text != "") {
last_reply_was_empty = false;
retry_prev_text = gametext_arr.pop();
gametext_arr.push(redo_prev_text);
redo_prev_text = "";
@ -9074,6 +9116,7 @@ Current version: 73
function btn_retry() {
if (pending_response_id == "" && (gametext_arr.length > 1 ||
(gametext_arr.length > 0 && (current_memory != "" || current_anote != "")))) {
last_reply_was_empty = false;
let boxtextstash = document.getElementById("input_text").value;
document.getElementById("input_text").value = "";
let temp = gametext_arr[gametext_arr.length-1];
@ -10105,7 +10148,7 @@ Current version: 73
</table>
<div class="settinglabel">
<div class="justifyleft settingsmall" title="Whether to allow multiple lines in AI responses. Not recommended.">Multiline Replies </div>
<div class="justifyleft settingsmall" title="Whether to allow multiple lines in AI responses.">Multiline Replies </div>
<input type="checkbox" id="multiline_replies" style="margin:0px 0 0;">
</div>
<div class="settinglabel">
@ -10316,9 +10359,13 @@ Current version: 73
<input type="checkbox" id="trimwhitespace" style="margin:0px 0px 0px auto;">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall" title="">Unban EOS Tokens <span class="helpicon">?<span
<div class="justifyleft settingsmall" title="">EOS Token Ban <span class="helpicon">?<span
class="helptext">Controls whether the End-Of-Stream (EOS) token and other restricted special tokens may be generated: Auto decides based on the current mode, Unban always allows them, Ban always blocks them.</span></span></div>
<input type="checkbox" id="unban_tokens" style="margin:0px 0px 0px auto;">
<select style="padding:1px; height:auto; width: 34px; appearance: none; font-size: 7pt; margin:0px 0px 0px auto;" class="form-control" id="eos_ban_mode">
<option value="0">Auto</option>
<option value="1">Unban</option>
<option value="2">Ban</option>
</select>
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Placeholder Tags <span class="helpicon">?<span

View file

@ -1,9 +1,12 @@
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
# A hacky little script from Concedo that exposes llama.cpp function bindings
# allowing it to be used via a simulated kobold api endpoint
# generation delay scales linearly with original prompt length.
# KoboldCpp is an easy-to-use AI text-generation software for GGML models.
# It's a single self contained distributable from Concedo, that builds off llama.cpp,
# and adds a versatile Kobold API endpoint, additional format support,
# backward compatibility, as well as a fancy UI with persistent stories,
# editing tools, save formats, memory, world info, author's note, characters,
# scenarios and everything Kobold and Kobold Lite have to offer.
import ctypes
import os
@ -364,7 +367,7 @@ maxhordelen = 256
modelbusy = threading.Lock()
requestsinqueue = 0
defaultport = 5001
KcppVersion = "1.45"
KcppVersion = "1.45.1"
showdebug = True
showsamplerwarning = True
showmaxctxwarning = True
@ -1948,6 +1951,17 @@ def main(launch_args,start_server=True):
timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
timer_thread.start()
# show deprecation warnings
if args.unbantokens:
print("WARNING: --unbantokens is DEPRECATED and will be removed soon! EOS unbans should now be set via the generate API.")
if args.usemirostat:
print("WARNING: --usemirostat is DEPRECATED and will be removed soon! Mirostat values should now be set via the generate API.")
if args.stream:
print("WARNING: --stream is DEPRECATED and will be removed soon! This was a Kobold Lite only parameter, which is now a proper setting toggle inside Lite.")
if args.psutil_set_threads:
print("WARNING: --psutil_set_threads is DEPRECATED and will be removed soon! This parameter was generally unhelpful and unnecessary, as the defaults were usually sufficient")
if start_server:
print(f"Please connect to custom endpoint at {epurl}")
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
@ -1974,22 +1988,18 @@ if __name__ == '__main__':
default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
parser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
parser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768], default=2048)
parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
parser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
parser.add_argument("--stream", help="Uses streaming when generating tokens. Only for the Kobold Lite UI.", action='store_true')
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
parser.add_argument("--unbantokens", help="Normally, KoboldAI prevents the EOS token from being generated. This flag unbans it.", action='store_true')
parser.add_argument("--bantokens", help="You can manually specify a list of token SUBSTRINGS that the AI cannot use. This bans ALL instances of that substring.", metavar=('[token_substrings]'), nargs='+')
parser.add_argument("--usemirostat", help="Experimental! Replaces your samplers with mirostat. Takes 3 params = [type(0/1/2), tau(5.0), eta(0.1)].",metavar=('[type]', '[tau]', '[eta]'), type=float, nargs=3)
parser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0)
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true')
parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true')
parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_const', const=1, default=0)
parser.add_argument("--skiplauncher", help="Doesn't display or use the new GUI launcher.", action='store_true')
parser.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true')
parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength, max ctxlen, API key and worker name.",metavar=('[hordemodelname]', '[hordegenlength] [hordemaxctx] [hordeapikey] [hordeworkername]'), nargs='+')
compatgroup = parser.add_mutually_exclusive_group()
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
@ -2001,5 +2011,10 @@ if __name__ == '__main__':
parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them. Polled-streaming is disabled while multiple requests are in queue.", action='store_true')
parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')
#deprecated
parser.add_argument("--psutil_set_threads", help="--psutil_set_threads is DEPRECATED and will be removed soon! This parameter was generally unhelpful and unnecessary, as the defaults were usually sufficient.", action='store_true')
parser.add_argument("--stream", help="--stream is DEPRECATED and will be removed soon! This was a Kobold Lite only parameter, which is now a proper setting toggle inside Lite.", action='store_true')
parser.add_argument("--unbantokens", help="--unbantokens is DEPRECATED and will be removed soon! EOS unbans should now be set via the generate API", action='store_true')
parser.add_argument("--usemirostat", help="--usemirostat is DEPRECATED and will be removed soon! Mirostat values should now be set via the generate API",metavar=('[type]', '[tau]', '[eta]'), type=float, nargs=3)
main(parser.parse_args(),start_server=True)