flag to retain grammar, fix makefile (+2 squashed commits)

Squashed commit:

[d5cd3f28] flag to retain grammar, fix makefile

[b3352963] updated lite to v73
This commit is contained in:
Concedo 2023-10-01 11:46:50 +08:00
parent 7ab01ee3c6
commit bc841ec302
5 changed files with 66 additions and 13 deletions

View file

@ -358,7 +358,7 @@ KQ1 =
KQ2 =
KQ3 =
ifndef LLAMA_NO_K_QUANTS
KQ1 = kquants.o
KQ1 = k_quants.o
KQ2 = k_quants_noavx2.o
KQ3 = k_quants_failsafe.o
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h

View file

@ -73,6 +73,7 @@ struct generation_inputs
const char * stop_sequence[stop_token_max];
const bool stream_sse;
const char * grammar;
const bool grammar_retain_state;
};
struct generation_outputs
{

View file

@ -44,6 +44,7 @@ std::vector<std::string> generated_tokens;
llama_grammar * grammar = nullptr; //currently used grammar
grammar_parser::parse_state parsed_grammar;
static std::string current_grammar = "";
//return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
static FileFormat file_format = FileFormat::BADFORMAT;
@ -1282,7 +1283,20 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
generated_tokens.clear(); // New Generation, new tokens
std::string grammarstr = inputs.grammar;
load_grammar(grammarstr);
bool grammar_retain_state = inputs.grammar_retain_state;
if(grammar_retain_state)
{
if(grammarstr=="" || current_grammar!=grammarstr) //if grammar is identical, retain state
{
load_grammar(grammarstr);
}
}
else
{
load_grammar(grammarstr);
}
current_grammar = grammarstr;
if (params.repeat_last_n < 1)
{

View file

@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
Current version: 72
Current version: 73
-Concedo
-->
@ -5823,7 +5823,8 @@ Current version: 72
document.getElementById("persist_session").checked = localsettings.persist_session;
document.getElementById("opmode").value = localsettings.opmode;
document.getElementById("chatname").value = localsettings.chatname;
document.getElementById("chatopponent").value = localsettings.chatopponent;
document.getElementById("chatopponent").value = replaceAll(localsettings.chatopponent,"||$||","\n");
handle_bot_name_onchange();
document.getElementById("instruct_starttag").value = localsettings.instruct_starttag;
document.getElementById("instruct_endtag").value = localsettings.instruct_endtag;
document.getElementById("top_k").value = localsettings.top_k;
@ -5845,6 +5846,7 @@ Current version: 72
}
document.getElementById("setgrammar").disabled = !is_using_kcpp_with_grammar();
document.getElementById("grammar_retain_state").disabled = document.getElementById("setgrammar").disabled;
if(custom_kobold_endpoint!="")
{
@ -6047,7 +6049,7 @@ Current version: 72
if (localsettings.chatname == null || localsettings.chatname == "") {
localsettings.chatname = "You";
}
localsettings.chatopponent = document.getElementById("chatopponent").value;
localsettings.chatopponent = replaceAll(document.getElementById("chatopponent").value,"\n","||$||");
localsettings.instruct_starttag = document.getElementById("instruct_starttag").value;
if (localsettings.instruct_starttag == null || localsettings.instruct_starttag == "") {
localsettings.instruct_starttag = "\\n### Instruction:\\n";
@ -6154,6 +6156,26 @@ Current version: 72
document.getElementById('instruct_tag_format').value = "0";
}
// Live input handler for the multi-opponent name box: unescape the stored
// "||$||" separator into real newlines and auto-grow the textarea to fit,
// capped at 8 visible rows.
function handle_bot_name_input()
{
	const namebox = document.getElementById("chatopponent");
	namebox.value = replaceAll(namebox.value, "||$||", "\n");
	// One row per line of text, never more than 8 rows tall.
	const linebreaks = namebox.value.split("\n").length - 1;
	namebox.rows = Math.min(linebreaks, 8) + 1;
}
// Change/blur handler for the multi-opponent name box: unescape the stored
// "||$||" separator, normalize runs of CR/LF into single newlines, strip
// surrounding whitespace, then resize the textarea (max 8 rows).
function handle_bot_name_onchange()
{
	const namebox = document.getElementById("chatopponent");
	let cleaned = replaceAll(namebox.value, "||$||", "\n");
	cleaned = cleaned.replace(/[\r\n]+/g, '\n').trim();
	namebox.value = cleaned;
	// One row per line of text, never more than 8 rows tall.
	const linebreaks = cleaned.split("\n").length - 1;
	namebox.rows = Math.min(linebreaks, 8) + 1;
}
function toggle_uistyle()
{
//show or hide the 'Customize UI' button based on whether the Aesthetic Instruct UI Mode is active or not.
@ -7008,6 +7030,7 @@ Current version: 72
if(localsettings.grammar && localsettings.grammar!="")
{
submit_payload.params.grammar = localsettings.grammar;
submit_payload.params.grammar_retain_state = document.getElementById("grammar_retain_state").checked;
}
}
@ -7902,7 +7925,7 @@ Current version: 72
++poll_ticks_passed;
//for horde requests, slow down by 3 times unless almost done
if(!is_using_custom_ep() && poll_ticks_passed%3!=0 && !horde_poll_nearly_completed)
if(!is_using_custom_ep() && (horde_poll_nearly_completed?(poll_ticks_passed%2!=0):(poll_ticks_passed%3!=0)))
{
return;
}
@ -8055,7 +8078,7 @@ Current version: 72
mtl.classList.add("redloader");
} else if (data.processing == 1 && data.queue_position == 0) {
mtl.classList.add("greenloader");
if(data.wait_time<=3)
if(data.wait_time<5)
{
horde_poll_nearly_completed = true;
}
@ -8344,8 +8367,18 @@ Current version: 72
whorun = "<br>There are <span class=\"color_orange\">" + selected_models.reduce((s, a) => s + a.count, 0) + "</span> <a class=\"color_green\" href=\"#\" onclick=\"get_and_show_workers()\">volunteer(s)</a> running selected models with a total queue length of <span class=\"color_orange\">"+ selected_models.reduce((s, a) => s + a.queued, 0) + "</span> tokens";
}
let nowmode = (localsettings.opmode==1?"Story Mode":(localsettings.opmode==2?"Adventure Mode":(localsettings.opmode==3?"Chat Mode":"Instruct Mode")));
let selmodelstr = "";
const maxmodelnames = 7;
if(selected_models.length>maxmodelnames)
{
let shortenedarr = selected_models.slice(0, maxmodelnames-1);
selmodelstr = shortenedarr.reduce((s, a) => s + (s == "" ? "" : ", ") + a.name, "") + " and " + (selected_models.length-(maxmodelnames-1)) + " others";
}else{
selmodelstr = selected_models.reduce((s, a) => s + (s == "" ? "" : ", ") + a.name, "");
}
document.getElementById("gametext").innerHTML = "Welcome to <span class=\"color_cyan\">KoboldAI Lite</span>!<br>You are using the models <span class=\"color_green\">"
+ selected_models.reduce((s, a) => s + (s == "" ? "" : ", ") + a.name, "") + "</span>" + (selected_workers.length == 0 ? "" : (" (Pinned to " + selected_workers.length + " worker IDs)"))
+ selmodelstr + "</span>" + (selected_workers.length == 0 ? "" : (" (Pinned to " + selected_workers.length + " worker IDs)"))
+ "." + whorun +".<br><br><b><span class=\"color_orange\">"+ nowmode +" Selected</span></b> - Enter a prompt below to begin!" + "<br>Or, <a href=\"#\" class=\"color_blueurl\" onclick=\"document.getElementById('loadfileinput').click()\">load a <b>JSON File</b> or a <b>Character Card</b> here.</a>" + "<br>Or, <a href=\"#\" class=\"color_blueurl\" onclick=\"display_scenarios()\">select a <b>Quick Start Scenario</b> here.</a><br>";
}
@ -10063,11 +10096,11 @@ Current version: 72
<table class="settingsmall text-center" style="border-spacing: 4px 2px; border-collapse: separate;">
<tr>
<th>Your Name</th>
<th>AI Name <span class="helpicon">?<span class="helptext">The name of the person you want to chat with. Multiple opponents can be specified, creating a group chat, separate their names with ||$||</span></span></th>
<th>AI Name <span class="helpicon">?<span class="helptext">The name of the person you want to chat with. Multiple opponents can be specified, creating a group chat, separate their names using multiple lines.</span></span></th>
</tr>
<tr>
<td><input class="settinglabel miniinput" type="text" placeholder="(Enter Name)" value="" id="chatname" title="The name that you will be chatting as"></td>
<td><input class="settinglabel miniinput" type="text" placeholder="(Auto)" value="" id="chatopponent" title="The name of the person you want to chat with"></td>
<td style="vertical-align: top;"><input class="settinglabel miniinput" type="text" placeholder="(Enter Name)" value="" id="chatname" title="The name that you will be chatting as"></td>
<td style="vertical-align: top;"><textarea class="settinglabel miniinput" style="resize: none;overflow:hidden;" id="chatopponent" placeholder="(Auto)" rows="1" wrap="off" title="The name of the person you want to chat with" oninput="handle_bot_name_input()" onchange="handle_bot_name_onchange()"></textarea></td>
</tr>
</table>
@ -10195,6 +10228,8 @@ Current version: 72
<div class="settinglabel">
<div class="justifyleft settingsmall">Additional Configs <span class="helpicon">?<span class="helptext">Grammar Sampling (KCPP) - Allows you to constrain output to fit specific structures.</span></span></div>
<button id="setgrammar" type="button" class="btn btn-primary" style="padding:2px 3px;margin-top:2px;font-size:11px;" onclick="selectGrammar()">Set Grammar</button>
<div class="settingsmall" style="padding:2px 3px;margin-top:4px;" title="Do not reset grammar on generate. May not work with multiple users.">Retain </div>
<input type="checkbox" id="grammar_retain_state" style="padding:2px 3px;margin-top:6px;height: max-content;">
</div>
</div>
</div>

View file

@ -64,7 +64,8 @@ class generation_inputs(ctypes.Structure):
("unban_tokens_rt", ctypes.c_bool),
("stop_sequence", ctypes.c_char_p * stop_token_max),
("stream_sse", ctypes.c_bool),
("grammar", ctypes.c_char_p)]
("grammar", ctypes.c_char_p),
("grammar_retain_state", ctypes.c_bool)]
class generation_outputs(ctypes.Structure):
_fields_ = [("status", ctypes.c_int),
@ -278,7 +279,7 @@ def load_model(model_filename):
ret = handle.load_model(inputs)
return ret
def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=True, stream_sse=False, grammar='', genkey=''):
def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=True, stream_sse=False, grammar='', grammar_retain_state=False, genkey=''):
global maxctx, args, currentusergenkey, totalgens
inputs = generation_inputs()
outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
@ -301,6 +302,7 @@ def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_
inputs.rep_pen_range = rep_pen_range
inputs.stream_sse = stream_sse
inputs.grammar = grammar.encode("UTF-8")
inputs.grammar_retain_state = grammar_retain_state
inputs.unban_tokens_rt = not use_default_badwordsids
if args.usemirostat and args.usemirostat[0]>0:
inputs.mirostat = int(args.usemirostat[0])
@ -423,6 +425,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
use_default_badwordsids=genparams.get('use_default_badwordsids', True),
stream_sse=stream_flag,
grammar=genparams.get('grammar', ''),
grammar_retain_state = genparams.get('grammar_retain_state', False),
genkey=genparams.get('genkey', ''))
recvtxt = ""