diff --git a/Makefile b/Makefile
index 039e08e27..695a943ea 100644
--- a/Makefile
+++ b/Makefile
@@ -358,7 +358,7 @@ KQ1 =
 KQ2 =
 KQ3 =
 ifndef LLAMA_NO_K_QUANTS
-KQ1 = kquants.o
+KQ1 = k_quants.o
 KQ2 = k_quants_noavx2.o
 KQ3 = k_quants_failsafe.o
 k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
diff --git a/expose.h b/expose.h
index fdef1e252..8791a1915 100644
--- a/expose.h
+++ b/expose.h
@@ -73,6 +73,7 @@ struct generation_inputs
     const char * stop_sequence[stop_token_max];
     const bool stream_sse;
     const char * grammar;
+    const bool grammar_retain_state;
 };
 struct generation_outputs
 {
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 451c9be42..39820712f 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -44,6 +44,7 @@
 std::vector<int> generated_tokens;
 
 llama_grammar * grammar = nullptr; //currently used grammar
 grammar_parser::parse_state parsed_grammar;
+static std::string current_grammar = "";
 
 //return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
 static FileFormat file_format = FileFormat::BADFORMAT;
@@ -1282,7 +1283,20 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     generated_tokens.clear(); // New Generation, new tokens
 
     std::string grammarstr = inputs.grammar;
-    load_grammar(grammarstr);
+    bool grammar_retain_state = inputs.grammar_retain_state;
+    if(grammar_retain_state)
+    {
+        if(grammarstr=="" || current_grammar!=grammarstr) //if grammar is identical, retain state
+        {
+            load_grammar(grammarstr);
+        }
+    }
+    else
+    {
+        load_grammar(grammarstr);
+    }
+    current_grammar = grammarstr;
+
     if (params.repeat_last_n < 1)
     {
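The heart of this change is the reload gate in gpttype_generate(): with grammar_retain_state off, every generation reloads the grammar from the request (the old behaviour); with it on, load_grammar() runs only when the incoming grammar string is empty or differs from the previous one, so an unchanged grammar keeps its partially-advanced parse state between calls. A minimal Python sketch of that decision, for illustration only (the helper name is hypothetical; the authoritative logic is the C++ hunk above):

    def should_reload_grammar(incoming: str, current: str, retain_state: bool) -> bool:
        """Mirror of the gate around load_grammar() in gpttype_generate()."""
        if not retain_state:
            return True  # old behaviour: reload on every generation
        # Retain mode: reload only when the grammar was cleared or actually
        # changed; an identical non-empty grammar keeps its in-progress state.
        return incoming == "" or incoming != current

Whether or not a reload happens, current_grammar is then overwritten with the incoming string, so the comparison always runs against the grammar seen on the previous call.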
diff --git a/klite.embd b/klite.embd
index 803e9a25a..3a3864fe3 100644
--- a/klite.embd
+++ b/klite.embd
@@ -6,7 +6,7 @@
 It requires no dependencies, installation or setup.
 Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
-Current version: 72
+Current version: 73
 -Concedo
 -->
@@ -5823,7 +5823,8 @@ Current version: 72
         document.getElementById("persist_session").checked = localsettings.persist_session;
         document.getElementById("opmode").value = localsettings.opmode;
         document.getElementById("chatname").value = localsettings.chatname;
-        document.getElementById("chatopponent").value = localsettings.chatopponent;
+        document.getElementById("chatopponent").value = replaceAll(localsettings.chatopponent,"||$||","\n");
+        handle_bot_name_onchange();
         document.getElementById("instruct_starttag").value = localsettings.instruct_starttag;
         document.getElementById("instruct_endtag").value = localsettings.instruct_endtag;
         document.getElementById("top_k").value = localsettings.top_k;
@@ -5845,6 +5846,7 @@ Current version: 72
         }
 
         document.getElementById("setgrammar").disabled = !is_using_kcpp_with_grammar();
+        document.getElementById("grammar_retain_state").disabled = document.getElementById("setgrammar").disabled;
 
         if(custom_kobold_endpoint!="")
         {
@@ -6047,7 +6049,7 @@ Current version: 72
         if (localsettings.chatname == null || localsettings.chatname == "") {
             localsettings.chatname = "You";
         }
-        localsettings.chatopponent = document.getElementById("chatopponent").value;
+        localsettings.chatopponent = replaceAll(document.getElementById("chatopponent").value,"\n","||$||");
         localsettings.instruct_starttag = document.getElementById("instruct_starttag").value;
         if (localsettings.instruct_starttag == null || localsettings.instruct_starttag == "") {
             localsettings.instruct_starttag = "\\n### Instruction:\\n";
@@ -6154,6 +6156,26 @@ Current version: 72
             document.getElementById('instruct_tag_format').value = "0";
         }
 
+        function handle_bot_name_input()
+        {
+            let textarea = document.getElementById("chatopponent");
+            textarea.value = replaceAll(textarea.value,"||$||","\n");
+            let numberOfLineBreaks = (textarea.value.match(/\n/g) || []).length;
+            numberOfLineBreaks = numberOfLineBreaks>8?8:numberOfLineBreaks;
+            textarea.rows = numberOfLineBreaks+1;
+        }
+        function handle_bot_name_onchange()
+        {
+            let textarea = document.getElementById("chatopponent");
+            textarea.value = replaceAll(textarea.value,"||$||","\n");
+            textarea.value = textarea.value.replace(/[\r\n]+/g, '\n');
+            textarea.value = textarea.value.trim();
+            let numberOfLineBreaks = (textarea.value.match(/\n/g) || []).length;
+            numberOfLineBreaks = numberOfLineBreaks>8?8:numberOfLineBreaks;
+            textarea.rows = numberOfLineBreaks+1;
+        }
+
+
         function toggle_uistyle()
         {
            //show or hide the 'Customize UI' button based on whether the Aesthetic Instruct UI Mode is active or not.
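Because the opponent list is now edited as multiple lines but persisted as a single settings string, the save path above swaps newlines for the ||$|| sentinel and the load path swaps them back. A round-trip sketch of that storage format, in Python for illustration (Lite itself does this in JavaScript with its replaceAll helper):

    SEP = "||$||"  # sentinel used to pack a multi-line name list into one string

    def pack_opponents(textarea_value: str) -> str:
        # save path: newline-separated names -> sentinel-separated string
        return textarea_value.replace("\n", SEP)

    def unpack_opponents(stored_value: str) -> str:
        # load path: sentinel-separated string -> newline-separated names
        return stored_value.replace(SEP, "\n")

    assert unpack_opponents(pack_opponents("Alice\nBob\nCarol")) == "Alice\nBob\nCarol"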
@@ -7008,6 +7030,7 @@ Current version: 72
             if(localsettings.grammar && localsettings.grammar!="")
             {
                 submit_payload.params.grammar = localsettings.grammar;
+                submit_payload.params.grammar_retain_state = document.getElementById("grammar_retain_state").checked;
             }
         }
 
@@ -7902,7 +7925,7 @@ Current version: 72
         ++poll_ticks_passed;
 
         //for horde requests, slow down by 3 times unless almost done
-        if(!is_using_custom_ep() && poll_ticks_passed%3!=0 && !horde_poll_nearly_completed)
+        if(!is_using_custom_ep() && (horde_poll_nearly_completed?(poll_ticks_passed%2!=0):(poll_ticks_passed%3!=0)))
         {
             return;
         }
@@ -8055,7 +8078,7 @@ Current version: 72
                 mtl.classList.add("redloader");
             } else if (data.processing == 1 && data.queue_position == 0) {
                 mtl.classList.add("greenloader");
-                if(data.wait_time<=3)
+                if(data.wait_time<5)
                 {
                     horde_poll_nearly_completed = true;
                 }
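The polling hunks above also change cadence: previously a nearly-completed horde job polled on every tick, whereas now it polls every second tick, and the normal case stays at every third tick (the "nearly completed" threshold widens from wait_time<=3 to wait_time<5). A Python sketch of the new gate, for illustration (the real check is the JS condition above):

    def horde_poll_skipped(poll_ticks_passed: int, nearly_completed: bool) -> bool:
        # Skip this tick unless it lines up with the cadence:
        # every 2nd tick when nearly done, every 3rd tick otherwise.
        if nearly_completed:
            return poll_ticks_passed % 2 != 0
        return poll_ticks_passed % 3 != 0

    # normal case polls on ticks 3 and 6; nearly-done polls on ticks 2, 4, 6
    assert [t for t in range(1, 7) if not horde_poll_skipped(t, False)] == [3, 6]
    assert [t for t in range(1, 7) if not horde_poll_skipped(t, True)] == [2, 4, 6]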
@@ -8344,8 +8367,18 @@ Current version: 72
 [Note: inline HTML inside this hunk's string literals (tags around the model names and the JSON File / Character Card / Quick Start Scenario links) was lost in extraction; <br> line breaks are restored below, other markup is omitted.]
             whorun = "<br>There are " + selected_models.reduce((s, a) => s + a.count, 0) + " volunteer(s) running selected models with a total queue length of "+ selected_models.reduce((s, a) => s + a.queued, 0) + " tokens";
         }
         let nowmode = (localsettings.opmode==1?"Story Mode":(localsettings.opmode==2?"Adventure Mode":(localsettings.opmode==3?"Chat Mode":"Instruct Mode")));
+        let selmodelstr = "";
+        const maxmodelnames = 7;
+        if(selected_models.length>maxmodelnames)
+        {
+            let shortenedarr = selected_models.slice(0, maxmodelnames-1);
+            selmodelstr = shortenedarr.reduce((s, a) => s + (s == "" ? "" : ", ") + a.name, "") + " and " + (selected_models.length-(maxmodelnames-1)) + " others";
+        }else{
+            selmodelstr = selected_models.reduce((s, a) => s + (s == "" ? "" : ", ") + a.name, "");
+        }
+
         document.getElementById("gametext").innerHTML = "Welcome to KoboldAI Lite!<br>You are using the models "
-        + selected_models.reduce((s, a) => s + (s == "" ? "" : ", ") + a.name, "") + "" + (selected_workers.length == 0 ? "" : (" (Pinned to " + selected_workers.length + " worker IDs)"))
+        + selmodelstr + "" + (selected_workers.length == 0 ? "" : (" (Pinned to " + selected_workers.length + " worker IDs)"))
         + "." + whorun +".<br><br>"+ nowmode +" Selected - Enter a prompt below to begin!"
         + "<br><br>Or, load a JSON File or a Character Card here." + "<br><br>Or, select a Quick Start Scenario here.<br>";
     }
@@ -10063,11 +10096,11 @@ Current version: 72
 [Note: this hunk edits the chat settings table and its markup was lost in extraction. Recoverable change: next to the "Your Name" cell, the "AI Name" tooltip is reworded, and (per the script changes above, which set textarea.rows on the chatopponent element) the single-line opponent input becomes a multi-line field. The surviving removed/added tooltip text:]
-AI Name ?The name of the person you want to chat with. Multiple opponents can be specified, creating a group chat, separate their names with ||$||
+AI Name ?The name of the person you want to chat with. Multiple opponents can be specified, creating a group chat, separate their names using multiple lines.
@@ -10195,6 +10228,8 @@ Current version: 72
 Additional Configs ?Grammar Sampling (KCPP) - Allows you to constrain output to fit specific structures. [context row; surrounding markup lost in extraction]
+[added line: markup lost; introduces a "Retain" checkbox next to the Grammar Sampling button, with id "grammar_retain_state" per the script changes above]
+[added line: markup lost]
diff --git a/koboldcpp.py b/koboldcpp.py
index a650811eb..c695c2029 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -64,7 +64,8 @@ class generation_inputs(ctypes.Structure):
                 ("unban_tokens_rt", ctypes.c_bool),
                 ("stop_sequence", ctypes.c_char_p * stop_token_max),
                 ("stream_sse", ctypes.c_bool),
-                ("grammar", ctypes.c_char_p)]
+                ("grammar", ctypes.c_char_p),
+                ("grammar_retain_state", ctypes.c_bool)]
 
 class generation_outputs(ctypes.Structure):
     _fields_ = [("status", ctypes.c_int),
@@ -278,7 +279,7 @@ def load_model(model_filename):
     ret = handle.load_model(inputs)
     return ret
 
-def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=True, stream_sse=False, grammar='', genkey=''):
+def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=True, stream_sse=False, grammar='', grammar_retain_state=False, genkey=''):
     global maxctx, args, currentusergenkey, totalgens
     inputs = generation_inputs()
     outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
@@ -301,6 +302,7 @@ def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_
     inputs.rep_pen_range = rep_pen_range
     inputs.stream_sse = stream_sse
     inputs.grammar = grammar.encode("UTF-8")
+    inputs.grammar_retain_state = grammar_retain_state
     inputs.unban_tokens_rt = not use_default_badwordsids
     if args.usemirostat and args.usemirostat[0]>0:
         inputs.mirostat = int(args.usemirostat[0])
@@ -423,6 +425,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                     use_default_badwordsids=genparams.get('use_default_badwordsids', True),
                     stream_sse=stream_flag,
                     grammar=genparams.get('grammar', ''),
+                    grammar_retain_state = genparams.get('grammar_retain_state', False),
                     genkey=genparams.get('genkey', ''))
 
                 recvtxt = ""
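Note that the new field is appended in the same position in both the C struct in expose.h and the ctypes _fields_ list here; ctypes marshals by declaration order, so the two declarations must stay in lockstep. For HTTP clients, grammar_retain_state simply rides along with grammar in the generate payload, as the genparams.get() call above shows. A minimal sketch, assuming koboldcpp's usual /api/v1/generate route on the default port 5001 (adjust for your setup; the prompt and grammar are arbitrary examples):

    import json
    import urllib.request

    payload = {
        "prompt": "List three fruits:",
        "max_length": 64,
        # Constrain output with a GBNF grammar, and keep its parse state
        # across successive calls instead of reloading it each time.
        "grammar": 'root ::= ("- " [a-z]+ "\\n")+',
        "grammar_retain_state": True,  # omitted -> False (see genparams.get above)
    }

    req = urllib.request.Request(
        "http://localhost:5001/api/v1/generate",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        print(json.load(resp))

Sending the identical grammar string with grammar_retain_state set on a follow-up request lets the sampler continue from the grammar state left by the previous generation; sending a changed or empty grammar forces a reload, per the gpttype_adapter.cpp logic above.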