flag to retain grammar, fix makefile (+2 squashed commits)

Squashed commit:

[d5cd3f28] flag to retain grammar, fix makefile

[b3352963] updated lite to v73
This commit is contained in:
Concedo 2023-10-01 11:46:50 +08:00
parent 7ab01ee3c6
commit bc841ec302
5 changed files with 66 additions and 13 deletions

View file

@ -358,7 +358,7 @@ KQ1 =
KQ2 = KQ2 =
KQ3 = KQ3 =
ifndef LLAMA_NO_K_QUANTS ifndef LLAMA_NO_K_QUANTS
KQ1 = kquants.o KQ1 = k_quants.o
KQ2 = k_quants_noavx2.o KQ2 = k_quants_noavx2.o
KQ3 = k_quants_failsafe.o KQ3 = k_quants_failsafe.o
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h

View file

@ -73,6 +73,7 @@ struct generation_inputs
const char * stop_sequence[stop_token_max]; const char * stop_sequence[stop_token_max];
const bool stream_sse; const bool stream_sse;
const char * grammar; const char * grammar;
const bool grammar_retain_state;
}; };
struct generation_outputs struct generation_outputs
{ {

View file

@ -44,6 +44,7 @@ std::vector<std::string> generated_tokens;
llama_grammar * grammar = nullptr; //currently used grammar llama_grammar * grammar = nullptr; //currently used grammar
grammar_parser::parse_state parsed_grammar; grammar_parser::parse_state parsed_grammar;
static std::string current_grammar = "";
//return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt) //return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
static FileFormat file_format = FileFormat::BADFORMAT; static FileFormat file_format = FileFormat::BADFORMAT;
@ -1282,7 +1283,20 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
generated_tokens.clear(); // New Generation, new tokens generated_tokens.clear(); // New Generation, new tokens
std::string grammarstr = inputs.grammar; std::string grammarstr = inputs.grammar;
bool grammar_retain_state = inputs.grammar_retain_state;
if(grammar_retain_state)
{
if(grammarstr=="" || current_grammar!=grammarstr) //if grammar is identical, retain state
{
load_grammar(grammarstr); load_grammar(grammarstr);
}
}
else
{
load_grammar(grammarstr);
}
current_grammar = grammarstr;
if (params.repeat_last_n < 1) if (params.repeat_last_n < 1)
{ {

View file

@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
Just copy this single static HTML file anywhere and open it in a browser, or from a webserver. Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite. Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line. Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
Current version: 72 Current version: 73
-Concedo -Concedo
--> -->
@ -5823,7 +5823,8 @@ Current version: 72
document.getElementById("persist_session").checked = localsettings.persist_session; document.getElementById("persist_session").checked = localsettings.persist_session;
document.getElementById("opmode").value = localsettings.opmode; document.getElementById("opmode").value = localsettings.opmode;
document.getElementById("chatname").value = localsettings.chatname; document.getElementById("chatname").value = localsettings.chatname;
document.getElementById("chatopponent").value = localsettings.chatopponent; document.getElementById("chatopponent").value = replaceAll(localsettings.chatopponent,"||$||","\n");
handle_bot_name_onchange();
document.getElementById("instruct_starttag").value = localsettings.instruct_starttag; document.getElementById("instruct_starttag").value = localsettings.instruct_starttag;
document.getElementById("instruct_endtag").value = localsettings.instruct_endtag; document.getElementById("instruct_endtag").value = localsettings.instruct_endtag;
document.getElementById("top_k").value = localsettings.top_k; document.getElementById("top_k").value = localsettings.top_k;
@ -5845,6 +5846,7 @@ Current version: 72
} }
document.getElementById("setgrammar").disabled = !is_using_kcpp_with_grammar(); document.getElementById("setgrammar").disabled = !is_using_kcpp_with_grammar();
document.getElementById("grammar_retain_state").disabled = document.getElementById("setgrammar").disabled;
if(custom_kobold_endpoint!="") if(custom_kobold_endpoint!="")
{ {
@ -6047,7 +6049,7 @@ Current version: 72
if (localsettings.chatname == null || localsettings.chatname == "") { if (localsettings.chatname == null || localsettings.chatname == "") {
localsettings.chatname = "You"; localsettings.chatname = "You";
} }
localsettings.chatopponent = document.getElementById("chatopponent").value; localsettings.chatopponent = replaceAll(document.getElementById("chatopponent").value,"\n","||$||");
localsettings.instruct_starttag = document.getElementById("instruct_starttag").value; localsettings.instruct_starttag = document.getElementById("instruct_starttag").value;
if (localsettings.instruct_starttag == null || localsettings.instruct_starttag == "") { if (localsettings.instruct_starttag == null || localsettings.instruct_starttag == "") {
localsettings.instruct_starttag = "\\n### Instruction:\\n"; localsettings.instruct_starttag = "\\n### Instruction:\\n";
@ -6154,6 +6156,26 @@ Current version: 72
document.getElementById('instruct_tag_format').value = "0"; document.getElementById('instruct_tag_format').value = "0";
} }
// Live (oninput) handler for the "chatopponent" textarea: converts the legacy
// "||$||" opponent-name separator into real newlines and auto-grows the
// textarea to fit its content, capped at 9 visible rows.
function handle_bot_name_input()
{
let textarea = document.getElementById("chatopponent");
// NOTE(review): replaceAll appears to be a project-global helper, not
// String.prototype.replaceAll — confirm it treats "||$||" literally.
textarea.value = replaceAll(textarea.value,"||$||","\n");
// match() returns null when there are no newlines, hence the || [] fallback.
let numberOfLineBreaks = (textarea.value.match(/\n/g) || []).length;
// Clamp at 8 breaks so the box never grows beyond 9 rows.
numberOfLineBreaks = numberOfLineBreaks>8?8:numberOfLineBreaks;
textarea.rows = numberOfLineBreaks+1;
}
// Commit (onchange) handler for the "chatopponent" textarea: like the oninput
// handler it converts the legacy "||$||" separator to newlines and resizes the
// box, but additionally normalizes the committed value — collapsing runs of
// CR/LF into single newlines and trimming surrounding whitespace.
function handle_bot_name_onchange()
{
let textarea = document.getElementById("chatopponent");
// NOTE(review): replaceAll appears to be a project-global helper, not
// String.prototype.replaceAll — confirm it treats "||$||" literally.
textarea.value = replaceAll(textarea.value,"||$||","\n");
// Collapse any run of \r and/or \n into a single \n (drops blank lines).
textarea.value = textarea.value.replace(/[\r\n]+/g, '\n');
textarea.value = textarea.value.trim();
// Resize to content, capped at 9 visible rows (same rule as the input handler).
let numberOfLineBreaks = (textarea.value.match(/\n/g) || []).length;
numberOfLineBreaks = numberOfLineBreaks>8?8:numberOfLineBreaks;
textarea.rows = numberOfLineBreaks+1;
}
function toggle_uistyle() function toggle_uistyle()
{ {
//show or hide the 'Customize UI' button based on whether the Aesthetic Instruct UI Mode is active or not. //show or hide the 'Customize UI' button based on whether the Aesthetic Instruct UI Mode is active or not.
@ -7008,6 +7030,7 @@ Current version: 72
if(localsettings.grammar && localsettings.grammar!="") if(localsettings.grammar && localsettings.grammar!="")
{ {
submit_payload.params.grammar = localsettings.grammar; submit_payload.params.grammar = localsettings.grammar;
submit_payload.params.grammar_retain_state = document.getElementById("grammar_retain_state").checked;
} }
} }
@ -7902,7 +7925,7 @@ Current version: 72
++poll_ticks_passed; ++poll_ticks_passed;
//for horde requests, slow down by 3 times unless almost done //for horde requests, slow down by 3 times unless almost done
if(!is_using_custom_ep() && poll_ticks_passed%3!=0 && !horde_poll_nearly_completed) if(!is_using_custom_ep() && (horde_poll_nearly_completed?(poll_ticks_passed%2!=0):(poll_ticks_passed%3!=0)))
{ {
return; return;
} }
@ -8055,7 +8078,7 @@ Current version: 72
mtl.classList.add("redloader"); mtl.classList.add("redloader");
} else if (data.processing == 1 && data.queue_position == 0) { } else if (data.processing == 1 && data.queue_position == 0) {
mtl.classList.add("greenloader"); mtl.classList.add("greenloader");
if(data.wait_time<=3) if(data.wait_time<5)
{ {
horde_poll_nearly_completed = true; horde_poll_nearly_completed = true;
} }
@ -8344,8 +8367,18 @@ Current version: 72
whorun = "<br>There are <span class=\"color_orange\">" + selected_models.reduce((s, a) => s + a.count, 0) + "</span> <a class=\"color_green\" href=\"#\" onclick=\"get_and_show_workers()\">volunteer(s)</a> running selected models with a total queue length of <span class=\"color_orange\">"+ selected_models.reduce((s, a) => s + a.queued, 0) + "</span> tokens"; whorun = "<br>There are <span class=\"color_orange\">" + selected_models.reduce((s, a) => s + a.count, 0) + "</span> <a class=\"color_green\" href=\"#\" onclick=\"get_and_show_workers()\">volunteer(s)</a> running selected models with a total queue length of <span class=\"color_orange\">"+ selected_models.reduce((s, a) => s + a.queued, 0) + "</span> tokens";
} }
let nowmode = (localsettings.opmode==1?"Story Mode":(localsettings.opmode==2?"Adventure Mode":(localsettings.opmode==3?"Chat Mode":"Instruct Mode"))); let nowmode = (localsettings.opmode==1?"Story Mode":(localsettings.opmode==2?"Adventure Mode":(localsettings.opmode==3?"Chat Mode":"Instruct Mode")));
let selmodelstr = "";
const maxmodelnames = 7;
if(selected_models.length>maxmodelnames)
{
let shortenedarr = selected_models.slice(0, maxmodelnames-1);
selmodelstr = shortenedarr.reduce((s, a) => s + (s == "" ? "" : ", ") + a.name, "") + " and " + (selected_models.length-(maxmodelnames-1)) + " others";
}else{
selmodelstr = selected_models.reduce((s, a) => s + (s == "" ? "" : ", ") + a.name, "");
}
document.getElementById("gametext").innerHTML = "Welcome to <span class=\"color_cyan\">KoboldAI Lite</span>!<br>You are using the models <span class=\"color_green\">" document.getElementById("gametext").innerHTML = "Welcome to <span class=\"color_cyan\">KoboldAI Lite</span>!<br>You are using the models <span class=\"color_green\">"
+ selected_models.reduce((s, a) => s + (s == "" ? "" : ", ") + a.name, "") + "</span>" + (selected_workers.length == 0 ? "" : (" (Pinned to " + selected_workers.length + " worker IDs)")) + selmodelstr + "</span>" + (selected_workers.length == 0 ? "" : (" (Pinned to " + selected_workers.length + " worker IDs)"))
+ "." + whorun +".<br><br><b><span class=\"color_orange\">"+ nowmode +" Selected</span></b> - Enter a prompt below to begin!" + "<br>Or, <a href=\"#\" class=\"color_blueurl\" onclick=\"document.getElementById('loadfileinput').click()\">load a <b>JSON File</b> or a <b>Character Card</b> here.</a>" + "<br>Or, <a href=\"#\" class=\"color_blueurl\" onclick=\"display_scenarios()\">select a <b>Quick Start Scenario</b> here.</a><br>"; + "." + whorun +".<br><br><b><span class=\"color_orange\">"+ nowmode +" Selected</span></b> - Enter a prompt below to begin!" + "<br>Or, <a href=\"#\" class=\"color_blueurl\" onclick=\"document.getElementById('loadfileinput').click()\">load a <b>JSON File</b> or a <b>Character Card</b> here.</a>" + "<br>Or, <a href=\"#\" class=\"color_blueurl\" onclick=\"display_scenarios()\">select a <b>Quick Start Scenario</b> here.</a><br>";
} }
@ -10063,11 +10096,11 @@ Current version: 72
<table class="settingsmall text-center" style="border-spacing: 4px 2px; border-collapse: separate;"> <table class="settingsmall text-center" style="border-spacing: 4px 2px; border-collapse: separate;">
<tr> <tr>
<th>Your Name</th> <th>Your Name</th>
<th>AI Name <span class="helpicon">?<span class="helptext">The name of the person you want to chat with. Multiple opponents can be specified, creating a group chat, separate their names with ||$||</span></span></th> <th>AI Name <span class="helpicon">?<span class="helptext">The name of the person you want to chat with. Multiple opponents can be specified, creating a group chat, separate their names using multiple lines.</span></span></th>
</tr> </tr>
<tr> <tr>
<td><input class="settinglabel miniinput" type="text" placeholder="(Enter Name)" value="" id="chatname" title="The name that you will be chatting as"></td> <td style="vertical-align: top;"><input class="settinglabel miniinput" type="text" placeholder="(Enter Name)" value="" id="chatname" title="The name that you will be chatting as"></td>
<td><input class="settinglabel miniinput" type="text" placeholder="(Auto)" value="" id="chatopponent" title="The name of the person you want to chat with"></td> <td style="vertical-align: top;"><textarea class="settinglabel miniinput" style="resize: none;overflow:hidden;" id="chatopponent" placeholder="(Auto)" rows="1" wrap="off" title="The name of the person you want to chat with" oninput="handle_bot_name_input()" onchange="handle_bot_name_onchange()"></textarea></td>
</tr> </tr>
</table> </table>
@ -10195,6 +10228,8 @@ Current version: 72
<div class="settinglabel"> <div class="settinglabel">
<div class="justifyleft settingsmall">Additional Configs <span class="helpicon">?<span class="helptext">Grammar Sampling (KCPP) - Allows you to constrain output to fit specific structures.</span></span></div> <div class="justifyleft settingsmall">Additional Configs <span class="helpicon">?<span class="helptext">Grammar Sampling (KCPP) - Allows you to constrain output to fit specific structures.</span></span></div>
<button id="setgrammar" type="button" class="btn btn-primary" style="padding:2px 3px;margin-top:2px;font-size:11px;" onclick="selectGrammar()">Set Grammar</button> <button id="setgrammar" type="button" class="btn btn-primary" style="padding:2px 3px;margin-top:2px;font-size:11px;" onclick="selectGrammar()">Set Grammar</button>
<div class="settingsmall" style="padding:2px 3px;margin-top:4px;" title="Do not reset grammar on generate. May not work with multiple users.">Retain </div>
<input type="checkbox" id="grammar_retain_state" style="padding:2px 3px;margin-top:6px;height: max-content;">
</div> </div>
</div> </div>
</div> </div>

View file

@ -64,7 +64,8 @@ class generation_inputs(ctypes.Structure):
("unban_tokens_rt", ctypes.c_bool), ("unban_tokens_rt", ctypes.c_bool),
("stop_sequence", ctypes.c_char_p * stop_token_max), ("stop_sequence", ctypes.c_char_p * stop_token_max),
("stream_sse", ctypes.c_bool), ("stream_sse", ctypes.c_bool),
("grammar", ctypes.c_char_p)] ("grammar", ctypes.c_char_p),
("grammar_retain_state", ctypes.c_bool)]
class generation_outputs(ctypes.Structure): class generation_outputs(ctypes.Structure):
_fields_ = [("status", ctypes.c_int), _fields_ = [("status", ctypes.c_int),
@ -278,7 +279,7 @@ def load_model(model_filename):
ret = handle.load_model(inputs) ret = handle.load_model(inputs)
return ret return ret
def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=True, stream_sse=False, grammar='', genkey=''): def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=True, stream_sse=False, grammar='', grammar_retain_state=False, genkey=''):
global maxctx, args, currentusergenkey, totalgens global maxctx, args, currentusergenkey, totalgens
inputs = generation_inputs() inputs = generation_inputs()
outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs)) outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
@ -301,6 +302,7 @@ def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_
inputs.rep_pen_range = rep_pen_range inputs.rep_pen_range = rep_pen_range
inputs.stream_sse = stream_sse inputs.stream_sse = stream_sse
inputs.grammar = grammar.encode("UTF-8") inputs.grammar = grammar.encode("UTF-8")
inputs.grammar_retain_state = grammar_retain_state
inputs.unban_tokens_rt = not use_default_badwordsids inputs.unban_tokens_rt = not use_default_badwordsids
if args.usemirostat and args.usemirostat[0]>0: if args.usemirostat and args.usemirostat[0]>0:
inputs.mirostat = int(args.usemirostat[0]) inputs.mirostat = int(args.usemirostat[0])
@ -423,6 +425,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
use_default_badwordsids=genparams.get('use_default_badwordsids', True), use_default_badwordsids=genparams.get('use_default_badwordsids', True),
stream_sse=stream_flag, stream_sse=stream_flag,
grammar=genparams.get('grammar', ''), grammar=genparams.get('grammar', ''),
grammar_retain_state = genparams.get('grammar_retain_state', False),
genkey=genparams.get('genkey', '')) genkey=genparams.get('genkey', ''))
recvtxt = "" recvtxt = ""