Deprecate some launcher arguments.

This commit is contained in:
Concedo 2023-10-01 22:30:48 +08:00
parent b49a5bc546
commit dffc6bee74
2 changed files with 95 additions and 33 deletions

View file

@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
Current version: 73
Current version: 74
-Concedo
-->
@ -2898,6 +2898,7 @@ Current version: 73
var horde_poll_nearly_completed = false; //if true, increase polling rate
var prev_hl_chunk = null; //will store the last highlighted element
var pending_context_preinjection = ""; //this will be injected before the AI's next RESPONSE
var last_reply_was_empty = false; //set to true if last reply is empty
var current_memory = ""; //stored memory
var current_anote = ""; //stored author note
var current_anotetemplate = "[Author\'s note: <|>]";
@ -2951,7 +2952,7 @@ Current version: 73
autoscroll: true, //automatically scroll to bottom on render
trimsentences: true, //trim to last punctuation
trimwhitespace: false, //trim trailing whitespace
unban_tokens: false, //allow the EOS token when using locally
eos_ban_mode: 0, //allow the EOS token when using locally 0=auto,1=unban,2=ban
opmode: 1, //what mode are we in? 1=story, 2=adventure, 3=chat, 4=instruct
adventure_is_action: false, //in adventure mode, determine story or action
adventure_context_mod: true, //extra injection for adventure mode
@ -2966,6 +2967,7 @@ Current version: 73
beep_on: false,
image_styles: "",
grammar:"",
tokenstreaming: (localflag?true:false),
generate_images: (localflag?"":"stable_diffusion"), //"" is disabled and "*" is all, anything else is the model name pulled from stable horde
img_autogen: false,
img_allownsfw: true,
@ -3105,12 +3107,6 @@ Current version: 73
}
}
const tokenstreaming = urlParams.get('streaming');
if(tokenstreaming)
{
document.getElementById("tokenstreaming").checked = true;
}
const fromfile = ( window.location.protocol == 'file:' );
if(!dbgmode && !fromfile){
if(!window.console) window.console = {};
@ -3145,6 +3141,12 @@ Current version: 73
console.log("Discarded invalid local save: " + e);
}
const tokenstreaming = urlParams.get('streaming');
if(tokenstreaming)
{
localsettings.tokenstreaming = true;
}
//toggle genimg btn
if (localsettings.generate_images) {
document.getElementById("btn_genimg").classList.remove("hidden");
@ -3483,7 +3485,7 @@ Current version: 73
//0 is none, 1 is pseudostreaming, 2 is true streaming
function determine_streaming_type()
{
let streamtype = (document.getElementById("tokenstreaming").checked ? 1 : 0);
let streamtype = (localsettings.tokenstreaming ? 1 : 0);
let pstreamamount = urlParams.get('streamamount');
if(streamtype==1 && is_using_kcpp_with_streaming() && (pstreamamount == null || pstreamamount <= 0))
{
@ -3497,6 +3499,27 @@ Current version: 73
return streamtype;
}
// Decide whether the EOS token should be banned for the next generation.
// Returns true to ban EOS (i.e. use_default_badwordsids), false to allow it.
// Honors localsettings.eos_ban_mode: 0=auto, 1=always unban, 2=always ban.
// NOTE: loose equality (==) is deliberate — eos_ban_mode may arrive as a
// string from the <select> element's .value.
function determine_if_ban_eos(input_was_empty) {
    // Explicit override modes short-circuit the heuristics entirely.
    if (localsettings.eos_ban_mode != 0) {
        return (localsettings.eos_ban_mode == 2);
    }
    // Auto mode: decide from the operating mode and the user's input.
    if (localsettings.opmode == 1) {
        return true; // story mode: always ban EOS
    }
    if (localsettings.opmode == 3 && !localsettings.allow_continue_chat) {
        return false; // chat mode: always unban, unless continuing chat is allowed
    }
    if (!input_was_empty) {
        return false; // non-empty user input: ALWAYS unban EOS
    }
    // Empty input: ban EOS only if the previous reply was also empty,
    // nudging the model to actually produce text instead of stopping again.
    return last_reply_was_empty;
}
function is_using_web_lite()
{
return (window.location.hostname.includes("koboldai.net") || window.location.hostname.includes("kaihordewebui.github.io"));
@ -4703,6 +4726,15 @@ Current version: 73
}
}
}
// Render an uptime given in whole seconds as a compact "Xd Yh Zm" string.
function format_uptime(seconds)
{
    const SECS_PER_DAY = 3600 * 24;
    const d = Math.floor(seconds / SECS_PER_DAY);
    const h = Math.floor((seconds % SECS_PER_DAY) / 3600);
    const m = Math.floor((seconds % 3600) / 60);
    return `${d}d ${h}h ${m}m`;
}
function show_workers() {
document.getElementById("workercontainer").classList.remove("hidden");
@ -4729,7 +4761,7 @@ Current version: 73
allmdls += escapeHtml(elem.models[n].substring(0, 32));
}
str += "<tr id='workertablerow_"+i+"'><td>" + workerNameHtml + "</td><td>" + allmdls + "</td><td>" + elem.max_length + " / " + elem.max_context_length + "<br>(" + tokenspersec + " T/s)</td><td "+brokenstyle+">" + elem.uptime + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+clustertag+"</td></tr>";
str += "<tr id='workertablerow_"+i+"'><td>" + workerNameHtml + "</td><td>" + allmdls + "</td><td>" + elem.max_length + " / " + elem.max_context_length + "<br>(" + tokenspersec + " T/s)</td><td "+brokenstyle+">" + format_uptime(elem.uptime) + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+clustertag+"</td></tr>";
}
document.getElementById("workertable").innerHTML = str;
document.getElementById("worktitlecount").innerText = "Worker List - Total " + worker_data_showonly.length;
@ -4757,7 +4789,7 @@ Current version: 73
let brokenstyle = (elem.maintenance_mode ? "style=\"color:#ee4444;\"" : "");
let workerNameHtml = escapeHtml(elem.name.substring(0, 32));
let eleminfo = ((elem.info && elem.info!="")?elem.info:"");
str += "<tr><td>" + workerNameHtml + "</td><td><input class='' style='color:#000000;' id='mwc_desc_"+i+"' placeholder='Worker Description' value='"+eleminfo+"''></td><td "+brokenstyle+">" + elem.uptime + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+(elem.online?"Online":"Offline")+"</td><td><input type='checkbox' id='mwc_maint_"+i+"' "+(elem.maintenance_mode?"checked":"")+"></td></tr>";
str += "<tr><td>" + workerNameHtml + "</td><td><input class='' style='color:#000000;' id='mwc_desc_"+i+"' placeholder='Worker Description' value='"+eleminfo+"''></td><td "+brokenstyle+">" + format_uptime(elem.uptime) + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+(elem.online?"Online":"Offline")+"</td><td><input type='checkbox' id='mwc_maint_"+i+"' "+(elem.maintenance_mode?"checked":"")+"></td></tr>";
}
document.getElementById("myownworkertable").innerHTML = str;
@ -5819,7 +5851,7 @@ Current version: 73
document.getElementById("invert_colors").checked = localsettings.invert_colors;
document.getElementById("trimsentences").checked = localsettings.trimsentences;
document.getElementById("trimwhitespace").checked = localsettings.trimwhitespace;
document.getElementById("unban_tokens").checked = localsettings.unban_tokens;
document.getElementById("eos_ban_mode").value = localsettings.eos_ban_mode;
document.getElementById("persist_session").checked = localsettings.persist_session;
document.getElementById("opmode").value = localsettings.opmode;
document.getElementById("chatname").value = localsettings.chatname;
@ -5912,6 +5944,7 @@ Current version: 73
sdmodelshtml += "<option value=\"" + stablemodels[i].name + " (" + stablemodels[i].count + ")\">";
}
document.getElementById("sdmodels").innerHTML = sdmodelshtml;
document.getElementById("tokenstreaming").checked = localsettings.tokenstreaming;
document.getElementById("img_autogen").checked = localsettings.img_autogen;
document.getElementById("save_images").checked = localsettings.save_images;
document.getElementById("prompt_for_savename").checked = localsettings.prompt_for_savename;
@ -6026,7 +6059,7 @@ Current version: 73
localsettings.invert_colors = (document.getElementById("invert_colors").checked ? true : false);
localsettings.trimsentences = (document.getElementById("trimsentences").checked ? true : false);
localsettings.trimwhitespace = (document.getElementById("trimwhitespace").checked ? true : false);
localsettings.unban_tokens = (document.getElementById("unban_tokens").checked ? true : false);
localsettings.eos_ban_mode = document.getElementById("eos_ban_mode").value;
localsettings.persist_session = (document.getElementById("persist_session").checked ? true : false);
if(document.getElementById("opmode").value==3)
{
@ -6073,6 +6106,7 @@ Current version: 73
localsettings.image_styles = pendingstyle;
localsettings.grammar = pendinggrammar;
localsettings.tokenstreaming = (document.getElementById("tokenstreaming").checked ? true : false);
localsettings.img_autogen = (document.getElementById("img_autogen").checked ? true : false);
localsettings.save_images = (document.getElementById("save_images").checked ? true : false);
localsettings.prompt_for_savename = (document.getElementById("prompt_for_savename").checked ? true : false);
@ -6317,7 +6351,7 @@ Current version: 73
//v2 api specific fields
submit_payload.workers = selected_workers.map((m) => { return m.id });
dispatch_submit_generation(submit_payload);
dispatch_submit_generation(submit_payload,false);
render_gametext();
document.getElementById("memorytext").value = "[<|Generating summary, do not close window...|>]"
};
@ -6403,6 +6437,7 @@ Current version: 73
synchro_polled_response = null;
synchro_pending_stream = "";
waiting_for_autosummary = false;
last_reply_was_empty = false;
current_memory = "";
current_anote = "";
current_wi = [];
@ -6640,9 +6675,10 @@ Current version: 73
function submit_generation() {
let newgen = document.getElementById("input_text").value;
const user_input_empty = (newgen.trim()=="");
let doNotGenerate = false;
if (newgen.trim() != "" || gametext_arr.length > 0 || current_memory != "" || current_anote != "")
if (!user_input_empty || gametext_arr.length > 0 || current_memory != "" || current_anote != "")
{
waiting_for_autosummary = false;
idle_timer = 0;
@ -7045,7 +7081,7 @@ Current version: 73
if (!doNotGenerate)
{
dispatch_submit_generation(submit_payload);
dispatch_submit_generation(submit_payload, user_input_empty);
}
else
{
@ -7056,7 +7092,7 @@ Current version: 73
}
}
function dispatch_submit_generation(submit_payload)
function dispatch_submit_generation(submit_payload, input_was_empty) //if input is not empty, always unban eos
{
console.log(submit_payload);
last_request_str = JSON.stringify(submit_payload);
@ -7132,7 +7168,7 @@ Current version: 73
//version 1.2.4 and later supports unban tokens
if (kobold_endpoint_version && kobold_endpoint_version != "" && compare_version_str(kobold_endpoint_version, "1.2.3") > 0)
{
submit_payload.use_default_badwordsids = (localsettings.unban_tokens?false:true);
submit_payload.use_default_badwordsids = determine_if_ban_eos(input_was_empty);
}
let pseudostreaming = (determine_streaming_type()==1);
@ -7411,7 +7447,7 @@ Current version: 73
}
//horde supports unban tokens
submit_payload.use_default_badwordsids = (localsettings.unban_tokens?false:true);
submit_payload.use_default_badwordsids = determine_if_ban_eos(input_was_empty);
fetch(selectedhorde.submit_endpoint, {
method: 'POST', // or 'PUT'
@ -7632,7 +7668,6 @@ Current version: 73
}
function handle_incoming_text(gentxt, genworker, genmdl, genkudos) {
//handle stopping tokens if they got missed (eg. horde)
gentxt = trim_extra_stop_seqs(gentxt,true);
@ -7984,6 +8019,7 @@ Current version: 73
pending_response_id = "";
poll_in_progress = false;
let resp = synchro_polled_response;
last_reply_was_empty = (resp=="" || resp.trim()=="");
if (resp != null && resp != "") {
let gentxt = resp;
let genworker = "Custom Endpoint";
@ -8051,6 +8087,7 @@ Current version: 73
handle_incoming_autosummary(gentxt);
}
else {
last_reply_was_empty = (gentxt=="" || gentxt.trim()=="");
handle_incoming_text(gentxt, genworker, genmdl, genkudos);
}
}
@ -8138,6 +8175,7 @@ Current version: 73
if (oldInnerText != edited) {
gametext_arr = [];
redo_arr = [];
last_reply_was_empty = false;
retry_prev_text = "";
redo_prev_text = "";
@ -8991,7 +9029,7 @@ Current version: 73
console.log("Clear story");
if (pending_response_id == "" && gametext_arr.length > 0) {
last_reply_was_empty = false;
while(gametext_arr.length > 0)
{
if(retry_prev_text!="")
@ -9017,6 +9055,7 @@ Current version: 73
}
function btn_back() {
if (pending_response_id == "" && gametext_arr.length > 0) {
last_reply_was_empty = false;
if(retry_prev_text!="")
{
redo_prev_text = gametext_arr.pop();
@ -9039,6 +9078,7 @@ Current version: 73
console.log("Redo All story");
if (pending_response_id == "" && redo_arr.length > 0) {
last_reply_was_empty = false;
while(redo_arr.length > 0)
{
retry_prev_text = "";
@ -9058,11 +9098,13 @@ Current version: 73
function btn_redo() {
if (pending_response_id == "") {
if (redo_arr.length > 0) {
last_reply_was_empty = false;
retry_prev_text = "";
let popped = redo_arr.pop();
gametext_arr.push(popped);
render_gametext();
}else if (redo_prev_text != "") {
last_reply_was_empty = false;
retry_prev_text = gametext_arr.pop();
gametext_arr.push(redo_prev_text);
redo_prev_text = "";
@ -9074,6 +9116,7 @@ Current version: 73
function btn_retry() {
if (pending_response_id == "" && (gametext_arr.length > 1 ||
(gametext_arr.length > 0 && (current_memory != "" || current_anote != "")))) {
last_reply_was_empty = false;
let boxtextstash = document.getElementById("input_text").value;
document.getElementById("input_text").value = "";
let temp = gametext_arr[gametext_arr.length-1];
@ -10105,7 +10148,7 @@ Current version: 73
</table>
<div class="settinglabel">
<div class="justifyleft settingsmall" title="Whether to allow multiple lines in AI responses. Not recommended.">Multiline Replies </div>
<div class="justifyleft settingsmall" title="Whether to allow multiple lines in AI responses.">Multiline Replies </div>
<input type="checkbox" id="multiline_replies" style="margin:0px 0 0;">
</div>
<div class="settinglabel">
@ -10316,9 +10359,13 @@ Current version: 73
<input type="checkbox" id="trimwhitespace" style="margin:0px 0px 0px auto;">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall" title="">Unban EOS Tokens <span class="helpicon">?<span
<div class="justifyleft settingsmall" title="">EOS Token Ban <span class="helpicon">?<span
class="helptext">Controls whether the End-Of-Stream (EOS) token and other restricted special tokens may be generated: Auto decides based on the current mode, Unban always allows them, Ban always blocks them.</span></span></div>
<input type="checkbox" id="unban_tokens" style="margin:0px 0px 0px auto;">
<select style="padding:1px; height:auto; width: 34px; appearance: none; font-size: 7pt; margin:0px 0px 0px auto;" class="form-control" id="eos_ban_mode">
<option value="0">Auto</option>
<option value="1">Unban</option>
<option value="2">Ban</option>
</select>
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Placeholder Tags <span class="helpicon">?<span

View file

@ -1,9 +1,12 @@
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
# A hacky little script from Concedo that exposes llama.cpp function bindings
# allowing it to be used via a simulated kobold api endpoint
# generation delay scales linearly with original prompt length.
# KoboldCpp is an easy-to-use AI text-generation software for GGML models.
# It's a single self contained distributable from Concedo, that builds off llama.cpp,
# and adds a versatile Kobold API endpoint, additional format support,
# backward compatibility, as well as a fancy UI with persistent stories,
# editing tools, save formats, memory, world info, author's note, characters,
# scenarios and everything Kobold and Kobold Lite have to offer.
import ctypes
import os
@ -364,7 +367,7 @@ maxhordelen = 256
modelbusy = threading.Lock()
requestsinqueue = 0
defaultport = 5001
KcppVersion = "1.45"
KcppVersion = "1.45.1"
showdebug = True
showsamplerwarning = True
showmaxctxwarning = True
@ -1948,6 +1951,17 @@ def main(launch_args,start_server=True):
timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
timer_thread.start()
# show deprecation warnings
if args.unbantokens:
print("WARNING: --unbantokens is DEPRECATED and will be removed soon! EOS unbans should now be set via the generate API.")
if args.usemirostat:
print("WARNING: --usemirostat is DEPRECATED and will be removed soon! Mirostat values should now be set via the generate API.")
if args.stream:
print("WARNING: --stream is DEPRECATED and will be removed soon! This was a Kobold Lite only parameter, which is now a proper setting toggle inside Lite.")
if args.psutil_set_threads:
print("WARNING: --psutil_set_threads is DEPRECATED and will be removed soon! This parameter was generally unhelpful and unnecessary, as the defaults were usually sufficient")
if start_server:
print(f"Please connect to custom endpoint at {epurl}")
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
@ -1974,22 +1988,18 @@ if __name__ == '__main__':
default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
parser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
parser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768], default=2048)
parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
parser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
parser.add_argument("--stream", help="Uses streaming when generating tokens. Only for the Kobold Lite UI.", action='store_true')
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
parser.add_argument("--unbantokens", help="Normally, KoboldAI prevents the EOS token from being generated. This flag unbans it.", action='store_true')
parser.add_argument("--bantokens", help="You can manually specify a list of token SUBSTRINGS that the AI cannot use. This bans ALL instances of that substring.", metavar=('[token_substrings]'), nargs='+')
parser.add_argument("--usemirostat", help="Experimental! Replaces your samplers with mirostat. Takes 3 params = [type(0/1/2), tau(5.0), eta(0.1)].",metavar=('[type]', '[tau]', '[eta]'), type=float, nargs=3)
parser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0)
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true')
parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true')
parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_const', const=1, default=0)
parser.add_argument("--skiplauncher", help="Doesn't display or use the new GUI launcher.", action='store_true')
parser.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true')
parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength, max ctxlen, API key and worker name.",metavar=('[hordemodelname]', '[hordegenlength] [hordemaxctx] [hordeapikey] [hordeworkername]'), nargs='+')
compatgroup = parser.add_mutually_exclusive_group()
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
@ -2001,5 +2011,10 @@ if __name__ == '__main__':
parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them. Polled-streaming is disabled while multiple requests are in queue.", action='store_true')
parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')
#deprecated
parser.add_argument("--psutil_set_threads", help="--psutil_set_threads is DEPRECATED and will be removed soon! This parameter was generally unhelpful and unnecessary, as the defaults were usually sufficient.", action='store_true')
parser.add_argument("--stream", help="--stream is DEPRECATED and will be removed soon! This was a Kobold Lite only parameter, which is now a proper setting toggle inside Lite.", action='store_true')
parser.add_argument("--unbantokens", help="--unbantokens is DEPRECATED and will be removed soon! EOS unbans should now be set via the generate API", action='store_true')
parser.add_argument("--usemirostat", help="--usemirostat is DEPRECATED and will be removed soon! Mirostat values should now be set via the generate API",metavar=('[type]', '[tau]', '[eta]'), type=float, nargs=3)
main(parser.parse_args(),start_server=True)