deprecate some launcher arguments.

This commit is contained in:
Concedo 2023-10-01 22:30:48 +08:00
parent b49a5bc546
commit dffc6bee74
2 changed files with 95 additions and 33 deletions

View file

@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
Just copy this single static HTML file anywhere and open it in a browser, or from a webserver. Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite. Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line. Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
Current version: 73 Current version: 74
-Concedo -Concedo
--> -->
@ -2898,6 +2898,7 @@ Current version: 73
var horde_poll_nearly_completed = false; //if true, increase polling rate var horde_poll_nearly_completed = false; //if true, increase polling rate
var prev_hl_chunk = null; //will store the last highlighted element var prev_hl_chunk = null; //will store the last highlighted element
var pending_context_preinjection = ""; //this will be injected before the AI's next RESPONSE var pending_context_preinjection = ""; //this will be injected before the AI's next RESPONSE
var last_reply_was_empty = false; //set to true if last reply is empty
var current_memory = ""; //stored memory var current_memory = ""; //stored memory
var current_anote = ""; //stored author note var current_anote = ""; //stored author note
var current_anotetemplate = "[Author\'s note: <|>]"; var current_anotetemplate = "[Author\'s note: <|>]";
@ -2951,7 +2952,7 @@ Current version: 73
autoscroll: true, //automatically scroll to bottom on render autoscroll: true, //automatically scroll to bottom on render
trimsentences: true, //trim to last punctuation trimsentences: true, //trim to last punctuation
trimwhitespace: false, //trim trailing whitespace trimwhitespace: false, //trim trailing whitespace
unban_tokens: false, //allow the EOS token when using locally eos_ban_mode: 0, //allow the EOS token when using locally 0=auto,1=unban,2=ban
opmode: 1, //what mode are we in? 1=story, 2=adventure, 3=chat, 4=instruct opmode: 1, //what mode are we in? 1=story, 2=adventure, 3=chat, 4=instruct
adventure_is_action: false, //in adventure mode, determine story or action adventure_is_action: false, //in adventure mode, determine story or action
adventure_context_mod: true, //extra injection for adventure mode adventure_context_mod: true, //extra injection for adventure mode
@ -2966,6 +2967,7 @@ Current version: 73
beep_on: false, beep_on: false,
image_styles: "", image_styles: "",
grammar:"", grammar:"",
tokenstreaming: (localflag?true:false),
generate_images: (localflag?"":"stable_diffusion"), //"" is disabled and "*" is all, anything else is the model name pulled from stable horde generate_images: (localflag?"":"stable_diffusion"), //"" is disabled and "*" is all, anything else is the model name pulled from stable horde
img_autogen: false, img_autogen: false,
img_allownsfw: true, img_allownsfw: true,
@ -3105,12 +3107,6 @@ Current version: 73
} }
} }
const tokenstreaming = urlParams.get('streaming');
if(tokenstreaming)
{
document.getElementById("tokenstreaming").checked = true;
}
const fromfile = ( window.location.protocol == 'file:' ); const fromfile = ( window.location.protocol == 'file:' );
if(!dbgmode && !fromfile){ if(!dbgmode && !fromfile){
if(!window.console) window.console = {}; if(!window.console) window.console = {};
@ -3145,6 +3141,12 @@ Current version: 73
console.log("Discarded invalid local save: " + e); console.log("Discarded invalid local save: " + e);
} }
const tokenstreaming = urlParams.get('streaming');
if(tokenstreaming)
{
localsettings.tokenstreaming = true;
}
//toggle genimg btn //toggle genimg btn
if (localsettings.generate_images) { if (localsettings.generate_images) {
document.getElementById("btn_genimg").classList.remove("hidden"); document.getElementById("btn_genimg").classList.remove("hidden");
@ -3483,7 +3485,7 @@ Current version: 73
//0 is none, 1 is pseudostreaming, 2 is true streaming //0 is none, 1 is pseudostreaming, 2 is true streaming
function determine_streaming_type() function determine_streaming_type()
{ {
let streamtype = (document.getElementById("tokenstreaming").checked ? 1 : 0); let streamtype = (localsettings.tokenstreaming ? 1 : 0);
let pstreamamount = urlParams.get('streamamount'); let pstreamamount = urlParams.get('streamamount');
if(streamtype==1 && is_using_kcpp_with_streaming() && (pstreamamount == null || pstreamamount <= 0)) if(streamtype==1 && is_using_kcpp_with_streaming() && (pstreamamount == null || pstreamamount <= 0))
{ {
@ -3497,6 +3499,27 @@ Current version: 73
return streamtype; return streamtype;
} }
//decide whether the EOS token should be banned for the next generation request.
//eos_ban_mode: 0=auto (heuristics below), 1=always unban, 2=always ban.
//NOTE: eos_ban_mode may be a string (it is populated directly from a <select> value),
//so the loose == comparisons here are intentional — do not change to ===.
function determine_if_ban_eos(input_was_empty) {
    if (localsettings.eos_ban_mode != 0) {
        //explicit override: 2 means ban, anything else (1) means unban
        return localsettings.eos_ban_mode == 2;
    }
    //auto mode heuristics
    if (localsettings.opmode == 1) {
        return true; //story mode always bans EOS
    }
    if (localsettings.opmode == 3 && !localsettings.allow_continue_chat) {
        return false; //chat mode always unbans, unless continuation is allowed
    }
    if (!input_was_empty) {
        return false; //if user input is not empty, ALWAYS unban EOS
    }
    //empty input: only ban if the previous reply also came back empty
    return last_reply_was_empty;
}
function is_using_web_lite() function is_using_web_lite()
{ {
return (window.location.hostname.includes("koboldai.net") || window.location.hostname.includes("kaihordewebui.github.io")); return (window.location.hostname.includes("koboldai.net") || window.location.hostname.includes("kaihordewebui.github.io"));
@ -4703,6 +4726,15 @@ Current version: 73
} }
} }
} }
//render a seconds count as a compact "Xd Yh Zm" uptime string
function format_uptime(seconds)
{
    const secsPerDay = 3600 * 24;
    const wholeDays = Math.floor(seconds / secsPerDay);
    let remainder = seconds % secsPerDay;
    const wholeHours = Math.floor(remainder / 3600);
    remainder = remainder % 3600;
    const wholeMinutes = Math.floor(remainder / 60);
    return `${wholeDays}d ${wholeHours}h ${wholeMinutes}m`;
}
function show_workers() { function show_workers() {
document.getElementById("workercontainer").classList.remove("hidden"); document.getElementById("workercontainer").classList.remove("hidden");
@ -4729,7 +4761,7 @@ Current version: 73
allmdls += escapeHtml(elem.models[n].substring(0, 32)); allmdls += escapeHtml(elem.models[n].substring(0, 32));
} }
str += "<tr id='workertablerow_"+i+"'><td>" + workerNameHtml + "</td><td>" + allmdls + "</td><td>" + elem.max_length + " / " + elem.max_context_length + "<br>(" + tokenspersec + " T/s)</td><td "+brokenstyle+">" + elem.uptime + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+clustertag+"</td></tr>"; str += "<tr id='workertablerow_"+i+"'><td>" + workerNameHtml + "</td><td>" + allmdls + "</td><td>" + elem.max_length + " / " + elem.max_context_length + "<br>(" + tokenspersec + " T/s)</td><td "+brokenstyle+">" + format_uptime(elem.uptime) + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+clustertag+"</td></tr>";
} }
document.getElementById("workertable").innerHTML = str; document.getElementById("workertable").innerHTML = str;
document.getElementById("worktitlecount").innerText = "Worker List - Total " + worker_data_showonly.length; document.getElementById("worktitlecount").innerText = "Worker List - Total " + worker_data_showonly.length;
@ -4757,7 +4789,7 @@ Current version: 73
let brokenstyle = (elem.maintenance_mode ? "style=\"color:#ee4444;\"" : ""); let brokenstyle = (elem.maintenance_mode ? "style=\"color:#ee4444;\"" : "");
let workerNameHtml = escapeHtml(elem.name.substring(0, 32)); let workerNameHtml = escapeHtml(elem.name.substring(0, 32));
let eleminfo = ((elem.info && elem.info!="")?elem.info:""); let eleminfo = ((elem.info && elem.info!="")?elem.info:"");
str += "<tr><td>" + workerNameHtml + "</td><td><input class='' style='color:#000000;' id='mwc_desc_"+i+"' placeholder='Worker Description' value='"+eleminfo+"''></td><td "+brokenstyle+">" + elem.uptime + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+(elem.online?"Online":"Offline")+"</td><td><input type='checkbox' id='mwc_maint_"+i+"' "+(elem.maintenance_mode?"checked":"")+"></td></tr>"; str += "<tr><td>" + workerNameHtml + "</td><td><input class='' style='color:#000000;' id='mwc_desc_"+i+"' placeholder='Worker Description' value='"+eleminfo+"''></td><td "+brokenstyle+">" + format_uptime(elem.uptime) + "<br>(" + elem.requests_fulfilled + " jobs)</td><td "+style+">" + elem.kudos_rewards.toFixed(0) + "</td><td>"+(elem.online?"Online":"Offline")+"</td><td><input type='checkbox' id='mwc_maint_"+i+"' "+(elem.maintenance_mode?"checked":"")+"></td></tr>";
} }
document.getElementById("myownworkertable").innerHTML = str; document.getElementById("myownworkertable").innerHTML = str;
@ -5819,7 +5851,7 @@ Current version: 73
document.getElementById("invert_colors").checked = localsettings.invert_colors; document.getElementById("invert_colors").checked = localsettings.invert_colors;
document.getElementById("trimsentences").checked = localsettings.trimsentences; document.getElementById("trimsentences").checked = localsettings.trimsentences;
document.getElementById("trimwhitespace").checked = localsettings.trimwhitespace; document.getElementById("trimwhitespace").checked = localsettings.trimwhitespace;
document.getElementById("unban_tokens").checked = localsettings.unban_tokens; document.getElementById("eos_ban_mode").value = localsettings.eos_ban_mode;
document.getElementById("persist_session").checked = localsettings.persist_session; document.getElementById("persist_session").checked = localsettings.persist_session;
document.getElementById("opmode").value = localsettings.opmode; document.getElementById("opmode").value = localsettings.opmode;
document.getElementById("chatname").value = localsettings.chatname; document.getElementById("chatname").value = localsettings.chatname;
@ -5912,6 +5944,7 @@ Current version: 73
sdmodelshtml += "<option value=\"" + stablemodels[i].name + " (" + stablemodels[i].count + ")\">"; sdmodelshtml += "<option value=\"" + stablemodels[i].name + " (" + stablemodels[i].count + ")\">";
} }
document.getElementById("sdmodels").innerHTML = sdmodelshtml; document.getElementById("sdmodels").innerHTML = sdmodelshtml;
document.getElementById("tokenstreaming").checked = localsettings.tokenstreaming;
document.getElementById("img_autogen").checked = localsettings.img_autogen; document.getElementById("img_autogen").checked = localsettings.img_autogen;
document.getElementById("save_images").checked = localsettings.save_images; document.getElementById("save_images").checked = localsettings.save_images;
document.getElementById("prompt_for_savename").checked = localsettings.prompt_for_savename; document.getElementById("prompt_for_savename").checked = localsettings.prompt_for_savename;
@ -6026,7 +6059,7 @@ Current version: 73
localsettings.invert_colors = (document.getElementById("invert_colors").checked ? true : false); localsettings.invert_colors = (document.getElementById("invert_colors").checked ? true : false);
localsettings.trimsentences = (document.getElementById("trimsentences").checked ? true : false); localsettings.trimsentences = (document.getElementById("trimsentences").checked ? true : false);
localsettings.trimwhitespace = (document.getElementById("trimwhitespace").checked ? true : false); localsettings.trimwhitespace = (document.getElementById("trimwhitespace").checked ? true : false);
localsettings.unban_tokens = (document.getElementById("unban_tokens").checked ? true : false); localsettings.eos_ban_mode = document.getElementById("eos_ban_mode").value;
localsettings.persist_session = (document.getElementById("persist_session").checked ? true : false); localsettings.persist_session = (document.getElementById("persist_session").checked ? true : false);
if(document.getElementById("opmode").value==3) if(document.getElementById("opmode").value==3)
{ {
@ -6073,6 +6106,7 @@ Current version: 73
localsettings.image_styles = pendingstyle; localsettings.image_styles = pendingstyle;
localsettings.grammar = pendinggrammar; localsettings.grammar = pendinggrammar;
localsettings.tokenstreaming = (document.getElementById("tokenstreaming").checked ? true : false);
localsettings.img_autogen = (document.getElementById("img_autogen").checked ? true : false); localsettings.img_autogen = (document.getElementById("img_autogen").checked ? true : false);
localsettings.save_images = (document.getElementById("save_images").checked ? true : false); localsettings.save_images = (document.getElementById("save_images").checked ? true : false);
localsettings.prompt_for_savename = (document.getElementById("prompt_for_savename").checked ? true : false); localsettings.prompt_for_savename = (document.getElementById("prompt_for_savename").checked ? true : false);
@ -6317,7 +6351,7 @@ Current version: 73
//v2 api specific fields //v2 api specific fields
submit_payload.workers = selected_workers.map((m) => { return m.id }); submit_payload.workers = selected_workers.map((m) => { return m.id });
dispatch_submit_generation(submit_payload); dispatch_submit_generation(submit_payload,false);
render_gametext(); render_gametext();
document.getElementById("memorytext").value = "[<|Generating summary, do not close window...|>]" document.getElementById("memorytext").value = "[<|Generating summary, do not close window...|>]"
}; };
@ -6403,6 +6437,7 @@ Current version: 73
synchro_polled_response = null; synchro_polled_response = null;
synchro_pending_stream = ""; synchro_pending_stream = "";
waiting_for_autosummary = false; waiting_for_autosummary = false;
last_reply_was_empty = false;
current_memory = ""; current_memory = "";
current_anote = ""; current_anote = "";
current_wi = []; current_wi = [];
@ -6640,9 +6675,10 @@ Current version: 73
function submit_generation() { function submit_generation() {
let newgen = document.getElementById("input_text").value; let newgen = document.getElementById("input_text").value;
const user_input_empty = (newgen.trim()=="");
let doNotGenerate = false; let doNotGenerate = false;
if (newgen.trim() != "" || gametext_arr.length > 0 || current_memory != "" || current_anote != "") if (!user_input_empty || gametext_arr.length > 0 || current_memory != "" || current_anote != "")
{ {
waiting_for_autosummary = false; waiting_for_autosummary = false;
idle_timer = 0; idle_timer = 0;
@ -7045,7 +7081,7 @@ Current version: 73
if (!doNotGenerate) if (!doNotGenerate)
{ {
dispatch_submit_generation(submit_payload); dispatch_submit_generation(submit_payload, user_input_empty);
} }
else else
{ {
@ -7056,7 +7092,7 @@ Current version: 73
} }
} }
function dispatch_submit_generation(submit_payload) function dispatch_submit_generation(submit_payload, input_was_empty) //if input is not empty, always unban eos
{ {
console.log(submit_payload); console.log(submit_payload);
last_request_str = JSON.stringify(submit_payload); last_request_str = JSON.stringify(submit_payload);
@ -7132,7 +7168,7 @@ Current version: 73
//version 1.2.4 and later supports unban tokens //version 1.2.4 and later supports unban tokens
if (kobold_endpoint_version && kobold_endpoint_version != "" && compare_version_str(kobold_endpoint_version, "1.2.3") > 0) if (kobold_endpoint_version && kobold_endpoint_version != "" && compare_version_str(kobold_endpoint_version, "1.2.3") > 0)
{ {
submit_payload.use_default_badwordsids = (localsettings.unban_tokens?false:true); submit_payload.use_default_badwordsids = determine_if_ban_eos(input_was_empty);
} }
let pseudostreaming = (determine_streaming_type()==1); let pseudostreaming = (determine_streaming_type()==1);
@ -7411,7 +7447,7 @@ Current version: 73
} }
//horde supports unban tokens //horde supports unban tokens
submit_payload.use_default_badwordsids = (localsettings.unban_tokens?false:true); submit_payload.use_default_badwordsids = determine_if_ban_eos(input_was_empty);
fetch(selectedhorde.submit_endpoint, { fetch(selectedhorde.submit_endpoint, {
method: 'POST', // or 'PUT' method: 'POST', // or 'PUT'
@ -7632,7 +7668,6 @@ Current version: 73
} }
function handle_incoming_text(gentxt, genworker, genmdl, genkudos) { function handle_incoming_text(gentxt, genworker, genmdl, genkudos) {
//handle stopping tokens if they got missed (eg. horde) //handle stopping tokens if they got missed (eg. horde)
gentxt = trim_extra_stop_seqs(gentxt,true); gentxt = trim_extra_stop_seqs(gentxt,true);
@ -7984,6 +8019,7 @@ Current version: 73
pending_response_id = ""; pending_response_id = "";
poll_in_progress = false; poll_in_progress = false;
let resp = synchro_polled_response; let resp = synchro_polled_response;
last_reply_was_empty = (resp=="" || resp.trim()=="");
if (resp != null && resp != "") { if (resp != null && resp != "") {
let gentxt = resp; let gentxt = resp;
let genworker = "Custom Endpoint"; let genworker = "Custom Endpoint";
@ -8051,6 +8087,7 @@ Current version: 73
handle_incoming_autosummary(gentxt); handle_incoming_autosummary(gentxt);
} }
else { else {
last_reply_was_empty = (gentxt=="" || gentxt.trim()=="");
handle_incoming_text(gentxt, genworker, genmdl, genkudos); handle_incoming_text(gentxt, genworker, genmdl, genkudos);
} }
} }
@ -8138,6 +8175,7 @@ Current version: 73
if (oldInnerText != edited) { if (oldInnerText != edited) {
gametext_arr = []; gametext_arr = [];
redo_arr = []; redo_arr = [];
last_reply_was_empty = false;
retry_prev_text = ""; retry_prev_text = "";
redo_prev_text = ""; redo_prev_text = "";
@ -8991,7 +9029,7 @@ Current version: 73
console.log("Clear story"); console.log("Clear story");
if (pending_response_id == "" && gametext_arr.length > 0) { if (pending_response_id == "" && gametext_arr.length > 0) {
last_reply_was_empty = false;
while(gametext_arr.length > 0) while(gametext_arr.length > 0)
{ {
if(retry_prev_text!="") if(retry_prev_text!="")
@ -9017,6 +9055,7 @@ Current version: 73
} }
function btn_back() { function btn_back() {
if (pending_response_id == "" && gametext_arr.length > 0) { if (pending_response_id == "" && gametext_arr.length > 0) {
last_reply_was_empty = false;
if(retry_prev_text!="") if(retry_prev_text!="")
{ {
redo_prev_text = gametext_arr.pop(); redo_prev_text = gametext_arr.pop();
@ -9039,6 +9078,7 @@ Current version: 73
console.log("Redo All story"); console.log("Redo All story");
if (pending_response_id == "" && redo_arr.length > 0) { if (pending_response_id == "" && redo_arr.length > 0) {
last_reply_was_empty = false;
while(redo_arr.length > 0) while(redo_arr.length > 0)
{ {
retry_prev_text = ""; retry_prev_text = "";
@ -9058,11 +9098,13 @@ Current version: 73
function btn_redo() { function btn_redo() {
if (pending_response_id == "") { if (pending_response_id == "") {
if (redo_arr.length > 0) { if (redo_arr.length > 0) {
last_reply_was_empty = false;
retry_prev_text = ""; retry_prev_text = "";
let popped = redo_arr.pop(); let popped = redo_arr.pop();
gametext_arr.push(popped); gametext_arr.push(popped);
render_gametext(); render_gametext();
}else if (redo_prev_text != "") { }else if (redo_prev_text != "") {
last_reply_was_empty = false;
retry_prev_text = gametext_arr.pop(); retry_prev_text = gametext_arr.pop();
gametext_arr.push(redo_prev_text); gametext_arr.push(redo_prev_text);
redo_prev_text = ""; redo_prev_text = "";
@ -9074,6 +9116,7 @@ Current version: 73
function btn_retry() { function btn_retry() {
if (pending_response_id == "" && (gametext_arr.length > 1 || if (pending_response_id == "" && (gametext_arr.length > 1 ||
(gametext_arr.length > 0 && (current_memory != "" || current_anote != "")))) { (gametext_arr.length > 0 && (current_memory != "" || current_anote != "")))) {
last_reply_was_empty = false;
let boxtextstash = document.getElementById("input_text").value; let boxtextstash = document.getElementById("input_text").value;
document.getElementById("input_text").value = ""; document.getElementById("input_text").value = "";
let temp = gametext_arr[gametext_arr.length-1]; let temp = gametext_arr[gametext_arr.length-1];
@ -10105,7 +10148,7 @@ Current version: 73
</table> </table>
<div class="settinglabel"> <div class="settinglabel">
<div class="justifyleft settingsmall" title="Whether to allow multiple lines in AI responses. Not recommended.">Multiline Replies </div> <div class="justifyleft settingsmall" title="Whether to allow multiple lines in AI responses.">Multiline Replies </div>
<input type="checkbox" id="multiline_replies" style="margin:0px 0 0;"> <input type="checkbox" id="multiline_replies" style="margin:0px 0 0;">
</div> </div>
<div class="settinglabel"> <div class="settinglabel">
@ -10316,9 +10359,13 @@ Current version: 73
<input type="checkbox" id="trimwhitespace" style="margin:0px 0px 0px auto;"> <input type="checkbox" id="trimwhitespace" style="margin:0px 0px 0px auto;">
</div> </div>
<div class="settinglabel"> <div class="settinglabel">
<div class="justifyleft settingsmall" title="">Unban EOS Tokens <span class="helpicon">?<span <div class="justifyleft settingsmall" title="">EOS Token Ban <span class="helpicon">?<span
class="helptext">Allow the End-Of-Stream (EOS) token and potentially other restricted special tokens to be generated.</span></span></div> class="helptext">Allow the End-Of-Stream (EOS) token and potentially other restricted special tokens to be generated.</span></span></div>
<input type="checkbox" id="unban_tokens" style="margin:0px 0px 0px auto;"> <select style="padding:1px; height:auto; width: 34px; appearance: none; font-size: 7pt; margin:0px 0px 0px auto;" class="form-control" id="eos_ban_mode">
<option value="0">Auto</option>
<option value="1">Unban</option>
<option value="2">Ban</option>
</select>
</div> </div>
<div class="settinglabel"> <div class="settinglabel">
<div class="justifyleft settingsmall">Placeholder Tags <span class="helpicon">?<span <div class="justifyleft settingsmall">Placeholder Tags <span class="helpicon">?<span

View file

@ -1,9 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
#-*- coding: utf-8 -*- #-*- coding: utf-8 -*-
# A hacky little script from Concedo that exposes llama.cpp function bindings # KoboldCpp is an easy-to-use AI text-generation software for GGML models.
# allowing it to be used via a simulated kobold api endpoint # It's a single self contained distributable from Concedo, that builds off llama.cpp,
# generation delay scales linearly with original prompt length. # and adds a versatile Kobold API endpoint, additional format support,
# backward compatibility, as well as a fancy UI with persistent stories,
# editing tools, save formats, memory, world info, author's note, characters,
# scenarios and everything Kobold and Kobold Lite have to offer.
import ctypes import ctypes
import os import os
@ -364,7 +367,7 @@ maxhordelen = 256
modelbusy = threading.Lock() modelbusy = threading.Lock()
requestsinqueue = 0 requestsinqueue = 0
defaultport = 5001 defaultport = 5001
KcppVersion = "1.45" KcppVersion = "1.45.1"
showdebug = True showdebug = True
showsamplerwarning = True showsamplerwarning = True
showmaxctxwarning = True showmaxctxwarning = True
@ -1948,6 +1951,17 @@ def main(launch_args,start_server=True):
timer_thread = threading.Timer(1, onready_subprocess) #1 second delay timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
timer_thread.start() timer_thread.start()
# show deprecation warnings
if args.unbantokens:
print("WARNING: --unbantokens is DEPRECATED and will be removed soon! EOS unbans should now be set via the generate API.")
if args.usemirostat:
print("WARNING: --usemirostat is DEPRECATED and will be removed soon! Mirostat values should now be set via the generate API.")
if args.stream:
print("WARNING: --stream is DEPRECATED and will be removed soon! This was a Kobold Lite only parameter, which is now a proper setting toggle inside Lite.")
if args.psutil_set_threads:
print("WARNING: --psutil_set_threads is DEPRECATED and will be removed soon! This parameter was generally unhelpful and unnecessary, as the defaults were usually sufficient.")
if start_server: if start_server:
print(f"Please connect to custom endpoint at {epurl}") print(f"Please connect to custom endpoint at {epurl}")
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite)) asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
@ -1974,22 +1988,18 @@ if __name__ == '__main__':
default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1)) default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads) parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
parser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0) parser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
parser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true') parser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768], default=2048) parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768], default=2048)
parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512) parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
parser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+') parser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
parser.add_argument("--stream", help="Uses streaming when generating tokens. Only for the Kobold Lite UI.", action='store_true')
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true') parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
parser.add_argument("--unbantokens", help="Normally, KoboldAI prevents the EOS token from being generated. This flag unbans it.", action='store_true')
parser.add_argument("--bantokens", help="You can manually specify a list of token SUBSTRINGS that the AI cannot use. This bans ALL instances of that substring.", metavar=('[token_substrings]'), nargs='+') parser.add_argument("--bantokens", help="You can manually specify a list of token SUBSTRINGS that the AI cannot use. This bans ALL instances of that substring.", metavar=('[token_substrings]'), nargs='+')
parser.add_argument("--usemirostat", help="Experimental! Replaces your samplers with mirostat. Takes 3 params = [type(0/1/2), tau(5.0), eta(0.1)].",metavar=('[type]', '[tau]', '[eta]'), type=float, nargs=3)
parser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0) parser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0)
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true') parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true') parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true')
parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true') parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true')
parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_const', const=1, default=0) parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_const', const=1, default=0)
parser.add_argument("--skiplauncher", help="Doesn't display or use the new GUI launcher.", action='store_true') parser.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true')
parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength, max ctxlen, API key and worker name.",metavar=('[hordemodelname]', '[hordegenlength] [hordemaxctx] [hordeapikey] [hordeworkername]'), nargs='+') parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength, max ctxlen, API key and worker name.",metavar=('[hordemodelname]', '[hordegenlength] [hordemaxctx] [hordeapikey] [hordeworkername]'), nargs='+')
compatgroup = parser.add_mutually_exclusive_group() compatgroup = parser.add_mutually_exclusive_group()
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true') compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
@ -2001,5 +2011,10 @@ if __name__ == '__main__':
parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them. Polled-streaming is disabled while multiple requests are in queue.", action='store_true') parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them. Polled-streaming is disabled while multiple requests are in queue.", action='store_true')
parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true') parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')
#deprecated
parser.add_argument("--psutil_set_threads", help="--psutil_set_threads is DEPRECATED and will be removed soon! This parameter was generally unhelpful and unnecessary, as the defaults were usually sufficient.", action='store_true')
parser.add_argument("--stream", help="--stream is DEPRECATED and will be removed soon! This was a Kobold Lite only parameter, which is now a proper setting toggle inside Lite.", action='store_true')
parser.add_argument("--unbantokens", help="--unbantokens is DEPRECATED and will be removed soon! EOS unbans should now be set via the generate API", action='store_true')
parser.add_argument("--usemirostat", help="--usemirostat is DEPRECATED and will be removed soon! Mirostat values should now be set via the generate API",metavar=('[type]', '[tau]', '[eta]'), type=float, nargs=3)
main(parser.parse_args(),start_server=True) main(parser.parse_args(),start_server=True)