increase context for GGUF to 32k, add horde worker stats, fix glitch in horde launcher UI, support OAI frequency penalty, update Lite

Concedo 2023-09-28 23:50:08 +08:00
parent 6a821b268a
commit ca8b315202
3 changed files with 85 additions and 42 deletions

gpttype_adapter.cpp

@@ -562,7 +562,17 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         blasbatchsize = 8;
     }
     params.memory_f16 = inputs.f16_kv;
-    params.n_ctx = inputs.max_context_length;
+
+    auto clamped_max_context_length = inputs.max_context_length;
+    if(clamped_max_context_length > 16384 &&
+       file_format != FileFormat::GGUF_LLAMA && file_format != FileFormat::GGUF_FALCON)
+    {
+        printf("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.\n");
+        clamped_max_context_length = 16384;
+    }
+
+    params.n_ctx = clamped_max_context_length;
     neox_ctx_v2.hparams.n_ctx = neox_ctx_v3.hparams.n_ctx
     = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx
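The clamp above gates large contexts on file format: only GGUF LLaMA/Falcon models may request more than 16k, and everything else is lowered with a warning. A minimal Python sketch of the same rule (the is_gguf flag is a stand-in for the C++ file-format check):

MAX_NON_GGUF_CTX = 16384  # cap for every format except GGUF LLaMA/Falcon

def clamp_context(requested: int, is_gguf: bool) -> int:
    # Mirrors the C++ logic: warn and lower to 16k for non-GGUF formats.
    if requested > MAX_NON_GGUF_CTX and not is_gguf:
        print("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.")
        return MAX_NON_GGUF_CTX
    return requested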
@@ -594,7 +604,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         float factor = file_format_meta.n_ctx_train/2048;
         effectivenctx = effectivenctx/factor;
     }
-    rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : 200000.0f))))));
+    rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f :
+    (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : (effectivenctx <= 16384 ? 200000.0f : (effectivenctx <= 24576 ? 320000.0f : 440000.0f))))))));
 }
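The widened ternary chain is a step function from effective context length to NTK-aware RoPE frequency base, now extended with tiers for the new 24576 and 32768 context sizes. The same table, sketched in Python:

# (upper bound on effectivenctx, rope_freq_base), mirroring the ternary chain above
ROPE_BASE_STEPS = [
    (2048, 10000.0), (3072, 26000.0), (4096, 32000.0), (6144, 54000.0),
    (8192, 82684.0), (12288, 140000.0), (16384, 200000.0), (24576, 320000.0),
]

def rope_freq_base(effectivenctx: float) -> float:
    for limit, base in ROPE_BASE_STEPS:
        if effectivenctx <= limit:
            return base
    return 440000.0  # everything above 24576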
@@ -633,7 +644,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     //newer format has bit unshuffling
     SetQuantsUnshuffled(file_format == FileFormat::GGJT_2);
     llama_v2_context_params llama_ctx_params_v2 = llama_v2_context_default_params();
-    llama_ctx_params_v2.n_ctx = inputs.max_context_length;
+    llama_ctx_params_v2.n_ctx = clamped_max_context_length;
     //llama_ctx_params.n_parts = -1;
     llama_ctx_params_v2.seed = -1;
     llama_ctx_params_v2.f16_kv = inputs.f16_kv;
@@ -683,7 +694,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     else if(file_format == FileFormat::GGJT_3)
     {
         llama_v3_context_params llama_ctx_params = llama_v3_context_default_params();
-        llama_ctx_params.n_ctx = inputs.max_context_length;
+        llama_ctx_params.n_ctx = clamped_max_context_length;
         //llama_ctx_paran_parts = -1;
         llama_ctx_params.seed = -1;
         llama_ctx_params.f16_kv = inputs.f16_kv;
@@ -754,7 +765,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     else if(file_format==FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
     {
         llama_context_params llama_ctx_params = llama_context_default_params();
-        llama_ctx_params.n_ctx = inputs.max_context_length;
+        llama_ctx_params.n_ctx = clamped_max_context_length;
         //llama_ctx_paran_parts = -1;
         llama_ctx_params.seed = -1;
         llama_ctx_params.f16_kv = inputs.f16_kv;

klite.embd

@@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
 Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
-Current version: 71
+Current version: 72
 -Concedo
 -->
@@ -4324,13 +4324,26 @@ Current version: 71
 //load contexts
 gametext_arr = [];
 if (temp_scenario.prompt != "") {
-    gametext_arr.push(temp_scenario.prompt);
+    let prompttxt = temp_scenario.prompt;
+    if(!localsettings.placeholder_tags) //do a one-time replace instead
+    {
+        prompttxt = replace_placeholders_direct(prompttxt);
+    }
+    gametext_arr.push(prompttxt);
 }
 if (temp_scenario.authorsnote != "") {
     current_anote = temp_scenario.authorsnote;
+    if(!localsettings.placeholder_tags)
+    {
+        current_anote = replace_placeholders_direct(current_anote);
+    }
 }
 if (temp_scenario.memory != "") {
     current_memory = temp_scenario.memory;
+    if(!localsettings.placeholder_tags)
+    {
+        current_memory = replace_placeholders_direct(current_memory);
+    }
 }
 if (temp_scenario.worldinfo && temp_scenario.worldinfo.length > 0) {
     current_wi = [];
@@ -4396,8 +4409,6 @@ Current version: 71
 if (temp_scenario.instruct_endtag) { localsettings.instruct_endtag = temp_scenario.instruct_endtag; }
 }
 render_gametext();
 }
 function togglescenarioallownsfw()
@@ -6385,10 +6396,7 @@ Current version: 71
 render_gametext();
 }
-function replace_placeholders(inputtxt)
-{
-    //only do this for chat and instruct modes
-    if(localsettings.placeholder_tags)
+function replace_placeholders_direct(inputtxt)
 {
     inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true);
     inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true);
@@ -6397,6 +6405,14 @@ Current version: 71
     //failsafe to handle removing newline tags
     inputtxt = replaceAll(inputtxt,instructstartplaceholder.trim(),get_instruct_starttag(false));
     inputtxt = replaceAll(inputtxt,instructendplaceholder.trim(),get_instruct_endtag(false));
+    return inputtxt;
+}
+function replace_placeholders(inputtxt)
+{
+    //only do this for chat and instruct modes
+    if(localsettings.placeholder_tags)
+    {
+        inputtxt = replace_placeholders_direct(inputtxt);
     }
     return inputtxt;
 }
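The refactor splits placeholder handling in two: replace_placeholders_direct substitutes unconditionally, while replace_placeholders keeps the old gate on the placeholder_tags setting, and the scenario loader above calls the direct variant for a one-time replace when tags are disabled. The same shape sketched in Python (the settings dict and the "Chatbot" default are stand-ins for Lite's localsettings and defaultchatopponent):

def replace_placeholders_direct(text: str, settings: dict) -> str:
    # Unconditionally swap placeholder tags for their current values.
    text = text.replace("{{user}}", settings.get("chatname") or "You")
    text = text.replace("{{char}}", settings.get("chatopponent") or "Chatbot")
    return text

def replace_placeholders(text: str, settings: dict) -> str:
    # Old entry point: substitute only when placeholder tags are enabled.
    if settings.get("placeholder_tags"):
        text = replace_placeholders_direct(text, settings)
    return text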
@@ -10184,13 +10200,14 @@ Current version: 71
 <div class="settingitem">
 <div class="settinglabel">
-<div class="justifyleft settingsmall" id="tokenstreaminglabel" title="Attempts to use token streaming if available.">Token Streaming </div>
-<input type="checkbox" id="tokenstreaming" style="margin:0px 0 0;">
+<div class="justifyleft settingsmall" id="tokenstreaminglabel" title="">Token Streaming <span class="helpicon">?<span
+class="helptext">Attempts to use token streaming if supported. Not available on Horde.</span></span></div>
+<input type="checkbox" id="tokenstreaming" style="margin:0px 0px 0px auto;">
 </div>
 <div id="idlesection" class="settinglabel">
 <div class="justifyleft settingsmall" title="Allow the AI to send more responses if you are idle.">Idle Responses&nbsp;</div>
-<select style="padding:1px; height:auto; width: 27px; appearance: none; font-size: 7pt;" class="form-control" id="idle_responses">
+<select style="padding:1px; height:auto; width: 27px; appearance: none; font-size: 7pt; margin:0px 0px 0px auto;" class="form-control" id="idle_responses">
 <option value="0">Off</option>
 <option value="1">1x</option>
 <option value="2">2x</option>
@@ -10210,21 +10227,24 @@ Current version: 71
 <div class="settinglabel">
-<div class="justifyleft settingsmall" title="Trims incomplete sentences in AI output">Trim Sentences </div>
-<input type="checkbox" id="trimsentences" style="margin:0px 0 0;">
+<div class="justifyleft settingsmall" title="">Trim Sentences <span class="helpicon">?<span
+class="helptext">Trims incomplete sentences in AI output.</span></span></div>
+<input type="checkbox" id="trimsentences" style="margin:0px 0px 0px auto;">
 </div>
 <div class="settinglabel">
-<div class="justifyleft settingsmall" title="Trim trailing whitespace at the end of context">Trim Whitespace </div>
-<input type="checkbox" id="trimwhitespace" style="margin:0px 0 0;">
+<div class="justifyleft settingsmall" title="">Trim Whitespace <span class="helpicon">?<span
+class="helptext">Compresses double newlines and removes trailing whitespace in AI output.</span></span></div>
+<input type="checkbox" id="trimwhitespace" style="margin:0px 0px 0px auto;">
 </div>
 <div class="settinglabel">
-<div class="justifyleft settingsmall" title="Allow the EOS token and others to be used">Unban EOS Tokens </div>
-<input type="checkbox" id="unban_tokens" style="margin:0px 0 0;">
+<div class="justifyleft settingsmall" title="">Unban EOS Tokens <span class="helpicon">?<span
+class="helptext">Allow the End-Of-Stream (EOS) token and potentially other restricted special tokens to be generated.</span></span></div>
+<input type="checkbox" id="unban_tokens" style="margin:0px 0px 0px auto;">
 </div>
 <div class="settinglabel">
 <div class="justifyleft settingsmall">Placeholder Tags <span class="helpicon">?<span
 class="helptext">If enabled, uses universal {{user}} and {{[INPUT]}} placeholders that get swapped on submit. If disabled, uses plaintext chat or instruct tags verbatim.</span></span></div>
-<input type="checkbox" id="placeholder_tags" style="margin:0px 0 0;">
+<input type="checkbox" id="placeholder_tags" style="margin:0px 0px 0px auto;">
 </div>
 </div>
@@ -10232,23 +10252,23 @@ Current version: 71
 <div class="settingitem">
 <div class="settinglabel">
 <div class="justifyleft settingsmall" title="Autosaves your current story and settings on exit, reloads when you return">Persist Autosave Session </div>
-<input type="checkbox" id="persist_session" style="margin:0px 0 0;">
+<input type="checkbox" id="persist_session" style="margin:0px 0px 0px auto;">
 </div>
 <div class="settinglabel">
 <div class="justifyleft settingsmall" title="Includes your current settings when saving or sharing your story">Save File Incl. Settings </div>
-<input type="checkbox" id="export_settings" style="margin:0px 0 0;">
+<input type="checkbox" id="export_settings" style="margin:0px 0px 0px auto;">
 </div>
 <div class="settinglabel">
 <div class="justifyleft settingsmall" title="Prompts to input a different filename when saving file.">Show Rename Save File </div>
-<input type="checkbox" id="prompt_for_savename" style="margin:0px 0 0;">
+<input type="checkbox" id="prompt_for_savename" style="margin:0px 0px 0px auto;">
 </div>
 <div class="settinglabel">
 <div class="justifyleft settingsmall" title="Automatically scrolls the text window down when new text is generated">Autoscroll Text </div>
-<input type="checkbox" id="autoscroll" style="margin:0px 0 0;">
+<input type="checkbox" id="autoscroll" style="margin:0px 0px 0px auto;">
 </div>
 <div class="settinglabel">
 <div class="justifyleft settingsmall" title="Inverts all colors, simple light mode">Inverted Colors </div>
-<input type="checkbox" id="invert_colors" style="margin:0px 0 0;">
+<input type="checkbox" id="invert_colors" style="margin:0px 0px 0px auto;">
 </div>
 </div>

koboldcpp.py

@@ -395,7 +395,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
     genparams["top_k"] = int(genparams.get('top_k', 120))
     genparams["max_length"]=genparams.get('max', 50)
 elif api_format==3:
-    scaled_rep_pen = genparams.get('presence_penalty', 0.1) + 1
+    frqp = genparams.get('frequency_penalty', 0.1)
+    scaled_rep_pen = genparams.get('presence_penalty', frqp) + 1
     genparams["max_length"] = genparams.get('max_tokens', 50)
     genparams["rep_pen"] = scaled_rep_pen
@@ -832,7 +833,7 @@ def show_new_gui():
     # slider data
     blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
     blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
-    contextsize_text = ["512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384"]
+    contextsize_text = ["512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384", "24576", "32768"]
     runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
     antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
     if not any(runopts):
@@ -1154,18 +1155,18 @@ def show_new_gui():
     # horde
     makelabel(network_tab, "Horde:", 5).grid(pady=10)
-    horde_name_entry, horde_name_label = makelabelentry(network_tab, "Horde Model Name:", horde_name_var, 7, 180)
-    horde_gen_entry, horde_gen_label = makelabelentry(network_tab, "Gen. Length:", horde_gen_var, 8, 50)
-    horde_context_entry, horde_context_label = makelabelentry(network_tab, "Max Context:",horde_context_var, 9, 50)
-    horde_apikey_entry, horde_apikey_label = makelabelentry(network_tab, "API Key (If Embedded Worker):",horde_apikey_var, 10, 180)
-    horde_workername_entry, horde_workername_label = makelabelentry(network_tab, "Horde Worker Name:",horde_workername_var, 11, 180)
+    horde_name_entry, horde_name_label = makelabelentry(network_tab, "Horde Model Name:", horde_name_var, 10, 180)
+    horde_gen_entry, horde_gen_label = makelabelentry(network_tab, "Gen. Length:", horde_gen_var, 11, 50)
+    horde_context_entry, horde_context_label = makelabelentry(network_tab, "Max Context:",horde_context_var, 12, 50)
+    horde_apikey_entry, horde_apikey_label = makelabelentry(network_tab, "API Key (If Embedded Worker):",horde_apikey_var, 13, 180)
+    horde_workername_entry, horde_workername_label = makelabelentry(network_tab, "Horde Worker Name:",horde_workername_var, 14, 180)

     def togglehorde(a,b,c):
         labels = [horde_name_label, horde_gen_label, horde_context_label, horde_apikey_label, horde_workername_label]
         for idx, item in enumerate([horde_name_entry, horde_gen_entry, horde_context_entry, horde_apikey_entry, horde_workername_entry]):
             if usehorde_var.get() == 1:
-                item.grid(row=5 + idx, column = 1, padx=8, pady=1, stick="nw")
-                labels[idx].grid(row=5 + idx, padx=8, pady=1, stick="nw")
+                item.grid(row=10 + idx, column = 1, padx=8, pady=1, stick="nw")
+                labels[idx].grid(row=10 + idx, padx=8, pady=1, stick="nw")
             else:
                 item.grid_forget()
                 labels[idx].grid_forget()
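Shifting the Horde fields from rows 7-11 (and the toggle's re-grid from row 5) up to rows 10-14 reads as the launcher UI glitch named in the commit message: in Tkinter, gridding two widgets into the same row and column stacks them, with the later one covering the earlier. A minimal illustration of that failure mode, using hypothetical labels rather than the actual launcher widgets:

import tkinter as tk

root = tk.Tk()
tk.Label(root, text="Port:").grid(row=5, column=0)
tk.Label(root, text="Horde Model Name:").grid(row=5, column=0)  # same cell: covers "Port:"
root.mainloop()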
@@ -1614,6 +1615,8 @@ def run_horde_worker(args, api_key, worker_name):
     current_id = None
     current_payload = None
     current_generation = None
+    session_kudos_earned = 0
+    session_starttime = datetime.now()
     sleepy_counter = 0 #if this exceeds a value, worker becomes sleepy (slower)
     print("===\nEmbedded Horde Worker '"+worker_name+"' Starting...\n(To use your own KAI Bridge/Scribe worker instead, don't set your API key)")
     BRIDGE_AGENT = f"KoboldCppEmbedWorker:1:https://github.com/LostRuins/koboldcpp"
@@ -1691,7 +1694,16 @@ def run_horde_worker(args, api_key, worker_name):
     exitcounter += 1
     print_with_time("Error: Job submit failed.")
 else:
-    print_with_time(f'Submitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
+    reward = reply["reward"]
+    session_kudos_earned += reward
+    curtime = datetime.now()
+    elapsedtime = curtime - session_starttime
+    hrs = elapsedtime.seconds // 3600
+    mins = elapsedtime.seconds // 60 % 60
+    secs = elapsedtime.seconds % 60
+    elapsedtimestr = f"{hrs:03d}h:{mins:02d}m:{secs:02d}s"
+    earnrate = session_kudos_earned/(elapsedtime.seconds/3600)
+    print_with_time(f'Submitted {current_id} and earned {reward:.0f} kd - [Total:{session_kudos_earned:.0f}kd, Time:{elapsedtimestr}, EarnRate:{earnrate:.0f}kd/hr]')
 else:
     print_with_time("Error: Abandoned current job due to errors. Getting new job.")
     current_id = None
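The new worker stats accumulate kudos across the session and report an hourly earn rate on every successful submit. A standalone sketch of the same bookkeeping; unlike the committed code it guards against a zero elapsed time (a submit within the first second would otherwise divide by zero), and like the committed code it uses timedelta.seconds, which wraps after 24 hours:

from datetime import datetime

session_kudos_earned = 0.0
session_starttime = datetime.now()

def report_submit(current_id, reward: float) -> str:
    global session_kudos_earned
    session_kudos_earned += reward
    elapsed = (datetime.now() - session_starttime).seconds  # wraps daily, as in the commit
    hrs, mins, secs = elapsed // 3600, elapsed // 60 % 60, elapsed % 60
    earnrate = session_kudos_earned / (max(elapsed, 1) / 3600)  # guard against /0
    return (f"Submitted {current_id} and earned {reward:.0f} kd - "
            f"[Total:{session_kudos_earned:.0f}kd, Time:{hrs:03d}h:{mins:02d}m:{secs:02d}s, "
            f"EarnRate:{earnrate:.0f}kd/hr]")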
@@ -1952,7 +1964,7 @@ if __name__ == '__main__':
     parser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
     parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
     parser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
-    parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384], default=2048)
+    parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768], default=2048)
     parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
     parser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
     parser.add_argument("--stream", help="Uses streaming when generating tokens. Only for the Kobold Lite UI.", action='store_true')