Increase context for GGUF to 32k, add Horde worker stats, fix glitch in Horde launcher UI, support OAI frequency penalty, update Lite
parent 6a821b268a
commit ca8b315202
3 changed files with 85 additions and 42 deletions
@@ -562,7 +562,17 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         blasbatchsize = 8;
     }
     params.memory_f16 = inputs.f16_kv;
-    params.n_ctx = inputs.max_context_length;
+    auto clamped_max_context_length = inputs.max_context_length;
+
+    if(clamped_max_context_length>16384 &&
+    file_format != FileFormat::GGUF_LLAMA && file_format!=FileFormat::GGUF_FALCON)
+    {
+        printf("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.\n");
+        clamped_max_context_length = 16384;
+    }
+
+    params.n_ctx = clamped_max_context_length;
+
     neox_ctx_v2.hparams.n_ctx = neox_ctx_v3.hparams.n_ctx
     = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx
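The hunk above introduces the clamp that the later hunks consume: only GGUF formats may request a context above 16k. A minimal Python sketch of the rule, with illustrative names not taken from the codebase:

    # Illustrative sketch of the clamping rule above (hypothetical helper).
    GGUF_FORMATS = {"GGUF_LLAMA", "GGUF_FALCON"}

    def clamp_max_context(requested: int, file_format: str) -> int:
        """Only GGUF models may exceed 16k context; everything else is capped."""
        if requested > 16384 and file_format not in GGUF_FORMATS:
            print("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.")
            return 16384
        return requested

    assert clamp_max_context(32768, "GGUF_LLAMA") == 32768
    assert clamp_max_context(32768, "GGJT_3") == 16384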
@@ -594,7 +604,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
             float factor = file_format_meta.n_ctx_train/2048;
             effectivenctx = effectivenctx/factor;
         }
-        rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : 200000.0f))))));
+        rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f :
+        (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : (effectivenctx <= 16384 ? 200000.0f : (effectivenctx <= 24576 ? 320000.0f : 440000.0f))))))));

     }
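The nested ternary above is really a step table mapping effective context length to an NTK-aware RoPE frequency base, now extended past 16k up to the new 32k ceiling. A Python rendering for readability; thresholds and values are copied from the new code, the helper name is illustrative:

    # Step table equivalent of the nested ternary above (helper name is illustrative).
    ROPE_FREQ_BASE_TABLE = [
        (2048, 10000.0), (3072, 26000.0), (4096, 32000.0), (6144, 54000.0),
        (8192, 82684.0), (12288, 140000.0), (16384, 200000.0), (24576, 320000.0),
    ]

    def rope_freq_base_for(effectivenctx: float) -> float:
        for limit, base in ROPE_FREQ_BASE_TABLE:
            if effectivenctx <= limit:
                return base
        return 440000.0  # anything above 24576, up to the new 32k ceiling

    assert rope_freq_base_for(4096) == 32000.0
    assert rope_freq_base_for(32768) == 440000.0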
@@ -633,7 +644,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         //newer format has bit unshuffling
         SetQuantsUnshuffled(file_format == FileFormat::GGJT_2);
         llama_v2_context_params llama_ctx_params_v2 = llama_v2_context_default_params();
-        llama_ctx_params_v2.n_ctx = inputs.max_context_length;
+        llama_ctx_params_v2.n_ctx = clamped_max_context_length;
        //llama_ctx_params.n_parts = -1;
         llama_ctx_params_v2.seed = -1;
         llama_ctx_params_v2.f16_kv = inputs.f16_kv;
@@ -683,7 +694,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     else if(file_format == FileFormat::GGJT_3)
     {
         llama_v3_context_params llama_ctx_params = llama_v3_context_default_params();
-        llama_ctx_params.n_ctx = inputs.max_context_length;
+        llama_ctx_params.n_ctx = clamped_max_context_length;
        //llama_ctx_paran_parts = -1;
         llama_ctx_params.seed = -1;
         llama_ctx_params.f16_kv = inputs.f16_kv;
@@ -754,7 +765,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     else if(file_format==FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
     {
         llama_context_params llama_ctx_params = llama_context_default_params();
-        llama_ctx_params.n_ctx = inputs.max_context_length;
+        llama_ctx_params.n_ctx = clamped_max_context_length;
        //llama_ctx_paran_parts = -1;
         llama_ctx_params.seed = -1;
         llama_ctx_params.f16_kv = inputs.f16_kv;
klite.embd — 72 changes
@@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
 Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
-Current version: 71
+Current version: 72
 -Concedo
 -->
@@ -4324,13 +4324,26 @@ Current version: 71
         //load contexts
         gametext_arr = [];
         if (temp_scenario.prompt != "") {
-            gametext_arr.push(temp_scenario.prompt);
+            let prompttxt = temp_scenario.prompt;
+            if(!localsettings.placeholder_tags) //do a one-time replace instead
+            {
+                prompttxt = replace_placeholders_direct(prompttxt);
+            }
+            gametext_arr.push(prompttxt);
         }
         if (temp_scenario.authorsnote != "") {
             current_anote = temp_scenario.authorsnote;
+            if(!localsettings.placeholder_tags)
+            {
+                current_anote = replace_placeholders_direct(current_anote);
+            }
         }
         if (temp_scenario.memory != "") {
             current_memory = temp_scenario.memory;
+            if(!localsettings.placeholder_tags)
+            {
+                current_memory = replace_placeholders_direct(current_memory);
+            }
         }
         if (temp_scenario.worldinfo && temp_scenario.worldinfo.length > 0) {
             current_wi = [];
@@ -4396,8 +4409,6 @@ Current version: 71
         if (temp_scenario.instruct_endtag) { localsettings.instruct_endtag = temp_scenario.instruct_endtag; }
     }
-
-
     render_gametext();
 }
 function togglescenarioallownsfw()
@@ -6385,18 +6396,23 @@ Current version: 71
         render_gametext();
     }

+    function replace_placeholders_direct(inputtxt)
+    {
+        inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true);
+        inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true);
+        inputtxt = replaceAll(inputtxt,instructstartplaceholder,get_instruct_starttag(false));
+        inputtxt = replaceAll(inputtxt,instructendplaceholder,get_instruct_endtag(false));
+        //failsafe to handle removing newline tags
+        inputtxt = replaceAll(inputtxt,instructstartplaceholder.trim(),get_instruct_starttag(false));
+        inputtxt = replaceAll(inputtxt,instructendplaceholder.trim(),get_instruct_endtag(false));
+        return inputtxt;
+    }
     function replace_placeholders(inputtxt)
     {
         //only do this for chat and instruct modes
         if(localsettings.placeholder_tags)
         {
-            inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true);
-            inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true);
-            inputtxt = replaceAll(inputtxt,instructstartplaceholder,get_instruct_starttag(false));
-            inputtxt = replaceAll(inputtxt,instructendplaceholder,get_instruct_endtag(false));
-            //failsafe to handle removing newline tags
-            inputtxt = replaceAll(inputtxt,instructstartplaceholder.trim(),get_instruct_starttag(false));
-            inputtxt = replaceAll(inputtxt,instructendplaceholder.trim(),get_instruct_endtag(false));
+            inputtxt = replace_placeholders_direct(inputtxt);
         }
         return inputtxt;
     }
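The refactor above factors the unconditional substitutions into replace_placeholders_direct, so scenario loading can call it once when placeholder tags are disabled, while replace_placeholders now just delegates when they are enabled. A compact Python sketch of the same split; all names here are illustrative stand-ins for the JavaScript:

    # Python sketch of the refactor above (illustrative names, not the JS API).
    def replace_placeholders_direct(text: str, user: str, char: str) -> str:
        # Unconditional one-time substitution, usable at scenario-load time.
        return text.replace("{{user}}", user or "You").replace("{{char}}", char)

    def replace_placeholders(text: str, user: str, char: str, placeholder_tags: bool) -> str:
        # Only substitute at render time if placeholder tags are enabled;
        # otherwise the scenario loader already did a direct replace.
        if placeholder_tags:
            text = replace_placeholders_direct(text, user, char)
        return text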
@@ -10184,13 +10200,14 @@ Current version: 71
     <div class="settingitem">
         <div class="settinglabel">
-            <div class="justifyleft settingsmall" id="tokenstreaminglabel" title="Attempts to use token streaming if available.">Token Streaming </div>
-            <input type="checkbox" id="tokenstreaming" style="margin:0px 0 0;">
+            <div class="justifyleft settingsmall" id="tokenstreaminglabel" title="">Token Streaming <span class="helpicon">?<span
+            class="helptext">Attempts to use token streaming if supported. Not available on Horde.</span></span></div>
+            <input type="checkbox" id="tokenstreaming" style="margin:0px 0px 0px auto;">
         </div>

         <div id="idlesection" class="settinglabel">
             <div class="justifyleft settingsmall" title="Allow the AI to send more responses if you are idle.">Idle Responses </div>
-            <select style="padding:1px; height:auto; width: 27px; appearance: none; font-size: 7pt;" class="form-control" id="idle_responses">
+            <select style="padding:1px; height:auto; width: 27px; appearance: none; font-size: 7pt; margin:0px 0px 0px auto;" class="form-control" id="idle_responses">
                 <option value="0">Off</option>
                 <option value="1">1x</option>
                 <option value="2">2x</option>
@@ -10210,21 +10227,24 @@ Current version: 71

         <div class="settinglabel">
-            <div class="justifyleft settingsmall" title="Trims incomplete sentences in AI output">Trim Sentences </div>
-            <input type="checkbox" id="trimsentences" style="margin:0px 0 0;">
+            <div class="justifyleft settingsmall" title="">Trim Sentences <span class="helpicon">?<span
+            class="helptext">Trims incomplete sentences in AI output.</span></span></div>
+            <input type="checkbox" id="trimsentences" style="margin:0px 0px 0px auto;">
         </div>
         <div class="settinglabel">
-            <div class="justifyleft settingsmall" title="Trim trailing whitespace at the end of context">Trim Whitespace </div>
-            <input type="checkbox" id="trimwhitespace" style="margin:0px 0 0;">
+            <div class="justifyleft settingsmall" title="">Trim Whitespace <span class="helpicon">?<span
+            class="helptext">Compresses double newlines and removes trailing whitespace in AI output.</span></span></div>
+            <input type="checkbox" id="trimwhitespace" style="margin:0px 0px 0px auto;">
         </div>
         <div class="settinglabel">
-            <div class="justifyleft settingsmall" title="Allow the EOS token and others to be used">Unban EOS Tokens </div>
-            <input type="checkbox" id="unban_tokens" style="margin:0px 0 0;">
+            <div class="justifyleft settingsmall" title="">Unban EOS Tokens <span class="helpicon">?<span
+            class="helptext">Allow the End-Of-Stream (EOS) token and potentially other restricted special tokens to be generated.</span></span></div>
+            <input type="checkbox" id="unban_tokens" style="margin:0px 0px 0px auto;">
         </div>
         <div class="settinglabel">
             <div class="justifyleft settingsmall">Placeholder Tags <span class="helpicon">?<span
             class="helptext">If enabled, uses universal {{user}} and {{[INPUT]}} placeholders that get swapped on submit. If disabled, uses plaintext chat or instruct tags verbatim.</span></span></div>
-            <input type="checkbox" id="placeholder_tags" style="margin:0px 0 0;">
+            <input type="checkbox" id="placeholder_tags" style="margin:0px 0px 0px auto;">
         </div>
     </div>
@@ -10232,23 +10252,23 @@ Current version: 71
     <div class="settingitem">
         <div class="settinglabel">
             <div class="justifyleft settingsmall" title="Autosaves your current story and settings on exit, reloads when you return">Persist Autosave Session </div>
-            <input type="checkbox" id="persist_session" style="margin:0px 0 0;">
+            <input type="checkbox" id="persist_session" style="margin:0px 0px 0px auto;">
         </div>
         <div class="settinglabel">
             <div class="justifyleft settingsmall" title="Includes your current settings when saving or sharing your story">Save File Incl. Settings </div>
-            <input type="checkbox" id="export_settings" style="margin:0px 0 0;">
+            <input type="checkbox" id="export_settings" style="margin:0px 0px 0px auto;">
         </div>
         <div class="settinglabel">
             <div class="justifyleft settingsmall" title="Prompts to input a different filename when saving file.">Show Rename Save File </div>
-            <input type="checkbox" id="prompt_for_savename" style="margin:0px 0 0;">
+            <input type="checkbox" id="prompt_for_savename" style="margin:0px 0px 0px auto;">
         </div>
         <div class="settinglabel">
             <div class="justifyleft settingsmall" title="Automatically scrolls the text window down when new text is generated">Autoscroll Text </div>
-            <input type="checkbox" id="autoscroll" style="margin:0px 0 0;">
+            <input type="checkbox" id="autoscroll" style="margin:0px 0px 0px auto;">
         </div>
         <div class="settinglabel">
             <div class="justifyleft settingsmall" title="Inverts all colors, simple light mode">Inverted Colors </div>
-            <input type="checkbox" id="invert_colors" style="margin:0px 0 0;">
+            <input type="checkbox" id="invert_colors" style="margin:0px 0px 0px auto;">
         </div>
     </div>
koboldcpp.py — 34 changes
@@ -395,7 +395,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
             genparams["top_k"] = int(genparams.get('top_k', 120))
             genparams["max_length"]=genparams.get('max', 50)
         elif api_format==3:
-            scaled_rep_pen = genparams.get('presence_penalty', 0.1) + 1
+            frqp = genparams.get('frequency_penalty', 0.1)
+            scaled_rep_pen = genparams.get('presence_penalty', frqp) + 1
             genparams["max_length"] = genparams.get('max_tokens', 50)
             genparams["rep_pen"] = scaled_rep_pen
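In the hunk above, api_format 3 is the OpenAI-compatible request path: previously only presence_penalty fed rep_pen, while frequency_penalty was ignored; now frequency_penalty serves as the fallback when presence_penalty is absent, and the chosen value is shifted by +1 into KoboldCpp's rep_pen scale. A self-contained sketch of that mapping:

    # Sketch of the new OAI penalty mapping above; 'genparams' mirrors the
    # request dict in the diff, the function name is illustrative.
    def map_oai_penalties(genparams: dict) -> float:
        frqp = genparams.get('frequency_penalty', 0.1)
        return genparams.get('presence_penalty', frqp) + 1

    assert map_oai_penalties({}) == 1.1                          # both absent
    assert map_oai_penalties({'frequency_penalty': 0.3}) == 1.3  # freq penalty used
    assert map_oai_penalties({'presence_penalty': 0.5,
                              'frequency_penalty': 0.3}) == 1.5  # presence wins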
@@ -832,7 +833,7 @@ def show_new_gui():
     # slider data
     blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
     blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
-    contextsize_text = ["512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384"]
+    contextsize_text = ["512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384", "24576", "32768"]
     runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
     antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
     if not any(runopts):
@@ -1154,18 +1155,18 @@ def show_new_gui():
     # horde
     makelabel(network_tab, "Horde:", 5).grid(pady=10)

-    horde_name_entry, horde_name_label = makelabelentry(network_tab, "Horde Model Name:", horde_name_var, 7, 180)
-    horde_gen_entry, horde_gen_label = makelabelentry(network_tab, "Gen. Length:", horde_gen_var, 8, 50)
-    horde_context_entry, horde_context_label = makelabelentry(network_tab, "Max Context:",horde_context_var, 9, 50)
-    horde_apikey_entry, horde_apikey_label = makelabelentry(network_tab, "API Key (If Embedded Worker):",horde_apikey_var, 10, 180)
-    horde_workername_entry, horde_workername_label = makelabelentry(network_tab, "Horde Worker Name:",horde_workername_var, 11, 180)
+    horde_name_entry, horde_name_label = makelabelentry(network_tab, "Horde Model Name:", horde_name_var, 10, 180)
+    horde_gen_entry, horde_gen_label = makelabelentry(network_tab, "Gen. Length:", horde_gen_var, 11, 50)
+    horde_context_entry, horde_context_label = makelabelentry(network_tab, "Max Context:",horde_context_var, 12, 50)
+    horde_apikey_entry, horde_apikey_label = makelabelentry(network_tab, "API Key (If Embedded Worker):",horde_apikey_var, 13, 180)
+    horde_workername_entry, horde_workername_label = makelabelentry(network_tab, "Horde Worker Name:",horde_workername_var, 14, 180)

     def togglehorde(a,b,c):
         labels = [horde_name_label, horde_gen_label, horde_context_label, horde_apikey_label, horde_workername_label]
         for idx, item in enumerate([horde_name_entry, horde_gen_entry, horde_context_entry, horde_apikey_entry, horde_workername_entry]):
             if usehorde_var.get() == 1:
-                item.grid(row=5 + idx, column = 1, padx=8, pady=1, stick="nw")
-                labels[idx].grid(row=5 + idx, padx=8, pady=1, stick="nw")
+                item.grid(row=10 + idx, column = 1, padx=8, pady=1, stick="nw")
+                labels[idx].grid(row=10 + idx, padx=8, pady=1, stick="nw")
             else:
                 item.grid_forget()
                 labels[idx].grid_forget()
@@ -1614,6 +1615,8 @@ def run_horde_worker(args, api_key, worker_name):
    current_id = None
    current_payload = None
    current_generation = None
+   session_kudos_earned = 0
+   session_starttime = datetime.now()
    sleepy_counter = 0 #if this exceeds a value, worker becomes sleepy (slower)
    print("===\nEmbedded Horde Worker '"+worker_name+"' Starting...\n(To use your own KAI Bridge/Scribe worker instead, don't set your API key)")
    BRIDGE_AGENT = f"KoboldCppEmbedWorker:1:https://github.com/LostRuins/koboldcpp"
@@ -1691,7 +1694,16 @@ def run_horde_worker(args, api_key, worker_name):
                exitcounter += 1
                print_with_time("Error: Job submit failed.")
            else:
-               print_with_time(f'Submitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
+               reward = reply["reward"]
+               session_kudos_earned += reward
+               curtime = datetime.now()
+               elapsedtime=curtime-session_starttime
+               hrs = elapsedtime.seconds // 3600
+               mins = elapsedtime.seconds // 60 % 60
+               secs = elapsedtime.seconds % 60
+               elapsedtimestr = f"{hrs:03d}h:{mins:02d}m:{secs:02d}s"
+               earnrate = session_kudos_earned/(elapsedtime.seconds/3600)
+               print_with_time(f'Submitted {current_id} and earned {reward:.0f} kd - [Total:{session_kudos_earned:.0f}kd, Time:{elapsedtimestr}, EarnRate:{earnrate:.0f}kd/hr]')
        else:
            print_with_time("Error: Abandoned current job due to errors. Getting new job.")
            current_id = None
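The hunk above is the new Horde worker stats feature: each successful submit accumulates the kudos reward and reports session totals plus an hourly earn rate. One thing worth noting is that timedelta.seconds only counts seconds within the current day; a day-safe sketch of the same formatting using total_seconds(), with a guard against dividing by zero on the very first submit (function name is illustrative):

    from datetime import datetime

    # Day-safe sketch of the session-stats formatting above (illustrative helper).
    def format_earnings(session_kudos_earned: float, session_starttime: datetime) -> str:
        total = int((datetime.now() - session_starttime).total_seconds())
        hrs, mins, secs = total // 3600, total // 60 % 60, total % 60
        elapsedtimestr = f"{hrs:03d}h:{mins:02d}m:{secs:02d}s"
        # Guard: the diff divides by elapsedtime.seconds, which is zero at start.
        earnrate = session_kudos_earned / (total / 3600) if total else 0.0
        return f"[Total:{session_kudos_earned:.0f}kd, Time:{elapsedtimestr}, EarnRate:{earnrate:.0f}kd/hr]"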
@@ -1952,7 +1964,7 @@ if __name__ == '__main__':
    parser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
    parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
    parser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
-   parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384], default=2048)
+   parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768], default=2048)
    parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
    parser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
    parser.add_argument("--stream", help="Uses streaming when generating tokens. Only for the Kobold Lite UI.", action='store_true')
Loading…
Add table
Add a link
Reference in a new issue