From ca8b31520231077a10fe6e426154b617e8ef2d67 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 28 Sep 2023 23:50:08 +0800
Subject: [PATCH] increase GGUF context to 32k, add horde worker stats, fix
 glitch in horde launcher UI, add OAI frequency penalty, update Lite
---
gpttype_adapter.cpp | 21 +++++++++----
klite.embd | 72 +++++++++++++++++++++++++++++----------------
koboldcpp.py | 34 ++++++++++++++-------
3 files changed, 85 insertions(+), 42 deletions(-)
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index fb0f205f8..d3116ed76 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -562,7 +562,17 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
blasbatchsize = 8;
}
params.memory_f16 = inputs.f16_kv;
- params.n_ctx = inputs.max_context_length;
+
+ auto clamped_max_context_length = inputs.max_context_length;
+
+ if(clamped_max_context_length>16384 &&
+ file_format != FileFormat::GGUF_LLAMA && file_format!=FileFormat::GGUF_FALCON)
+ {
+ printf("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.\n");
+ clamped_max_context_length = 16384;
+ }
+
+ params.n_ctx = clamped_max_context_length;
neox_ctx_v2.hparams.n_ctx = neox_ctx_v3.hparams.n_ctx
= gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx
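For reference, the clamp this hunk introduces reduces to a small pure function: a request above 16384 tokens is honoured only for GGUF files. A minimal Python sketch of that rule (the format names mirror the C++ FileFormat enum; everything else here is illustrative, not koboldcpp API):

    # Sketch of the context clamp added above (illustrative only).
    GGUF_FORMATS = {"GGUF_LLAMA", "GGUF_FALCON"}

    def clamp_max_context(requested: int, file_format: str) -> int:
        """Only GGUF models may use more than 16384 tokens of context."""
        if requested > 16384 and file_format not in GGUF_FORMATS:
            print("Warning: Only GGUF models can use max context above 16k. "
                  "Max context lowered to 16k.")
            return 16384
        return requested

    assert clamp_max_context(32768, "GGUF_LLAMA") == 32768  # GGUF keeps 32k
    assert clamp_max_context(32768, "GGJT_3") == 16384      # older formats clamped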
@@ -594,7 +604,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
float factor = file_format_meta.n_ctx_train/2048;
effectivenctx = effectivenctx/factor;
}
- rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : 200000.0f))))));
+ rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f :
+ (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : (effectivenctx <= 16384 ? 200000.0f : (effectivenctx <= 24576 ? 320000.0f : 440000.0f))))))));
}
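The widened ternary chain is easier to audit as a threshold table: each effective context size maps to the smallest tier whose NTK-aware rope_freq_base covers it, with two new tiers for 24k and 32k. A Python rendering of the same mapping (thresholds and values copied from the hunk; the function name is illustrative):

    # The NTK-aware rope_freq_base selection above, as a threshold table.
    ROPE_FREQ_BASE_TIERS = [
        (2048, 10000.0), (3072, 26000.0), (4096, 32000.0), (6144, 54000.0),
        (8192, 82684.0), (12288, 140000.0), (16384, 200000.0), (24576, 320000.0),
    ]

    def rope_freq_base(effective_n_ctx: int) -> float:
        for threshold, base in ROPE_FREQ_BASE_TIERS:
            if effective_n_ctx <= threshold:
                return base
        return 440000.0  # above 24576, i.e. the new 32k tier

    assert rope_freq_base(4096) == 32000.0
    assert rope_freq_base(32768) == 440000.0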
@@ -633,7 +644,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
//newer format has bit unshuffling
SetQuantsUnshuffled(file_format == FileFormat::GGJT_2);
llama_v2_context_params llama_ctx_params_v2 = llama_v2_context_default_params();
- llama_ctx_params_v2.n_ctx = inputs.max_context_length;
+ llama_ctx_params_v2.n_ctx = clamped_max_context_length;
//llama_ctx_params.n_parts = -1;
llama_ctx_params_v2.seed = -1;
llama_ctx_params_v2.f16_kv = inputs.f16_kv;
@@ -683,7 +694,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
else if(file_format == FileFormat::GGJT_3)
{
llama_v3_context_params llama_ctx_params = llama_v3_context_default_params();
- llama_ctx_params.n_ctx = inputs.max_context_length;
+ llama_ctx_params.n_ctx = clamped_max_context_length;
//llama_ctx_paran_parts = -1;
llama_ctx_params.seed = -1;
llama_ctx_params.f16_kv = inputs.f16_kv;
@@ -754,7 +765,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
else if(file_format==FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
{
llama_context_params llama_ctx_params = llama_context_default_params();
- llama_ctx_params.n_ctx = inputs.max_context_length;
+ llama_ctx_params.n_ctx = clamped_max_context_length;
//llama_ctx_paran_parts = -1;
llama_ctx_params.seed = -1;
llama_ctx_params.f16_kv = inputs.f16_kv;
diff --git a/klite.embd b/klite.embd
index c6c743b6e..d823ccfca 100644
--- a/klite.embd
+++ b/klite.embd
@@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please do not remove this line.
-Current version: 71
+Current version: 72
-Concedo
-->
@@ -4324,13 +4324,26 @@ Current version: 71
//load contexts
gametext_arr = [];
if (temp_scenario.prompt != "") {
- gametext_arr.push(temp_scenario.prompt);
+ let prompttxt = temp_scenario.prompt;
+ if(!localsettings.placeholder_tags) //do a one-time replace instead
+ {
+ prompttxt = replace_placeholders_direct(prompttxt);
+ }
+ gametext_arr.push(prompttxt);
}
if (temp_scenario.authorsnote != "") {
current_anote = temp_scenario.authorsnote;
+ if(!localsettings.placeholder_tags)
+ {
+ current_anote = replace_placeholders_direct(current_anote);
+ }
}
if (temp_scenario.memory != "") {
current_memory = temp_scenario.memory;
+ if(!localsettings.placeholder_tags)
+ {
+ current_memory = replace_placeholders_direct(current_memory);
+ }
}
if (temp_scenario.worldinfo && temp_scenario.worldinfo.length > 0) {
current_wi = [];
@@ -4396,8 +4409,6 @@ Current version: 71
if (temp_scenario.instruct_endtag) { localsettings.instruct_endtag = temp_scenario.instruct_endtag; }
}
-
-
render_gametext();
}
function togglescenarioallownsfw()
@@ -6385,18 +6396,23 @@ Current version: 71
render_gametext();
}
+ function replace_placeholders_direct(inputtxt)
+ {
+ inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true);
+ inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true);
+ inputtxt = replaceAll(inputtxt,instructstartplaceholder,get_instruct_starttag(false));
+ inputtxt = replaceAll(inputtxt,instructendplaceholder,get_instruct_endtag(false));
+ //failsafe to handle removing newline tags
+ inputtxt = replaceAll(inputtxt,instructstartplaceholder.trim(),get_instruct_starttag(false));
+ inputtxt = replaceAll(inputtxt,instructendplaceholder.trim(),get_instruct_endtag(false));
+ return inputtxt;
+ }
function replace_placeholders(inputtxt)
{
//only do this for chat and instruct modes
if(localsettings.placeholder_tags)
{
- inputtxt = replaceAll(inputtxt,"{{user}}",localsettings.chatname?localsettings.chatname:"You",true);
- inputtxt = replaceAll(inputtxt,"{{char}}",localsettings.chatopponent?localsettings.chatopponent:defaultchatopponent,true);
- inputtxt = replaceAll(inputtxt,instructstartplaceholder,get_instruct_starttag(false));
- inputtxt = replaceAll(inputtxt,instructendplaceholder,get_instruct_endtag(false));
- //failsafe to handle removing newline tags
- inputtxt = replaceAll(inputtxt,instructstartplaceholder.trim(),get_instruct_starttag(false));
- inputtxt = replaceAll(inputtxt,instructendplaceholder.trim(),get_instruct_endtag(false));
+ inputtxt = replace_placeholders_direct(inputtxt);
}
return inputtxt;
}
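Taken together with the scenario-loading hunk above, this refactor splits substitution into two paths: replace_placeholders still gates on localsettings.placeholder_tags for per-render use, while replace_placeholders_direct always substitutes, so scenario prompts, memory, and author's notes can be baked once at load time when placeholder tags are off. A rough Python analogue of that control flow (function and variable names are illustrative, not Lite's API):

    # Rough analogue of the Lite refactor: one substitution routine, two call sites.
    def replace_placeholders_direct(text: str, user: str, char: str) -> str:
        """Always substitute; used per-render and as a one-time bake."""
        return text.replace("{{user}}", user or "You").replace("{{char}}", char)

    def replace_placeholders(text: str, user: str, char: str, placeholder_tags: bool) -> str:
        """Per-render path: substitute only while placeholder mode is on."""
        return replace_placeholders_direct(text, user, char) if placeholder_tags else text

    def load_scenario_prompt(prompt: str, user: str, char: str, placeholder_tags: bool) -> str:
        """Load path: with tags off, bake the names in once so the stored text is final."""
        return prompt if placeholder_tags else replace_placeholders_direct(prompt, user, char)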
@@ -10184,13 +10200,14 @@ Current version: 71
[HTML markup lost in extraction; the recoverable change in this hunk gives the "Token Streaming" setting a help tooltip reading "Attempts to use token streaming if supported. Not available on Horde." The neighbouring "Idle Responses" label is unchanged context.]
@@ -10232,23 +10252,23 @@ Current version: 71
diff --git a/koboldcpp.py b/koboldcpp.py
index 50dcd9ea0..797414c7b 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -395,7 +395,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
genparams["top_k"] = int(genparams.get('top_k', 120))
genparams["max_length"]=genparams.get('max', 50)
elif api_format==3:
- scaled_rep_pen = genparams.get('presence_penalty', 0.1) + 1
+ frqp = genparams.get('frequency_penalty', 0.1)
+ scaled_rep_pen = genparams.get('presence_penalty', frqp) + 1
genparams["max_length"] = genparams.get('max_tokens', 50)
genparams["rep_pen"] = scaled_rep_pen
@@ -832,7 +833,7 @@ def show_new_gui():
# slider data
blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
- contextsize_text = ["512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384"]
+ contextsize_text = ["512", "1024", "2048", "3072", "4096", "6144", "8192", "12288", "16384", "24576", "32768"]
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
if not any(runopts):
@@ -1154,18 +1155,18 @@ def show_new_gui():
# horde
makelabel(network_tab, "Horde:", 5).grid(pady=10)
- horde_name_entry, horde_name_label = makelabelentry(network_tab, "Horde Model Name:", horde_name_var, 7, 180)
- horde_gen_entry, horde_gen_label = makelabelentry(network_tab, "Gen. Length:", horde_gen_var, 8, 50)
- horde_context_entry, horde_context_label = makelabelentry(network_tab, "Max Context:",horde_context_var, 9, 50)
- horde_apikey_entry, horde_apikey_label = makelabelentry(network_tab, "API Key (If Embedded Worker):",horde_apikey_var, 10, 180)
- horde_workername_entry, horde_workername_label = makelabelentry(network_tab, "Horde Worker Name:",horde_workername_var, 11, 180)
+ horde_name_entry, horde_name_label = makelabelentry(network_tab, "Horde Model Name:", horde_name_var, 10, 180)
+ horde_gen_entry, horde_gen_label = makelabelentry(network_tab, "Gen. Length:", horde_gen_var, 11, 50)
+ horde_context_entry, horde_context_label = makelabelentry(network_tab, "Max Context:",horde_context_var, 12, 50)
+ horde_apikey_entry, horde_apikey_label = makelabelentry(network_tab, "API Key (If Embedded Worker):",horde_apikey_var, 13, 180)
+ horde_workername_entry, horde_workername_label = makelabelentry(network_tab, "Horde Worker Name:",horde_workername_var, 14, 180)
def togglehorde(a,b,c):
labels = [horde_name_label, horde_gen_label, horde_context_label, horde_apikey_label, horde_workername_label]
for idx, item in enumerate([horde_name_entry, horde_gen_entry, horde_context_entry, horde_apikey_entry, horde_workername_entry]):
if usehorde_var.get() == 1:
- item.grid(row=5 + idx, column = 1, padx=8, pady=1, stick="nw")
- labels[idx].grid(row=5 + idx, padx=8, pady=1, stick="nw")
+ item.grid(row=10 + idx, column = 1, padx=8, pady=1, stick="nw")
+ labels[idx].grid(row=10 + idx, padx=8, pady=1, stick="nw")
else:
item.grid_forget()
labels[idx].grid_forget()
@@ -1614,6 +1615,8 @@ def run_horde_worker(args, api_key, worker_name):
current_id = None
current_payload = None
current_generation = None
+ session_kudos_earned = 0
+ session_starttime = datetime.now()
sleepy_counter = 0 #if this exceeds a value, worker becomes sleepy (slower)
print("===\nEmbedded Horde Worker '"+worker_name+"' Starting...\n(To use your own KAI Bridge/Scribe worker instead, don't set your API key)")
BRIDGE_AGENT = f"KoboldCppEmbedWorker:1:https://github.com/LostRuins/koboldcpp"
@@ -1691,7 +1694,16 @@ def run_horde_worker(args, api_key, worker_name):
exitcounter += 1
print_with_time("Error: Job submit failed.")
else:
- print_with_time(f'Submitted generation to {cluster} with id {current_id} and contributed for {reply["reward"]}')
+ reward = reply["reward"]
+ session_kudos_earned += reward
+ curtime = datetime.now()
+ elapsedtime=curtime-session_starttime
+ hrs = elapsedtime.days*24 + elapsedtime.seconds // 3600 #include full days so the clock doesn't wrap at 24h
+ mins = elapsedtime.seconds // 60 % 60
+ secs = elapsedtime.seconds % 60
+ elapsedtimestr = f"{hrs:03d}h:{mins:02d}m:{secs:02d}s"
+ earnrate = session_kudos_earned/(max(1,int(elapsedtime.total_seconds()))/3600) #clamp to 1s to avoid division by zero on the first submit
+ print_with_time(f'Submitted {current_id} and earned {reward:.0f} kd - [Total:{session_kudos_earned:.0f}kd, Time:{elapsedtimestr}, EarnRate:{earnrate:.0f}kd/hr]')
else:
print_with_time("Error: Abandoned current job due to errors. Getting new job.")
current_id = None
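The stats line divides session kudos by elapsed hours; a standalone sketch of that bookkeeping, including the clamp that keeps the first submit from dividing by zero (the helper name is illustrative):

    from datetime import datetime, timedelta

    def format_session_stats(session_start: datetime, kudos_earned: float) -> str:
        """Render the Total/Time/EarnRate suffix the way the worker log line does."""
        elapsed = datetime.now() - session_start
        total_secs = max(1, int(elapsed.total_seconds()))  # avoid div-by-zero on the first job
        hrs, rem = divmod(total_secs, 3600)
        mins, secs = divmod(rem, 60)
        earnrate = kudos_earned / (total_secs / 3600)
        return (f"[Total:{kudos_earned:.0f}kd, Time:{hrs:03d}h:{mins:02d}m:{secs:02d}s, "
                f"EarnRate:{earnrate:.0f}kd/hr]")

    # e.g. 90 kudos over half an hour reports roughly 180 kd/hr
    print(format_session_stats(datetime.now() - timedelta(minutes=30), 90.0))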
@@ -1952,7 +1964,7 @@ if __name__ == '__main__':
parser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
parser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
- parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384], default=2048)
+ parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 2048)", type=int,choices=[512,1024,2048,3072,4096,6144,8192,12288,16384,24576,32768], default=2048)
parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
parser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
parser.add_argument("--stream", help="Uses streaming when generating tokens. Only for the Kobold Lite UI.", action='store_true')