diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index bee447cc9..8e3b2613b 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -592,7 +592,7 @@ void PurgeMissingTokens(llama_context * ctx, std::vector<int> &current_context_t
     //remove all tokens from old ctx between p0 and p1, updating both arrays and kv, then continue as normal
 
     const int ShortfallThreshold = 256; //dont trigger shifting if the distance between trimstart and currhead < this
-    const int SlackAllowance = 32; //in case the end text is slightly modified, be forgiving
+    const int SlackAllowance = 64; //in case the end text is slightly modified, be forgiving
 
     int trimstart = 0;
     int new_tokens_len = new_context_tokens.size();
@@ -621,14 +621,14 @@ void PurgeMissingTokens(llama_context * ctx, std::vector<int> &current_context_t
     }
 
     //at least this many tokens need to match, otherwise don't bother trimming
-    const int LCQTokThreshold = std::max((new_tokens_len - trimstart) - (genamt+SlackAllowance), ShortfallThreshold-SlackAllowance);
+    const int LCSTokThreshold = std::max((new_tokens_len - trimstart) - (genamt+SlackAllowance), ShortfallThreshold-SlackAllowance);
 
     auto curr_ctx_without_memory = std::vector<int>(current_context_tokens.begin() + trimstart, current_context_tokens.end());
     auto new_ctx_without_memory = std::vector<int>(new_context_tokens.begin() + trimstart, new_context_tokens.end());
 
     auto shared = LongestCommonSubseq(curr_ctx_without_memory, new_ctx_without_memory);
 
-    if (shared.size() > LCQTokThreshold && ArrStartWith(new_ctx_without_memory, shared)) // enough tokens in common
+    if (shared.size() > LCSTokThreshold && ArrStartWith(new_ctx_without_memory, shared)) // enough tokens in common
     {
         int found = ArrFindIndexOf(current_context_tokens,shared);
         if(found>=0 && found > trimstart)
diff --git a/klite.embd b/klite.embd
index 93eab5a78..0efcda075 100644
--- a/klite.embd
+++ b/klite.embd
@@ -6,7 +6,7 @@
 It requires no dependencies, installation or setup.
 Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
-Current version: 86
+Current version: 87
 -Concedo
 -->
@@ -7876,11 +7876,11 @@ Current version: 86
         }
 
         //this is a hack since we dont have a proper tokenizer, but we can estimate 1 token per 3.3 characters
-        let max_allowed_characters = Math.floor(maxctxlen * 3.35);
+        let max_allowed_characters = Math.max(1, Math.floor(maxctxlen * 3) - (maxgenamt+8));
         if (current_memory == null || current_memory.trim() == "")
         {
             //if there is no memory, then we can be a lot of lenient with the character counts since the backend will truncate excess anyway
-            max_allowed_characters = Math.floor(maxctxlen * 4.8);
+            max_allowed_characters = Math.floor(maxctxlen * 4.6);
         }
 
         let truncated_context = concat_gametext(true, ""); //no need to truncate if memory is empty
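
A minimal standalone sketch (not part of the patch) of what the renamed LCSTokThreshold evaluates to under the new SlackAllowance of 64; ShortfallThreshold keeps its existing value of 256, and new_tokens_len, trimstart and genamt below are invented purely for illustration:

// Hypothetical numbers only; the formula is copied from the patch above.
#include <algorithm>
#include <cstdio>

int main()
{
    const int ShortfallThreshold = 256; // unchanged by the patch
    const int SlackAllowance = 64;      // raised from 32 by the patch

    int new_tokens_len = 1000; // assumed: tokens in the incoming prompt
    int trimstart = 200;       // assumed: length of the unchanged "memory" prefix
    int genamt = 128;          // assumed: requested generation amount

    // At least this many tokens must match before trimming is attempted.
    const int LCSTokThreshold = std::max((new_tokens_len - trimstart) - (genamt + SlackAllowance),
                                         ShortfallThreshold - SlackAllowance);

    printf("LCSTokThreshold = %d\n", LCSTokThreshold); // prints 608 with these numbers
    return 0;
}

With these numbers, max(800 - 192, 192) = 608, so at least 608 of the 800 non-memory tokens must match for the shift to trigger; under the old SlackAllowance of 32 the bar would have been 640, so the change makes the check slightly more forgiving of edits near the end of the prompt.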