fixed for smartcontext

This commit is contained in:
Concedo 2023-10-28 19:09:22 +08:00
parent 6cf2b4c73b
commit 20ef442c2a
2 changed files with 6 additions and 6 deletions

View file

@ -592,7 +592,7 @@ void PurgeMissingTokens(llama_context * ctx, std::vector<int> &current_context_t
//remove all tokens from old ctx between p0 and p1, updating both arrays and kv, then continue as normal //remove all tokens from old ctx between p0 and p1, updating both arrays and kv, then continue as normal
const int ShortfallThreshold = 256; //dont trigger shifting if the distance between trimstart and currhead < this const int ShortfallThreshold = 256; //dont trigger shifting if the distance between trimstart and currhead < this
const int SlackAllowance = 32; //in case the end text is slightly modified, be forgiving const int SlackAllowance = 64; //in case the end text is slightly modified, be forgiving
int trimstart = 0; int trimstart = 0;
int new_tokens_len = new_context_tokens.size(); int new_tokens_len = new_context_tokens.size();
@ -621,14 +621,14 @@ void PurgeMissingTokens(llama_context * ctx, std::vector<int> &current_context_t
} }
//at least this many tokens need to match, otherwise don't bother trimming //at least this many tokens need to match, otherwise don't bother trimming
const int LCQTokThreshold = std::max((new_tokens_len - trimstart) - (genamt+SlackAllowance), ShortfallThreshold-SlackAllowance); const int LCSTokThreshold = std::max((new_tokens_len - trimstart) - (genamt+SlackAllowance), ShortfallThreshold-SlackAllowance);
auto curr_ctx_without_memory = std::vector<int>(current_context_tokens.begin() + trimstart, current_context_tokens.end()); auto curr_ctx_without_memory = std::vector<int>(current_context_tokens.begin() + trimstart, current_context_tokens.end());
auto new_ctx_without_memory = std::vector<int>(new_context_tokens.begin() + trimstart, new_context_tokens.end()); auto new_ctx_without_memory = std::vector<int>(new_context_tokens.begin() + trimstart, new_context_tokens.end());
auto shared = LongestCommonSubseq(curr_ctx_without_memory, new_ctx_without_memory); auto shared = LongestCommonSubseq(curr_ctx_without_memory, new_ctx_without_memory);
if (shared.size() > LCQTokThreshold && ArrStartWith(new_ctx_without_memory, shared)) // enough tokens in common if (shared.size() > LCSTokThreshold && ArrStartWith(new_ctx_without_memory, shared)) // enough tokens in common
{ {
int found = ArrFindIndexOf(current_context_tokens,shared); int found = ArrFindIndexOf(current_context_tokens,shared);
if(found>=0 && found > trimstart) if(found>=0 && found > trimstart)

View file

@ -6,7 +6,7 @@ It requires no dependencies, installation or setup.
Just copy this single static HTML file anywhere and open it in a browser, or from a webserver. Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite. Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line. Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
Current version: 86 Current version: 87
-Concedo -Concedo
--> -->
@ -7876,11 +7876,11 @@ Current version: 86
} }
//this is a hack since we don't have a proper tokenizer, but we can estimate 1 token per 3.3 characters //this is a hack since we don't have a proper tokenizer, but we can estimate 1 token per 3.3 characters
let max_allowed_characters = Math.floor(maxctxlen * 3.35); let max_allowed_characters = Math.max(1, Math.floor(maxctxlen * 3) - (maxgenamt+8));
if (current_memory == null || current_memory.trim() == "") if (current_memory == null || current_memory.trim() == "")
{ {
//if there is no memory, then we can be a lot more lenient with the character counts since the backend will truncate excess anyway //if there is no memory, then we can be a lot more lenient with the character counts since the backend will truncate excess anyway
max_allowed_characters = Math.floor(maxctxlen * 4.8); max_allowed_characters = Math.floor(maxctxlen * 4.6);
} }
let truncated_context = concat_gametext(true, ""); //no need to truncate if memory is empty let truncated_context = concat_gametext(true, ""); //no need to truncate if memory is empty