From b57aad79a816f823123fe71d4e28ad5329965dad Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Fri, 24 May 2024 01:05:05 +0530
Subject: [PATCH] SimpleChat:SlidingWindow: iRecentUserMsgCnt to limit context load

This is disabled by default. However, if enabled, then in addition to the
latest system message, only the last N user messages after the latest
system message, along with their responses from the ai model, will be
sent to the ai-model when querying for a new response. The specified N
also includes the latest user query.
---
 examples/server/public_simplechat/readme.md   | 45 ++++++++++---
 .../server/public_simplechat/simplechat.js    | 67 ++++++++++++++++---
 2 files changed, 96 insertions(+), 16 deletions(-)

diff --git a/examples/server/public_simplechat/readme.md b/examples/server/public_simplechat/readme.md
index d8818b738..43d5b127f 100644
--- a/examples/server/public_simplechat/readme.md
+++ b/examples/server/public_simplechat/readme.md
@@ -14,11 +14,15 @@ own system prompts.
 The UI follows a responsive web design so that the layout can adapt to available display space in a
 usable enough manner, in general.
 
-NOTE: Given that the idea is for basic minimal testing, it doesnt bother with any model context length and
-culling of old messages from the chat.
+Allows the developer/end-user to control some of the behaviour by updating gMe members from the browser's
+devel-tool console.
 
-NOTE: It doesnt set any parameters other than temperature for now. However if someone wants they can update
-the js file as needed.
+NOTE: Given that the idea is for basic minimal testing, it doesn't bother with any model context length or
+culling of old messages from the chat by default. However, by enabling the sliding-window chat logic, a crude
+form of culling of old messages can be achieved.
+
+NOTE: It doesn't set any parameters other than temperature and max_tokens for now. However, if someone wants,
+they can update the js file or the equivalent member in gMe as needed.
 
 
 ## usage
@@ -96,8 +100,8 @@ Once inside
 Me/gMe consolidates the settings which control the behaviour into one object.
 One can see the current settings, as well as change/update them using browsers devel-tool/console.
 
-bCompletionFreshChatAlways - whether Completion mode collates completion history when communicating
-with the server.
+bCompletionFreshChatAlways - whether Completion mode collates the complete/sliding-window history when
+communicating with the server, or only sends the latest user query/message.
 
 bCompletionInsertStandardRolePrefix - whether Completion mode inserts role related prefix wrt the
 messages that get inserted into prompt field wrt /Completion endpoint.
@@ -106,22 +110,42 @@ One can see the current settings, as well as change/update them using browsers d
 irrespective of whether /chat/completions or /completions endpoint.
 
 If you want to add additional options/fields to send to the server/ai-model, and or
-modify the existing options value, for now you can update this global var using
-browser's development-tools/console.
+modify the existing options' values or remove them, for now you can update this global var
+using the browser's development-tools/console.
+
+iRecentUserMsgCnt - a simple-minded sliding window to limit the context-window load at the ai-model end.
+This is disabled by default. However, if enabled, then in addition to the latest system message, only
+the last/latest iRecentUserMsgCnt user messages after the latest system prompt, along with their
+responses from the ai model, will be sent to the ai-model when querying for a new response. I.e. if
+enabled, only user messages after the latest system message/prompt will be considered.
+
+The specified sliding-window user message count also includes the latest user query.
+    <0 : Send the entire chat history to the server
+     0 : Send only the latest system message, if any, to the server
+    >0 : Send chat history from the latest system prompt onwards, limited to the specified count
+
+
+By using gMe's iRecentUserMsgCnt and chatRequestOptions.max_tokens, one can try to control, to some
+extent and in a simple, crude way, how much the chat history loads the ai-model's context window,
+and in turn the chat responses.
+
 
 Sometimes the browser may be stuborn with caching of the file, so your updates to html/css/js
 may not be visible. Also remember that just refreshing/reloading page in browser or for that
 matter clearing site data, dont directly override site caching in all cases. Worst case you
 may have to change port. Or in dev tools of browser, you may be able to disable caching fully.
 
+
 Concept of multiple chat sessions with different servers, as well as saving and restoring of
 those across browser usage sessions, can be woven around the SimpleChat/MultiChatUI class and
 its instances relatively easily, however given the current goal of keeping this simple, it has
 not been added, for now.
 
+
 By switching between chat.add_system_begin/anytime, one can control whether one can change
 the system prompt, anytime during the conversation or only at the beginning.
 
+
 read_json_early, is to experiment with reading json response data early on, if available,
 so that user can be shown generated data, as and when it is being generated, rather than
 at the end when full data is available.
@@ -132,3 +156,8 @@ at the end when full data is available.
 if able to read json data early on in future, as and when ai model is generating data, then this
 helper needs to indirectly update the chat div with the recieved data, without waiting for the
 overall data to be available.
+
+
+## At the end
+
+Also a thank you to all open source and open model developers, who strive for the common good.
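To make the `<0 / 0 / >0` semantics above concrete, here is a small standalone sketch. It is not part of the patch: `pickRecent` and the sample `xchat` array are illustrative names only, though the selection logic mirrors the `recent_chat()` method added to simplechat.js below.

```js
// Standalone sketch, not code from the patch: pickRecent() mirrors the
// sliding-window selection that recent_chat() implements.
const xchat = [
    {role: "system", content: "You are a helpful assistant."},
    {role: "user", content: "q1"}, {role: "assistant", content: "a1"},
    {role: "user", content: "q2"}, {role: "assistant", content: "a2"},
    {role: "user", content: "q3"},
];

function pickRecent(xchat, iRecentUserMsgCnt) {
    if (iRecentUserMsgCnt < 0) {
        return xchat;                          // <0: send the entire chat history
    }
    const iLastSys = xchat.map(m => m.role).lastIndexOf("system");
    const rchat = (iLastSys >= 0) ? [xchat[iLastSys]] : [];
    // Walk backwards from the newest message, never crossing the latest system
    // prompt, until iRecentUserMsgCnt user messages have been covered.
    let iStart = xchat.length;
    let iUserCnt = 0;
    for (let i = xchat.length - 1; i > iLastSys; i--) {
        if (iUserCnt >= iRecentUserMsgCnt) {   // with 0, nothing is covered: system msg only
            break;
        }
        if (xchat[i].role === "user") {
            iStart = i;
            iUserCnt += 1;
        }
    }
    return rchat.concat(xchat.slice(iStart));
}

console.log(pickRecent(xchat, 2).map(m => m.content));
// -> [ 'You are a helpful assistant.', 'q2', 'a2', 'q3' ]
```

Note how q1/a1 fall out of the window while the latest system prompt is always retained, and how the count of 2 includes the latest query q3.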
diff --git a/examples/server/public_simplechat/simplechat.js b/examples/server/public_simplechat/simplechat.js
index 97a300eac..52b28ad82 100644
--- a/examples/server/public_simplechat/simplechat.js
+++ b/examples/server/public_simplechat/simplechat.js
@@ -25,21 +25,23 @@ let gUsageMsg = `
     <li> Completion mode doesnt insert user/role: prefix implicitly. </li>
     <li> Use shift+enter for inserting enter/newline. </li>
-    <li> Refresh the page to start over fresh. </li>
+    <li> If strange responses, refresh the page to start over fresh. </li>
     </ul>
 `;
 
+/** @typedef {{role: string, content: string}[]} ChatMessages */
+
 class SimpleChat {
 
     constructor() {
         /**
          * Maintain in a form suitable for common LLM web service chat/completions' messages entry
-         * @type {{role: string, content: string}[]}
+         * @type {ChatMessages}
          */
         this.xchat = [];
        this.iLastSys = -1;
@@ -50,6 +52,50 @@ class SimpleChat {
         this.iLastSys = -1;
     }
 
+    /**
+     * Recent chat messages.
+     * If iRecentUserMsgCnt < 0
+     *   Then return the full chat history.
+     * Else
+     *   Return chat messages from the latest, going back till the last/latest system prompt,
+     *   while ensuring that the number of user queries/messages doesn't exceed iRecentUserMsgCnt.
+     * @param {number} iRecentUserMsgCnt
+     */
+    recent_chat(iRecentUserMsgCnt) {
+        if (iRecentUserMsgCnt < 0) {
+            return this.xchat;
+        }
+        if (iRecentUserMsgCnt == 0) {
+            console.warn("WARN:SimpleChat:SC:RecentChat:iRecentUserMsgCnt of 0 means no user message/query sent");
+        }
+        /** @type {ChatMessages} */
+        let rchat = [];
+        let sysMsg = this.get_system_latest();
+        if (sysMsg.length != 0) {
+            rchat.push({role: Roles.System, content: sysMsg});
+        }
+        let iUserCnt = 0;
+        let iStart = this.xchat.length;
+        for(let i=this.xchat.length-1; i > this.iLastSys; i--) {
+            if (iUserCnt >= iRecentUserMsgCnt) {
+                break;
+            }
+            let msg = this.xchat[i];
+            if (msg.role == Roles.User) {
+                iStart = i;
+                iUserCnt += 1;
+            }
+        }
+        for(let i = iStart; i < this.xchat.length; i++) {
+            let msg = this.xchat[i];
+            if (msg.role == Roles.System) {
+                continue;
+            }
+            rchat.push({role: msg.role, content: msg.content});
+        }
+        return rchat;
+    }
+
     /**
      * Add an entry into xchat
      * @param {string} role
@@ -76,7 +122,7 @@ class SimpleChat {
             div.replaceChildren();
         }
         let last = undefined;
-        for(const x of this.xchat) {
+        for(const x of this.recent_chat(gMe.iRecentUserMsgCnt)) {
             let entry = document.createElement("p");
             entry.className = `role-${x.role}`;
             entry.innerText = `${x.role}: ${x.content}`;
@@ -111,7 +157,7 @@ class SimpleChat {
      */
     request_messages_jsonstr() {
         let req = {
-            messages: this.xchat,
+            messages: this.recent_chat(gMe.iRecentUserMsgCnt),
         }
         return this.request_jsonstr(req);
     }
@@ -123,7 +169,7 @@ class SimpleChat {
     request_prompt_jsonstr(bInsertStandardRolePrefix) {
         let prompt = "";
         let iCnt = 0;
-        for(const chat of this.xchat) {
+        for(const chat of this.recent_chat(gMe.iRecentUserMsgCnt)) {
             iCnt += 1;
             if (iCnt > 1) {
                 prompt += "\n";
@@ -527,6 +573,7 @@ class Me {
         this.multiChat = new MultiChatUI();
         this.bCompletionFreshChatAlways = true;
         this.bCompletionInsertStandardRolePrefix = false;
+        this.iRecentUserMsgCnt = -1;
         // Add needed fields wrt json object to be sent wrt LLM web services completions endpoint.
         this.chatRequestOptions = {
             "temperature": 0.7,
@@ -540,7 +587,7 @@ class Me {
 
     show_info(elDiv) {
 
         var p = document.createElement("p");
-        p.innerText = "Settings (gMe)";
+        p.innerText = "Settings (devel-tools-console gMe)";
         p.className = "role-system";
         elDiv.appendChild(p);
@@ -552,6 +599,10 @@ class Me {
         p.innerText = `bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`;
         elDiv.appendChild(p);
 
+        p = document.createElement("p");
+        p.innerText = `iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`;
+        elDiv.appendChild(p);
+
         p = document.createElement("p");
         p.innerText = `chatRequestOptions:${JSON.stringify(this.chatRequestOptions)}`;
         elDiv.appendChild(p);
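As a usage sketch, assuming the patched SimpleChat page is open in a browser, the new knob combines with the existing max_tokens request option roughly as follows from the devel-tools console (the values are illustrative, not recommendations):

```js
// In the browser's devel-tools console, with the SimpleChat page loaded.
gMe.iRecentUserMsgCnt = 2;                  // latest system prompt + last 2 user msgs and their responses
gMe.chatRequestOptions["max_tokens"] = 256; // additionally bound each response's length
gMe.iRecentUserMsgCnt = -1;                 // back to the default: send the full chat history
```

iRecentUserMsgCnt trims how much history each request carries, while max_tokens bounds how much each response can add; together they crudely cap the load on the ai-model's context window, as the readme notes.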