From 1db965d00d991fbb6f24c11ba8bc63e2c31c04ba Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Mon, 27 May 2024 03:00:39 +0530
Subject: [PATCH] SimpleChat: Update a bit wrt readme and notes in du

---
 .../server/public_simplechat/datautils.mjs  | 38 ++++++++++++++++++-
 examples/server/public_simplechat/readme.md | 23 ++++++-----
 2 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/examples/server/public_simplechat/datautils.mjs b/examples/server/public_simplechat/datautils.mjs
index 8eab35e63..9c16f280d 100644
--- a/examples/server/public_simplechat/datautils.mjs
+++ b/examples/server/public_simplechat/datautils.mjs
@@ -3,9 +3,30 @@
 // by Humans for All
 //
 
+/**
+ * Given the limited context size of local LLMs, when the context gets filled up
+ * between the prompt and the response, generation can degenerate into repeating
+ * text garbage. And often, setting a repetition penalty just leads to overly
+ * clever garbage repetition with slight variations. This garbage in turn can
+ * overload the available model context, leading to less valuable responses for
+ * subsequent prompts/queries, if the chat history is sent to the ai model.
+ *
+ * So two simple-minded garbage trimming logics are experimented with below:
+ * * one based on progressively-larger-substring-based repeat matching with partial skip, and
+ * * another based on char-histogram-driven garbage trimming.
+ * * in future, the characteristics of the histogram over varying lengths could be
+ *   used to allow for a more aggressive and adaptive trimming logic.
+ */
+
 /**
  * Simple minded logic to help remove repeating garbage at end of the string.
+ * The repetition needs to match exactly.
+ *
+ * The logic progressively probes for repetition of longer and longer substrings,
+ * till repetition is no longer found. It then picks the candidate with the
+ * longest repeated chain.
+ *
  * @param {string} sIn
  * @param {number} maxSubL
  * @param {number} maxMatchLenThreshold
@@ -44,6 +65,9 @@ export function trim_repeat_garbage_at_end(sIn, maxSubL=10, maxMatchLenThreshold
 /**
  * Simple minded logic to help remove repeating garbage at end of the string, till it cant.
  * If its not able to trim, then it will try to skip a char at end and then trim, a few times.
+ * This ensures that even if there are multiple runs of garbage with different patterns, the
+ * logic still tries to munch through them.
+ *
 * @param {string} sIn
 * @param {number} maxSubL
 * @param {number | undefined} [maxMatchLenThreshold]
@@ -72,7 +96,14 @@ export function trim_repeat_garbage_at_end_loop(sIn, maxSubL, maxMatchLenThresho
 /**
- * A simple minded try trim garbage at end using histogram characteristics
+ * A simple-minded attempt to trim garbage at the end using histogram-driven characteristics.
+ * There can be variation in the repetitions, as long as no new char pops up.
+ *
+ * This tracks the chars and their frequency in a substring of specified length at the end,
+ * and in turn checks whether moving further back into the generated text stays within the
+ * same char subset or goes beyond it, and based on that either trims the string at the
+ * end or not. This allows garbage at the end to be filtered, even when there are certain
+ * kinds of small variations in the repeated text wrt the position of the seen chars.
 * * Allow the garbage to contain upto maxUniq chars, but at the same time ensure that
 * a given type of char ie numerals or alphabets or other types dont cross the specified
@@ -135,7 +166,10 @@ export function trim_hist_garbage_at_end(sIn, maxType, maxUniq, maxMatchLenThres
 }
 
 /**
- * Keep trimming repeatedly using hist_garbage logic, till you no longer can
+ * Keep trimming repeatedly using hist_garbage logic, till you no longer can.
+ * This ensures that even if there are multiple runs of garbage with different patterns,
+ * the logic still tries to munch through them.
+ *
 * @param {any} sIn
 * @param {number} maxType
 * @param {number} maxUniq
diff --git a/examples/server/public_simplechat/readme.md b/examples/server/public_simplechat/readme.md
index de0dfc99d..70bd61d66 100644
--- a/examples/server/public_simplechat/readme.md
+++ b/examples/server/public_simplechat/readme.md
@@ -15,11 +15,13 @@ The UI follows a responsive web design so that the layout can adapt to available
 enough manner, in general.
 
 Allows developer/end-user to control some of the behaviour by updating gMe members from browser's devel-tool
-console.
+console. In parallel, some of the settings directly useful to the end user can also be changed using the
+provided settings ui.
 
-NOTE: Given that the idea is for basic minimal testing, it doesnt bother with any model context length and
-culling of old messages from the chat by default. However by enabling the sliding window chat logic, a crude
-form of old messages culling can be achieved.
+NOTE: The current web service api doesn't expose the model context length directly, so the client logic
+doesn't provide any adaptive culling of old messages, nor replacing them with a summary of their content,
+et al. However there is an optional sliding-window-based chat logic, which provides a simple-minded culling
+of old messages from the chat history before sending to the ai model.
 
 NOTE: It doesnt set any parameters other than temperature and max_tokens for now. However if someone wants
 they can update the js file or equivalent member in gMe as needed.
@@ -54,6 +56,8 @@ Once inside
 
 * Select between chat and completion mode. By default it is set to chat mode.
 
+* Change the default global settings, if one wants to.
+
 * In completion mode
   * logic by default doesnt insert any role specific "ROLE: " prefix wrt each role's message.
     If the model requires any prefix wrt user role messages, then the end user has to
@@ -104,14 +108,15 @@ by developers who may not be from web frontend background (so inturn may not be
 end-use-specific-language-extensions driven flows) so that they can use it to explore/experiment things.
 
 And given that the idea is also to help explore/experiment for developers, some flexibility is provided
-to change behaviour easily using the devel-tools/console, for now. And skeletal logic has been implemented
-to explore some of the end points and ideas/implications around them.
+to change behaviour easily using the devel-tools/console or the provided minimal settings ui (wrt a few aspects).
+Skeletal logic has been implemented to explore some of the end points and ideas/implications around them.
 
 ### General
 
 Me/gMe consolidates the settings which control the behaviour into one object.
 One can see the current settings, as well as change/update them using browsers devel-tool/console.
+It is attached to the document object.
 
 bCompletionFreshChatAlways - whether Completion mode collates complete/sliding-window history when
 communicating with the server or only sends the latest user query/message.
@@ -189,9 +194,9 @@ also be started with a model context size of 1k or more, to be on safe side.
 internal n_predict, for now add the same here on the client side, maybe later add max_tokens to
 /completions endpoint handling code on server side.
 
-Frequency and presence penalty fields are set to 1.2 in the set of fields sent to server
-along with the user query. So that the model is partly set to try avoid repeating text in
-its response.
+NOTE: One may want to experiment with the frequency/presence penalty fields in chatRequestOptions
+wrt the set of fields sent to the server along with the user query, to check how the model behaves
+wrt repetitions in general in the generated text response.
 
 A end-user can change these behaviour by editing gMe from browser's devel-tool/console.
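
---

Usage note: below is a minimal sketch of how the two trimming helper loops documented
above might be exercised standalone under node. It assumes datautils.mjs has no
browser-only dependencies and that the _loop variants return the trimmed string; the
threshold values used are illustrative guesses, not tuned defaults.

    // trygarbage.mjs - hypothetical scratch script kept next to datautils.mjs
    import {
        trim_repeat_garbage_at_end_loop,
        trim_hist_garbage_at_end_loop,
    } from "./datautils.mjs";

    // A response tail which has degenerated into exact repetition.
    let sExact = "The capital of France is Paris." + " the the".repeat(16);
    // A tail with small positional variations, but within a fixed char subset.
    let sVaried = "The capital of France is Paris." + " haa ha ah haa".repeat(8);

    // Probe repeating substrings up to 10 chars long; treat a sufficiently long
    // perfectly-repeating run at the end as garbage and trim it.
    console.log(trim_repeat_garbage_at_end_loop(sExact, 10, 40));

    // Histogram-driven variant: allow up to 8 unique chars of a given type
    // (numerals/alphabets/other) and 12 unique chars overall in the tail window.
    console.log(trim_hist_garbage_at_end_loop(sVaried, 8, 12, 40));

Run with something like `node trygarbage.mjs` from examples/server/public_simplechat/.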
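
Similarly, wrt the penalty NOTE in the readme hunk above, the experiment could look
something like the below, typed into the browser's devel-tool/console with SimpleChat
loaded. gMe is reached through the document object per the readme; whether the penalty
fields take effect depends on the server honouring them, so treat the field names as
assumptions to be verified against the server's completions handling.

    // Hypothetical console experiment: nudge the model away from repeating text.
    document.gMe.chatRequestOptions["frequency_penalty"] = 1.2;
    document.gMe.chatRequestOptions["presence_penalty"] = 1.2;
    // To compare repetition behaviour without the penalties, remove them again.
    delete document.gMe.chatRequestOptions["frequency_penalty"];
    delete document.gMe.chatRequestOptions["presence_penalty"];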