From d1e73d8777b177f94a0de6e8e69b06e723caaa2d Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Sun, 26 May 2024 01:57:28 +0530 Subject: [PATCH] SimpleChat:DU: Switch trim garbage hist based to maxUniq simple Instead of blindly building a histogram for the specified substring length and then checking for any new char within the specified min garbage length limit, NOW exit the learn state when the specified maxUniq chars are found. In turn, there should be no new chars within the specified min garbage length required limit. TODO: Need to track char classes like alphabets, numerals and special/other chars. --- examples/server/public_simplechat/datautils.mjs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/server/public_simplechat/datautils.mjs b/examples/server/public_simplechat/datautils.mjs index 64ffcf745..b42d58555 100644 --- a/examples/server/public_simplechat/datautils.mjs +++ b/examples/server/public_simplechat/datautils.mjs @@ -73,20 +73,25 @@ export function trim_repeat_garbage_at_end_loop(sIn, maxSubL, maxMatchLenThresho /** * A simple minded try trim garbage at end using histogram characteristics * @param {string} sIn - * @param {number} maxSubL + * @param {number} maxUniq * @param {number} maxMatchLenThreshold */ -export function trim_hist_garbage_at_end(sIn, maxSubL, maxMatchLenThreshold) { +export function trim_hist_garbage_at_end(sIn, maxUniq, maxMatchLenThreshold) { if (sIn.length < maxMatchLenThreshold) { return { trimmed: false, data: sIn }; } // Learn let hist = {}; - for(let i=0; i= maxUniq) { + break; + } hist[c] = 1; } }