From f33aa28149de423b6bf7cf722046a1f2046f5f08 Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Sun, 26 May 2024 01:07:07 +0530 Subject: [PATCH] SimpleChat:DU: Try trim using histogram based info TODO: May have to add max number of uniq chars in histogram at end of learning phase. --- .../server/public_simplechat/datautils.mjs | 52 +++++++++++++++++++ .../server/public_simplechat/simplechat.js | 3 +- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/examples/server/public_simplechat/datautils.mjs b/examples/server/public_simplechat/datautils.mjs index 8d513d8b1..64ffcf745 100644 --- a/examples/server/public_simplechat/datautils.mjs +++ b/examples/server/public_simplechat/datautils.mjs @@ -68,3 +68,55 @@ export function trim_repeat_garbage_at_end_loop(sIn, maxSubL, maxMatchLenThresho sCur = got.data; } } + + +/** + * A simple minded try trim garbage at end using histogram characteristics + * @param {string} sIn + * @param {number} maxSubL + * @param {number} maxMatchLenThreshold + */ +export function trim_hist_garbage_at_end(sIn, maxSubL, maxMatchLenThreshold) { + if (sIn.length < maxMatchLenThreshold) { + return { trimmed: false, data: sIn }; + } + // Learn + let hist = {}; + for(let i=0; i