From 3287fdba28530671dfac3144983a790b4cc1eba4 Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Sat, 4 May 2024 11:43:15 +0530 Subject: [PATCH] SimpCfg:Fix/cleanup trim related test samples and flow Use the commonality between Indian languages to show the mixup issue with the simple minded trim_dumb logic and how trim_oversmart could potentially avoid that. Given that I am using valid strings to show the pitfalls of fixed native char size driven logic, there is no need to keep the dumb and oversmart flows separate, so merge them into a common loop. --- common/simpcfg.hpp | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/common/simpcfg.hpp b/common/simpcfg.hpp index 2a7f96d6b..27fb9c56d 100644 --- a/common/simpcfg.hpp +++ b/common/simpcfg.hpp @@ -541,18 +541,15 @@ void check_nonenglish() { std::string sGotOSmart = str_trim_oversmart(sTest, {" \n\t"}); std::cout << std::format("{}: Test1[{}] Dumb[{}] OverSmart[{}]", __func__, sTest, sGotDumb, sGotOSmart) << std::endl; } - std::vector vTest2 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s and अs at end 000रअ0अ ", "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ "}; + // The string "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ ", + // * will mess up str_trim_dumb, + // * but will rightly trigger a exception with oversmart. + std::vector vTest2 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s, अs, ഇs at end 000रअ0अ ", "\n\tthis र remove 0s, अs, ഇs at end 000रअ0इअ "}; + std::string trimChars = {" \n\tഇ0अ"}; for (auto sTest: vTest2) { - std::string sGotDumb = str_trim_dumb(sTest, {" \n\t0अ"}); - std::cout << std::format("{}: Test2[{}] Dumb[{}]", __func__, sTest, sGotDumb) << std::endl; - } - // This partly invalid utf8 string will mess up str_trim_dumb "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ " - // but will trigger a exception with oversmart. 
- // std::vector vTest3 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s and अs at end 000रअ0अ ", "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ "}; - std::vector vTest3 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s and अs at end 000रअ0अ ", "\n\tthis र remove 0s and अs at end 000रअ0\xe0\xa4\x30अ "}; // \xe0\xa4 - for (auto sTest: vTest3) { - std::string sGotOSmart = str_trim_oversmart(sTest, {" \n\t0अ"}); - std::cout << std::format("{}: Test3[{}] OverSmart[{}]", __func__, sTest, sGotOSmart) << std::endl; + std::string sGotDumb = str_trim_dumb(sTest, trimChars); + std::string sGotOSmart = str_trim_oversmart(sTest, trimChars); + std::cout << std::format("{}: Test2 [{}]\n\tDumb[{}]\n\tOverSmart[{}]", __func__, sTest, sGotDumb, sGotOSmart) << std::endl; } }