SimpCfg:Fix/cleanup trim related test samples and flow

Use the commonality between Indian languages to show the mix-up issue
with the simple-minded trim_dumb logic and how trim_oversmart
could potentially avoid it.

Since I am using valid strings to show the pitfalls of logic driven by
a fixed native char size, there is no need to keep the dumb and
oversmart flows separate, so merge them into a common loop.
This commit is contained in:
HanishKVC 2024-05-04 11:43:15 +05:30
parent 33619a3b92
commit 3287fdba28

View file

@ -541,18 +541,15 @@ void check_nonenglish() {
std::string sGotOSmart = str_trim_oversmart(sTest, {" \n\t"}); std::string sGotOSmart = str_trim_oversmart(sTest, {" \n\t"});
std::cout << std::format("{}: Test1[{}] Dumb[{}] OverSmart[{}]", __func__, sTest, sGotDumb, sGotOSmart) << std::endl; std::cout << std::format("{}: Test1[{}] Dumb[{}] OverSmart[{}]", __func__, sTest, sGotDumb, sGotOSmart) << std::endl;
} }
std::vector<std::string> vTest2 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s and अs at end 000रअ0अ ", "\n\tthis र remove 0s and अs at end 000रअ0\xa4"}; // The string "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ ",
// * will mess up str_trim_dumb,
// * but will rightly trigger an exception with oversmart.
std::vector<std::string> vTest2 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s, अs, ഇs at end 000रअ0अ ", "\n\tthis र remove 0s, अs, ഇs at end 000रअ0इअ "};
std::string trimChars = {" \n\tഇ0अ"};
for (auto sTest: vTest2) { for (auto sTest: vTest2) {
std::string sGotDumb = str_trim_dumb(sTest, {" \n\t0अ"}); std::string sGotDumb = str_trim_dumb(sTest, trimChars);
std::cout << std::format("{}: Test2[{}] Dumb[{}]", __func__, sTest, sGotDumb) << std::endl; std::string sGotOSmart = str_trim_oversmart(sTest, trimChars);
} std::cout << std::format("{}: Test2 [{}]\n\tDumb[{}]\n\tOverSmart[{}]", __func__, sTest, sGotDumb, sGotOSmart) << std::endl;
// This partly invalid utf8 string will mess up str_trim_dumb "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ "
// but will trigger a exception with oversmart.
// std::vector<std::string> vTest3 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s and अs at end 000रअ0अ ", "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ "};
std::vector<std::string> vTest3 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s and अs at end 000रअ0अ ", "\n\tthis र remove 0s and अs at end 000रअ0\xe0\xa4\x30"}; // \xe0\xa4
for (auto sTest: vTest3) {
std::string sGotOSmart = str_trim_oversmart(sTest, {" \n\t0अ"});
std::cout << std::format("{}: Test3[{}] OverSmart[{}]", __func__, sTest, sGotOSmart) << std::endl;
} }
} }