From f53c19baac7c75de73f6672204944f609004be11 Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Sat, 4 May 2024 12:22:06 +0530 Subject: [PATCH] SimpCfg: Update the notes wrt tolower and add test code --- common/simpcfg.hpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/common/simpcfg.hpp b/common/simpcfg.hpp index 27fb9c56d..a04860714 100644 --- a/common/simpcfg.hpp +++ b/common/simpcfg.hpp @@ -213,8 +213,15 @@ TString str_trim_single(TString sin, const TString& trimChars=" \t\n") { return sin; } -// This works for NativeCharSize encoded chars, including in utf8 encoding space. -// This wont work for multibyte encoded chars. +// Convert to lower case, if language has upper and lower case semantic +// +// This works for fixed size encoded char spaces. +// +// For variable length encoded char spaces, it can work +// * if one is doing the conversion for languages which fit into NativeCharSized chars in it +// * AND if one is working with a sane variable length encoding standard +// * ex: this will work if trying to do the conversion for english language within utf-8 +// template <typename TString> TString str_tolower(const TString &sin) { TString sout; @@ -539,7 +546,8 @@ void check_nonenglish() { for (auto sTest: vTest1) { std::string sGotDumb = str_trim_dumb(sTest, {" \n\t"}); std::string sGotOSmart = str_trim_oversmart(sTest, {" \n\t"}); - std::cout << std::format("{}: Test1[{}] Dumb[{}] OverSmart[{}]", __func__, sTest, sGotDumb, sGotOSmart) << std::endl; + std::string sLower = str_tolower(sTest); + std::cout << std::format("{}: Test1 [{}]\n\tTrimDumb[{}]\n\tTrimOverSmart[{}]\n\tLowerDumb[{}]", __func__, sTest, sGotDumb, sGotOSmart, sLower) << std::endl; } // The string "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ ", // * will mess up str_trim_dumb,