SimpCfg+DataUtilsString: Move string helpers to its own file
This commit is contained in:
parent
cdd91f5ad1
commit
bb3fe48c16
2 changed files with 223 additions and 213 deletions
220
common/datautils_string.hpp
Normal file
220
common/datautils_string.hpp
Normal file
|
@ -0,0 +1,220 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A bunch of helper routines to work with strings.
|
||||||
|
* by Humans for All
|
||||||
|
*
|
||||||
|
* ## Some notes for later
|
||||||
|
*
|
||||||
|
* NativeCharSize encoded char refers to chars which fit within the size of char type in a given
|
||||||
|
* type of c++ string or base bitsize of a encoding standard, like 1 byte in case of std::string,
|
||||||
|
* utf-8, ...
|
||||||
|
* * example english alphabets in utf-8 encoding space are 1byte chars, in its variable length
|
||||||
|
* encoding space.
|
||||||
|
*
|
||||||
|
* MultiNativeCharSize encoded char refers to chars which occupy multiple base-char-bit-size of
|
||||||
|
* a c++ string type or char encoding standard.
|
||||||
|
* * example indian scripts alphabets in utf-8 encoding space occupy multiple bytes in its variable
|
||||||
|
* length encoding space.
|
||||||
|
*
|
||||||
|
* Sane variable length encoding - refers to encoding where the values of NativeCharSized chars of
|
||||||
|
* a char encoding space cannot overlap with values in NativeCharSize subparts of MultiNativeCharSized
|
||||||
|
* chars of the same char encoding standard.
|
||||||
|
* * utf-8 shows this behaviour
|
||||||
|
* * chances are utf-16 and utf-32 also show this behaviour (need to cross check once)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <cwchar>
#include <cwctype>
#include <sstream>
#include <stdexcept>
#include <string>

#include "log.h"
|
||||||
|
|
||||||
|
|
||||||
|
// Debug switch: forced off here, so the dumphex_string() tracing that the
// helpers below guard with #ifdef DUS_DEBUG_VERBOSE is compiled out.
#undef DUS_DEBUG_VERBOSE
|
||||||
|
|
||||||
|
// Trim variant switch: forced off here, so the #ifdef just below always takes
// the #else branch and str_trim is an alias for str_trim_dumb.
#undef DUS_STR_OVERSMART
|
||||||
|
#ifdef DUS_STR_OVERSMART
|
||||||
|
#define str_trim str_trim_oversmart
|
||||||
|
#else
|
||||||
|
#define str_trim str_trim_dumb
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// Convert the wide string wSrc into multibyte chars, stored in sDest.
//
// Works in two passes over std::wcsrtombs: the first one, with a null
// destination, only probes how many bytes are needed; the second one writes
// into sDest after it has been resized to that many bytes.
//
// sDest : output; resized to the probed length and then filled.
// wSrc  : input; read through c_str().
// Returns whatever the second std::wcsrtombs call returns.
// Throws std::runtime_error if the probing pass reports failure.
inline size_t wcs_to_mbs(std::string &sDest, const std::wstring &wSrc) {
    const wchar_t *pWide = wSrc.c_str();
    std::mbstate_t convState = std::mbstate_t();

    // Pass 1: size probe only, nothing is written.
    const auto nBytes = std::wcsrtombs(nullptr, &pWide, 0, &convState);
    if (nBytes == static_cast<std::size_t>(-1)) {
        throw std::runtime_error("ERRR:WCS2MBS:Failed probing of size...");
    }

    // Pass 2: the actual conversion, into a buffer of exactly the probed size.
    sDest.resize(nBytes);
    return std::wcsrtombs(sDest.data(), &pWide, sDest.length(), &convState);
}
|
||||||
|
|
||||||
|
// Convert the multibyte string sSrc into wide chars, stored in wDest.
//
// Works in two passes over std::mbsrtowcs: the first one, with a null
// destination, only probes how many wide chars are needed; the second one
// writes into wDest after it has been resized to that many wide chars.
//
// wDest : output; resized to the probed length and then filled.
// sSrc  : input; read through c_str().
// Returns whatever the second std::mbsrtowcs call returns.
// Throws std::runtime_error if the probing pass reports failure.
inline size_t mbs_to_wcs(std::wstring &wDest, const std::string &sSrc) {
    const char *pNarrow = sSrc.c_str();
    std::mbstate_t convState = std::mbstate_t();

    // Pass 1: size probe only, nothing is written.
    const auto nWide = std::mbsrtowcs(nullptr, &pNarrow, 0, &convState);
    if (nWide == static_cast<std::size_t>(-1)) {
        throw std::runtime_error("ERRR:MBS2WCS:Failed probing of size...");
    }

    // Pass 2: the actual conversion, into a buffer of exactly the probed size.
    wDest.resize(nWide);
    return std::mbsrtowcs(wDest.data(), &pNarrow, wDest.length(), &convState);
}
|
||||||
|
|
||||||
|
template <typename TString>
|
||||||
|
inline void dumphex_string(const TString &sIn, const std::string &msgTag){
|
||||||
|
LDBUG("%s[ ", msgTag.c_str());
|
||||||
|
for(auto c: sIn) {
|
||||||
|
auto cSize = sizeof(c);
|
||||||
|
if (cSize == 1) {
|
||||||
|
LDBUG("%02x, ", (uint8_t)c);
|
||||||
|
} else if (cSize == 2) {
|
||||||
|
LDBUG("%04x, ", (uint16_t)c);
|
||||||
|
} else if (cSize == 4) {
|
||||||
|
LDBUG("%08x, ", (uint32_t)c);
|
||||||
|
} else {
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << "ERRR:" << __func__ << ":Unsupported char type with size [" << cSize << "]";
|
||||||
|
throw std::runtime_error( ss.str().c_str() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LDBUG_LN(" ]");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove chars from begin and end of the passed string, provided the char
|
||||||
|
// belongs to one of the chars in trimChars.
|
||||||
|
//
|
||||||
|
// NOTE: This will work perfectly provided the string being trimmed as well as
|
||||||
|
// chars being trimmed are made up of NativeCharSize chars from same encoded space.
|
||||||
|
// For utf-8, this means the ascii equivalent 1byteSized chars of utf8 and not
|
||||||
|
// variable length MultiNativeCharSize (ie multibyte in case of utf-8) ones.
|
||||||
|
// NOTE: It will also work, if at least either end of string as well as trimChars
|
||||||
|
// have NativeCharSize chars from their encoding space, rather than variable
|
||||||
|
// length MultiNativeCharSize based chars if any. There needs to be NativeCharSized
|
||||||
|
// chars beyond any chars that get trimmed, on either side.
|
||||||
|
//
|
||||||
|
// NOTE: Given the way UTF-8 char encoding is designed, where NativeCharSize 1byte
|
||||||
|
// encoded chars are fully unique and do not overlap with any bytes from any of the
|
||||||
|
// variable length MultiNativeCharSize encoded chars in the utf-8 space, so as long as
|
||||||
|
// the trimChars belong to NativeCharSize chars subset, the logic should work, even
|
||||||
|
// if string has a mixture of NativeCharSize and MultiNativeCharSize encoded chars.
|
||||||
|
// Chances are utf-16 and utf-32 also have similar characteristics wrt their
|
||||||
|
// NativeCharSize encoded chars (ie those fully encoded within single 16bit and 32bit
|
||||||
|
// value respectively), and so equivalent semantic applies to them also.
|
||||||
|
//
|
||||||
|
// ALERT: Given that this simple minded logic, works at individual NativeCharSize level
|
||||||
|
// only, If trimChars involve variable length MultiNativeCharSize encoded chars, then
|
||||||
|
// * because different NativeCharSize subparts (bytes in case of utf-8) from different
|
||||||
|
// MultiNativeCharSize trim chars when clubbed together can map to some other new char
|
||||||
|
// in a variable length encoded char space, if there is that new char at either end
|
||||||
|
// of the string, it may get trimmed, because of the possibility of mix up mentioned.
|
||||||
|
// * given that different variable length MultiNativeCharSize encoded chars may have
|
||||||
|
// some common NativeCharSize subparts (bytes in case of utf-8) between them, if one
|
||||||
|
// of these chars is at either end of the string and another char is in trimChars,
|
||||||
|
// then string may get partially trimmed wrt such a char at either end.
|
||||||
|
//
|
||||||
|
template <typename TString>
inline TString str_trim_dumb(TString sin, const TString &trimChars=" \t\n") {
#ifdef DUS_DEBUG_VERBOSE
    dumphex_string(sin, "DBUG:StrTrimDumb:Str:");
    dumphex_string(trimChars, "DBUG:StrTrimDumb:TrimChars:");
#endif
    // Position of the last element that must survive the trimming.
    const auto lastKeep = sin.find_last_not_of(trimChars);
    if (lastKeep == TString::npos) {
        // Empty input, or made up of trim chars only: nothing survives.
        sin.clear();
        return sin;
    }
    // A survivor exists, so this search cannot fail and firstKeep <= lastKeep.
    const auto firstKeep = sin.find_first_not_of(trimChars);
    return sin.substr(firstKeep, lastKeep - firstKeep + 1);
}
|
||||||
|
|
||||||
|
// Remove chars from begin and end of the passed string, provided the char belongs
|
||||||
|
// to one of the chars in trimChars.
|
||||||
|
// NOTE: Internally converts to wchar/wstring to try and support proper trimming,
|
||||||
|
// wrt possibly more languages, to some extent. IE even if the passed string
|
||||||
|
// contains multibyte encoded characters in it in utf-8 space (ie MultiNativeCharSize),
|
||||||
|
// it may get converted to NativeCharSize chars in the expanded wchar_t encoding space,
|
||||||
|
// thus leading to fixed NativeCharSize driven logic itself handling things sufficiently.
|
||||||
|
// Look at str_trim_dumb comments for additional aspects.
|
||||||
|
inline std::string str_trim_oversmart(std::string sIn, const std::string &trimChars=" \t\n") {
|
||||||
|
std::wstring wIn;
|
||||||
|
mbs_to_wcs(wIn, sIn);
|
||||||
|
std::wstring wTrimChars;
|
||||||
|
mbs_to_wcs(wTrimChars, trimChars);
|
||||||
|
auto wOut = str_trim_dumb(wIn, wTrimChars);
|
||||||
|
std::string sOut;
|
||||||
|
wcs_to_mbs(sOut, wOut);
|
||||||
|
return sOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove at most 1 char at the begin and 1 char at the end of the passed string,
|
||||||
|
// provided the char belongs to one of the chars in trimChars.
|
||||||
|
//
|
||||||
|
// NOTE: Chars being trimmed (ie in trimChars) needs to be part of NativeCharSize
|
||||||
|
// subset of the string's encoded char space, to avoid mix up when working with
|
||||||
|
// strings which can be utf-8/utf-16/utf-32/sane-variable-length encoded strings.
|
||||||
|
//
|
||||||
|
// NOTE:UTF8: This will work provided the string being trimmed as well the chars
|
||||||
|
// being trimmed are made up of 1byte encoded chars in case of utf8 encoding space.
|
||||||
|
// If the string being trimmed includes multibyte (ie MultiNativeCharSize) encoded
|
||||||
|
// characters at either end, then trimming can mess things up, if you have multibyte
|
||||||
|
// encoded utf-8 chars in the trimChars set.
|
||||||
|
//
|
||||||
|
// Currently given that SimpCfg only uses this with NativeCharSize chars in the
|
||||||
|
// trimChars and most of the platforms are likely to be using utf-8 based char
|
||||||
|
// space (which is a relatively sane variable length char encoding from this
|
||||||
|
// logic's perspective), so not providing oversmart variant.
|
||||||
|
//
|
||||||
|
template <typename TString>
inline TString str_trim_single(TString sin, const TString& trimChars=" \t\n") {
    // Leading side: drop the first element if it is one of trimChars.
    if (sin.empty()) return sin;
    if (trimChars.find(sin.front()) != TString::npos) {
        sin.erase(0, 1);
    }

    // Trailing side: same check on whatever is left.
    if (sin.empty()) return sin;
    if (trimChars.find(sin.back()) != TString::npos) {
        sin.pop_back();
    }
    return sin;
}
|
||||||
|
|
||||||
|
// Convert to lower case, if language has upper and lower case semantic
|
||||||
|
//
|
||||||
|
// This works for fixed size encoded char spaces.
|
||||||
|
//
|
||||||
|
// For variable length encoded char spaces, it can work
|
||||||
|
// * if one is doing the conversion for languages which fit into NativeCharSized chars in it
|
||||||
|
// * AND if one is working with a sane variable length encoding standard
|
||||||
|
// * ex: this will work if trying to do the conversion for english language within utf-8
|
||||||
|
//
|
||||||
|
template <typename TString>
inline TString str_tolower(const TString &sin) {
    using TChar = typename TString::value_type;
    TString sout;
    sout.resize(sin.size());
    // std::tolower(int) is only defined for EOF and values representable as
    // unsigned char. Feeding it a raw (possibly negative) char, or a wide char
    // beyond that range, is undefined behaviour. So 1 byte units go through
    // unsigned char, and wider units use the wide char variant instead.
    std::transform(sin.begin(), sin.end(), sout.begin(), [](TChar c) -> TChar {
        if constexpr (sizeof(TChar) == 1) {
            return static_cast<TChar>(std::tolower(static_cast<unsigned char>(c)));
        } else {
            return static_cast<TChar>(std::towlower(static_cast<std::wint_t>(c)));
        }
    });
#ifdef DUS_DEBUG_VERBOSE
    dumphex_string(sin, "DBUG:StrToLower:in:");
    dumphex_string(sout, "DBUG:StrToLower:out:");
#endif
    return sout;
}
|
||||||
|
|
||||||
|
// Debug aid: log both strings along with their lengths, and then log them
// side by side, one position per line, over the length of the shorter one.
inline void str_compare_dump(const std::string &s1, const std::string &s2) {
    LDBUG_LN("DBUG:%s:%s:Len:%zu", __func__, s1.c_str(), s1.length());
    LDBUG_LN("DBUG:%s:%s:Len:%zu", __func__, s2.c_str(), s2.length());
    // Stay in size_t: avoids narrowing the lengths into int and the
    // signed/unsigned mix that came with it.
    const std::size_t minLen = std::min(s1.length(), s2.length());
    for (std::size_t i = 0; i < minLen; i++) {
        LDBUG_LN("DBUG:%s:%zu:%c:%c", __func__, i, s1[i], s2[i]);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Return the text that streaming value with operator<< produces.
// Usable with any type that can be inserted into a std::ostream.
template<typename TypeWithStrSupp>
std::string str(TypeWithStrSupp value) {
    std::ostringstream formatted;
    formatted << value;
    return formatted.str();
}
|
|
@ -4,6 +4,8 @@
|
||||||
* Provides a simple direct 1-level only config file logic
|
* Provides a simple direct 1-level only config file logic
|
||||||
* by Humans for All
|
* by Humans for All
|
||||||
*
|
*
|
||||||
|
* This builds on the GroupKV class.
|
||||||
|
*
|
||||||
* ## File format
|
* ## File format
|
||||||
*
|
*
|
||||||
* It can consist of multiple config groups.
|
* It can consist of multiple config groups.
|
||||||
|
@ -24,24 +26,6 @@
|
||||||
* It tries to provide a crude expanded form of array wrt any of the above supported types.
|
* It tries to provide a crude expanded form of array wrt any of the above supported types.
|
||||||
* For this one needs to define keys using the pattern TheKeyName-0, TheKeyName-1, ....
|
* For this one needs to define keys using the pattern TheKeyName-0, TheKeyName-1, ....
|
||||||
*
|
*
|
||||||
* ## Additional notes
|
|
||||||
*
|
|
||||||
* NativeCharSize encoded char refers to chars which fit within the size of char type in a given
|
|
||||||
* type of c++ string or base bitsize of a encoding standard, like 1 byte in case of std::string,
|
|
||||||
* utf-8, ...
|
|
||||||
* * example english alphabets in utf-8 encoding space are 1byte chars, in its variable length
|
|
||||||
* encoding space.
|
|
||||||
*
|
|
||||||
* MultiNativeCharSize encoded char refers to chars which occupy multiple base-char-bit-size of
|
|
||||||
* a c++ string type or char encoding standard.
|
|
||||||
* * example indian scripts alphabets in utf-8 encoding space occupy multiple bytes in its variable
|
|
||||||
* length encoding space.
|
|
||||||
*
|
|
||||||
* Sane variable length encoding - refers to encoding where the values of NativeCharSized chars of
|
|
||||||
* a char encoding space cant overlap with values in NativeCharSize subparts of MultiNativeCharSized
|
|
||||||
* chars of the same char encoding standard.
|
|
||||||
* * utf-8 shows this behaviour
|
|
||||||
* * chances are utf-16 and utf-32 also show this behaviour (need to cross check once)
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
@ -54,203 +38,9 @@
|
||||||
#include <cuchar>
|
#include <cuchar>
|
||||||
|
|
||||||
#include "groupkv.hpp"
|
#include "groupkv.hpp"
|
||||||
|
#include "datautils_string.hpp"
|
||||||
|
|
||||||
|
|
||||||
#undef SC_DEBUG_VERBOSE
|
|
||||||
|
|
||||||
#undef SC_STR_OVERSMART
|
|
||||||
#ifdef SC_STR_OVERSMART
|
|
||||||
#define str_trim str_trim_oversmart
|
|
||||||
#else
|
|
||||||
#define str_trim str_trim_dumb
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
// **** **** **** String related helpers **** **** **** //
|
|
||||||
|
|
||||||
|
|
||||||
inline size_t wcs_to_mbs(std::string &sDest, const std::wstring &wSrc) {
|
|
||||||
std::mbstate_t mbState = std::mbstate_t();
|
|
||||||
const wchar_t *wSrcP = wSrc.c_str();
|
|
||||||
auto reqLen = std::wcsrtombs(nullptr, &wSrcP, 0, &mbState);
|
|
||||||
if (reqLen == static_cast<std::size_t>(-1)) {
|
|
||||||
throw std::runtime_error("ERRR:WCS2MBS:Failed probing of size...");
|
|
||||||
}
|
|
||||||
sDest.resize(reqLen);
|
|
||||||
return std::wcsrtombs(sDest.data(), &wSrcP, sDest.length(), &mbState);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline size_t mbs_to_wcs(std::wstring &wDest, const std::string &sSrc) {
|
|
||||||
std::mbstate_t mbState = std::mbstate_t();
|
|
||||||
const char *sSrcP = sSrc.c_str();
|
|
||||||
auto reqLen = std::mbsrtowcs(nullptr, &sSrcP, 0, &mbState);
|
|
||||||
if (reqLen == static_cast<std::size_t>(-1)) {
|
|
||||||
throw std::runtime_error("ERRR:MBS2WCS:Failed probing of size...");
|
|
||||||
}
|
|
||||||
wDest.resize(reqLen);
|
|
||||||
return std::mbsrtowcs(wDest.data(), &sSrcP, wDest.length(), &mbState);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TString>
|
|
||||||
inline void dumphex_string(const TString &sIn, const std::string &msgTag){
|
|
||||||
LDBUG("%s[ ", msgTag.c_str());
|
|
||||||
for(auto c: sIn) {
|
|
||||||
auto cSize = sizeof(c);
|
|
||||||
if (cSize == 1) {
|
|
||||||
LDBUG("%02x, ", (uint8_t)c);
|
|
||||||
} else if (cSize == 2) {
|
|
||||||
LDBUG("%04x, ", (uint16_t)c);
|
|
||||||
} else if (cSize == 4) {
|
|
||||||
LDBUG("%08x, ", (uint32_t)c);
|
|
||||||
} else {
|
|
||||||
std::stringstream ss;
|
|
||||||
ss << "ERRR:" << __func__ << ":Unsupported char type with size [" << cSize << "]";
|
|
||||||
throw std::runtime_error( ss.str().c_str() );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LDBUG_LN(" ]");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove chars from begin and end of the passed string, provided the char
|
|
||||||
// belongs to one of the chars in trimChars.
|
|
||||||
//
|
|
||||||
// NOTE: This will work perfectly provided the string being trimmed as well as
|
|
||||||
// chars being trimmed are made up of NativeCharSize chars from same encoded space.
|
|
||||||
// For utf-8, this means the ascii equivalent 1byteSized chars of utf8 and not
|
|
||||||
// variable length MultiNativeCharSize (ie multibye in case of utf-8) ones.
|
|
||||||
// NOTE: It will also work, if atleast either end of string as well as trimChars
|
|
||||||
// have NativeCharSize chars from their encoding space, rather than variable
|
|
||||||
// length MultiNativeCharSize based chars if any. There needs to be NativeCharSized
|
|
||||||
// chars beyond any chars that get trimmed, on either side.
|
|
||||||
//
|
|
||||||
// NOTE: Given the way UTF-8 char encoding is designed, where NativeCharSize 1byte
|
|
||||||
// encoded chars are fully unique and dont overlap with any bytes from any of the
|
|
||||||
// variable length MultiNativeCharSize encoded chars in the utf-8 space, so as long as
|
|
||||||
// the trimChars belong to NativeCharSize chars subset, the logic should work, even
|
|
||||||
// if string has a mixture of NativeCharSize and MultiNativeCharSize encoded chars.
|
|
||||||
// Chances are utf-16 and utf-32 also have similar characteristics wrt thier
|
|
||||||
// NativeCharSize encoded chars (ie those fully encoded within single 16bit and 32bit
|
|
||||||
// value respectively), and so equivalent semantic applies to them also.
|
|
||||||
//
|
|
||||||
// ALERT: Given that this simple minded logic, works at individual NativeCharSize level
|
|
||||||
// only, If trimChars involve variable length MultiNativeCharSize encoded chars, then
|
|
||||||
// * because different NativeCharSize subparts (bytes in case of utf-8) from different
|
|
||||||
// MultiNativeCharSize trim chars when clubbed together can map to some other new char
|
|
||||||
// in a variable length encoded char space, if there is that new char at either end
|
|
||||||
// of the string, it may get trimmed, because of the possibility of mix up mentioned.
|
|
||||||
// * given that different variable length MultiNativeCharSize encoded chars may have
|
|
||||||
// some common NativeCharSize subparts (bytes in case of utf-8) between them, if one
|
|
||||||
// of these chars is at either end of the string and another char is in trimChars,
|
|
||||||
// then string may get partially trimmed wrt such a char at either end.
|
|
||||||
//
|
|
||||||
template <typename TString>
|
|
||||||
inline TString str_trim_dumb(TString sin, const TString &trimChars=" \t\n") {
|
|
||||||
#ifdef SC_DEBUG_VERBOSE
|
|
||||||
dumphex_string(sin, "DBUG:StrTrimDumb:Str:");
|
|
||||||
dumphex_string(trimChars, "DBUG:StrTrimDumb:TrimChars:");
|
|
||||||
#endif
|
|
||||||
sin.erase(sin.find_last_not_of(trimChars)+1);
|
|
||||||
sin.erase(0, sin.find_first_not_of(trimChars));
|
|
||||||
return sin;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove chars from begin and end of the passed string, provided the char belongs
|
|
||||||
// to one of the chars in trimChars.
|
|
||||||
// NOTE: Internally converts to wchar/wstring to try and support proper trimming,
|
|
||||||
// wrt possibly more languages, to some extent. IE even if the passed string
|
|
||||||
// contains multibyte encoded characters in it in utf-8 space (ie MultiNativeCharSize),
|
|
||||||
// it may get converted to NativeCharSize chars in the expanded wchar_t encoding space,
|
|
||||||
// thus leading to fixed NativeCharSize driven logic itself handling things sufficiently.
|
|
||||||
// Look at str_trim_dumb comments for additional aspects.
|
|
||||||
inline std::string str_trim_oversmart(std::string sIn, const std::string &trimChars=" \t\n") {
|
|
||||||
std::wstring wIn;
|
|
||||||
mbs_to_wcs(wIn, sIn);
|
|
||||||
std::wstring wTrimChars;
|
|
||||||
mbs_to_wcs(wTrimChars, trimChars);
|
|
||||||
auto wOut = str_trim_dumb(wIn, wTrimChars);
|
|
||||||
std::string sOut;
|
|
||||||
wcs_to_mbs(sOut, wOut);
|
|
||||||
return sOut;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove atmost 1 char at the begin and 1 char at the end of the passed string,
|
|
||||||
// provided the char belongs to one of the chars in trimChars.
|
|
||||||
//
|
|
||||||
// NOTE: Chars being trimmed (ie in trimChars) needs to be part of NativeCharSize
|
|
||||||
// subset of the string's encoded char space, to avoid mix up when working with
|
|
||||||
// strings which can be utf-8/utf-16/utf-32/sane-variable-length encoded strings.
|
|
||||||
//
|
|
||||||
// NOTE:UTF8: This will work provided the string being trimmed as well the chars
|
|
||||||
// being trimmed are made up of 1byte encoded chars in case of utf8 encoding space.
|
|
||||||
// If the string being trimmed includes multibyte (ie MultiNativeCharSize) encoded
|
|
||||||
// characters at either end, then trimming can mess things up, if you have multibyte
|
|
||||||
// encoded utf-8 chars in the trimChars set.
|
|
||||||
//
|
|
||||||
// Currently given that SimpCfg only uses this with NativeCharSize chars in the
|
|
||||||
// trimChars and most of the platforms are likely to be using utf-8 based char
|
|
||||||
// space (which is a realtively sane variable length char encoding from this
|
|
||||||
// logics perspective), so not providing oversmart variant.
|
|
||||||
//
|
|
||||||
template <typename TString>
|
|
||||||
inline TString str_trim_single(TString sin, const TString& trimChars=" \t\n") {
|
|
||||||
if (sin.empty()) return sin;
|
|
||||||
for(auto c: trimChars) {
|
|
||||||
if (c == sin.front()) {
|
|
||||||
sin = sin.substr(1, TString::npos);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (sin.empty()) return sin;
|
|
||||||
for(auto c: trimChars) {
|
|
||||||
if (c == sin.back()) {
|
|
||||||
sin = sin.substr(0, sin.length()-1);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sin;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert to lower case, if language has upper and lower case semantic
|
|
||||||
//
|
|
||||||
// This works for fixed size encoded char spaces.
|
|
||||||
//
|
|
||||||
// For variable length encoded char spaces, it can work
|
|
||||||
// * if one is doing the conversion for languages which fit into NativeCharSized chars in it
|
|
||||||
// * AND if one is working with a sane variable length encoding standard
|
|
||||||
// * ex: this will work if trying to do the conversion for english language within utf-8
|
|
||||||
//
|
|
||||||
template <typename TString>
|
|
||||||
inline TString str_tolower(const TString &sin) {
|
|
||||||
TString sout;
|
|
||||||
sout.resize(sin.size());
|
|
||||||
std::transform(sin.begin(), sin.end(), sout.begin(), [](auto c)->auto {return std::tolower(c);});
|
|
||||||
#ifdef SC_DEBUG_VERBOSE
|
|
||||||
dumphex_string(sin, "DBUG:StrToLower:in:");
|
|
||||||
dumphex_string(sout, "DBUG:StrToLower:out:");
|
|
||||||
#endif
|
|
||||||
return sout;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void str_compare_dump(const std::string &s1, const std::string &s2) {
|
|
||||||
LDBUG_LN("DBUG:%s:%s:Len:%zu", __func__, s1.c_str(), s1.length());
|
|
||||||
LDBUG_LN("DBUG:%s:%s:Len:%zu", __func__, s2.c_str(), s2.length());
|
|
||||||
int minLen = s1.length() < s2.length() ? s1.length() : s2.length();
|
|
||||||
for(int i=0; i<minLen; i++) {
|
|
||||||
LDBUG_LN("DBUG:%s:%d:%c:%c", __func__, i, s1[i], s2[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<typename TypeWithStrSupp>
|
|
||||||
std::string str(TypeWithStrSupp value) {
|
|
||||||
std::stringstream ss;
|
|
||||||
ss << value;
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// **** **** **** the SimpCfg **** **** **** //
|
|
||||||
|
|
||||||
|
|
||||||
class SimpCfg : public GroupKV {
|
class SimpCfg : public GroupKV {
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue