use builtin tolower

This commit is contained in:
Douglas Hanley 2024-02-27 11:42:23 -06:00
parent 2dd36d6ddd
commit 6b33a09462

View file

@@ -68,6 +68,7 @@
 #include <cstdio>
 #include <cstring>
 #include <ctime>
+#include <cwctype>
 #include <forward_list>
 #include <fstream>
 #include <functional>
@@ -8955,15 +8956,15 @@ struct llm_tokenizer_wpm {
     }
     uint32_t to_lower(uint32_t code) {
-        if (
-            (code >= 0x041 && code <= 0x05A) || // latin
-            (code >= 0x391 && code <= 0x3A9) || // greek
-            (code >= 0x410 && code <= 0x42F) // cyrillic
-        ) {
-            return code + 32;
-        }
+#if defined(_WIN32)
+        if (code > 0xFFFF) {
             return code;
         }
+        return std::tolower(static_cast<wchar_t>(code), std::locale("en_US.UTF-8"));
+#else
+        return std::tolower((wchar_t)code, std::locale("en_US.UTF-8"));
+#endif
+    }
     bool is_ascii_punct(uint32_t code) {
         return code < 256 && ispunct(code);