use builtin tolower
This commit is contained in:
parent
2dd36d6ddd
commit
6b33a09462
1 changed file with 8 additions and 7 deletions
15
llama.cpp
15
llama.cpp
|
@@ -68,6 +68,7 @@
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
|
#include <cwctype>
|
||||||
#include <forward_list>
|
#include <forward_list>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
@@ -8955,15 +8956,15 @@ struct llm_tokenizer_wpm {
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t to_lower(uint32_t code) {
|
uint32_t to_lower(uint32_t code) {
|
||||||
if (
|
#if defined(_WIN32)
|
||||||
(code >= 0x041 && code <= 0x05A) || // latin
|
if (code > 0xFFFF) {
|
||||||
(code >= 0x391 && code <= 0x3A9) || // greek
|
|
||||||
(code >= 0x410 && code <= 0x42F) // cyrillic
|
|
||||||
) {
|
|
||||||
return code + 32;
|
|
||||||
}
|
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
return std::tolower(static_cast<wchar_t>(code), std::locale("en_US.UTF-8"));
|
||||||
|
#else
|
||||||
|
return std::tolower((wchar_t)code, std::locale("en_US.UTF-8"));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
bool is_ascii_punct(uint32_t code) {
|
bool is_ascii_punct(uint32_t code) {
|
||||||
return code < 256 && ispunct(code);
|
return code < 256 && ispunct(code);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue