unicode : fix data race for unidentified codepoints
ggml-ci
This commit is contained in:
parent
21851c11d1
commit
6c533edb94
1 changed file with 11 additions and 9 deletions
|
@@ -344,8 +344,9 @@ static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t
|
||||||
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
|
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
|
||||||
std::vector<uint32_t> result;
|
std::vector<uint32_t> result;
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
while (offset < utf16.size())
|
while (offset < utf16.size()) {
|
||||||
result.push_back(codepoint_from_utf16(utf16, offset));
|
result.push_back(codepoint_from_utf16(utf16, offset));
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -393,12 +394,13 @@ static std::unordered_map<uint32_t, int> codepoint_type_map() {
|
||||||
|
|
||||||
static int codepoint_type(uint32_t cp) {
|
static int codepoint_type(uint32_t cp) {
|
||||||
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
|
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
|
||||||
return codepoint_types[cp];
|
return codepoint_types.find(cp) == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : codepoint_types[cp];
|
||||||
}
|
}
|
||||||
|
|
||||||
static int codepoint_type(const std::string & utf8) {
|
static int codepoint_type(const std::string & utf8) {
|
||||||
if (utf8.length() == 0)
|
if (utf8.length() == 0) {
|
||||||
return CODEPOINT_TYPE_UNIDENTIFIED;
|
return CODEPOINT_TYPE_UNIDENTIFIED;
|
||||||
|
}
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
return codepoint_type(codepoint_from_utf8(utf8, offset));
|
return codepoint_type(codepoint_from_utf8(utf8, offset));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue