unicode : fix data race for unidentified codepoints
ggml-ci
This commit is contained in:
parent
21851c11d1
commit
6c533edb94
1 changed file with 11 additions and 9 deletions
|
@ -344,8 +344,9 @@ static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t
|
|||
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
|
||||
std::vector<uint32_t> result;
|
||||
size_t offset = 0;
|
||||
while (offset < utf16.size())
|
||||
while (offset < utf16.size()) {
|
||||
result.push_back(codepoint_from_utf16(utf16, offset));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -393,12 +394,13 @@ static std::unordered_map<uint32_t, int> codepoint_type_map() {
|
|||
|
||||
static int codepoint_type(uint32_t cp) {
|
||||
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
|
||||
return codepoint_types[cp];
|
||||
return codepoint_types.find(cp) == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : codepoint_types[cp];
|
||||
}
|
||||
|
||||
static int codepoint_type(const std::string & utf8) {
|
||||
if (utf8.length() == 0)
|
||||
if (utf8.length() == 0) {
|
||||
return CODEPOINT_TYPE_UNIDENTIFIED;
|
||||
}
|
||||
size_t offset = 0;
|
||||
return codepoint_type(codepoint_from_utf8(utf8, offset));
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue