llama : reduce compile time and binary size (#9712)
* llama : speed up compile time * fix build * fix build (2)
This commit is contained in:
parent
f536f4c439
commit
a39ab216aa
4 changed files with 35 additions and 26 deletions
|
@ -123,11 +123,11 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
|
|||
static std::vector<codepoint_flags> unicode_cpt_flags_array() {
|
||||
std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
|
||||
|
||||
assert (unicode_ranges_flags.front().first == 0);
|
||||
assert (unicode_ranges_flags.back().first == MAX_CODEPOINTS);
|
||||
assert (unicode_ranges_flags.begin()[0].first == 0);
|
||||
assert (unicode_ranges_flags.begin()[unicode_ranges_flags.size()-1].first == MAX_CODEPOINTS);
|
||||
for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) {
|
||||
const auto range_ini = unicode_ranges_flags[i-1]; // codepoint_ini, flags
|
||||
const auto range_end = unicode_ranges_flags[i]; // codepoint_end, flags
|
||||
const auto range_ini = unicode_ranges_flags.begin()[i-1]; // codepoint_ini, flags
|
||||
const auto range_end = unicode_ranges_flags.begin()[i]; // codepoint_end, flags
|
||||
for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) {
|
||||
cpt_flags[cpt] = range_ini.second;
|
||||
}
|
||||
|
@ -597,7 +597,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
|
|||
std::vector<uint32_t> result(cpts.size());
|
||||
for (size_t i = 0; i < cpts.size(); ++i) {
|
||||
const uint32_t cpt = cpts[i];
|
||||
auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1;
|
||||
auto it = std::upper_bound(unicode_ranges_nfd.begin(), unicode_ranges_nfd.end(), cpt, comp) - 1;
|
||||
result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
|
||||
}
|
||||
return result;
|
||||
|
@ -639,8 +639,15 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
|
|||
}
|
||||
|
||||
uint32_t unicode_tolower(uint32_t cp) {
|
||||
auto it = unicode_map_lowercase.find(cp);
|
||||
return it == unicode_map_lowercase.end() ? cp : it->second;
|
||||
// binary search
|
||||
auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp,
|
||||
[](const std::pair<uint32_t, uint32_t> & pair, uint32_t value) {
|
||||
return pair.first < value;
|
||||
});
|
||||
if (it != unicode_map_lowercase.end() && it->first == cp) {
|
||||
return it->second;
|
||||
}
|
||||
return cp; // Return the original code point if no lowercase mapping is found
|
||||
}
|
||||
|
||||
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue