Using range_nfd instead of std::tuple

This commit is contained in:
jaime-m-p 2024-05-17 19:04:08 +02:00
parent 6ca6c46058
commit a28dfdc394
4 changed files with 13 additions and 8 deletions

View file

@ -128,7 +128,7 @@ for tuple in table_uppercase:
out("{0x%06X, 0x%06X}," % tuple) out("{0x%06X, 0x%06X}," % tuple)
out("};\n") out("};\n")
out("const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd = { // start, last, nfd") out("const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd")
for triple in ranges_nfd: for triple in ranges_nfd:
out("{0x%06X, 0x%06X, 0x%06X}," % triple) out("{0x%06X, 0x%06X, 0x%06X}," % triple)
out("};\n") out("};\n")

View file

@ -5150,7 +5150,7 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
{0x01E943, 0x01E921}, {0x01E943, 0x01E921},
}; };
const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd = { // start, last, nfd const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd
{0x000000, 0x000000, 0x000000}, {0x000000, 0x000000, 0x000000},
{0x0000C0, 0x0000C5, 0x000041}, {0x0000C0, 0x0000C5, 0x000041},
{0x0000C7, 0x0000C7, 0x000043}, {0x0000C7, 0x0000C7, 0x000043},

View file

@ -5,10 +5,16 @@
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
// One entry of the unicode_ranges_nfd table: every code point in the inclusive
// range [first, last] maps to the single code point `nfd`.
// Member order matters: table entries are written as positional brace
// initializers in the order {first, last, nfd}.
struct range_nfd {
// first code point of the range (inclusive); used as the binary-search key
uint32_t first;
// last code point of the range (inclusive)
uint32_t last;
// code point that the whole range is replaced with
uint32_t nfd;
};
static const uint32_t MAX_CODEPOINTS = 0x110000; static const uint32_t MAX_CODEPOINTS = 0x110000;
extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags; extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
extern const std::unordered_set<uint32_t> unicode_set_whitespace; extern const std::unordered_set<uint32_t> unicode_set_whitespace;
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase; extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase;
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase; extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase;
extern const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd; extern const std::vector<range_nfd> unicode_ranges_nfd;

View file

@ -135,7 +135,7 @@ static std::vector<codepoint_flags> unicode_cpt_flags_array() {
} }
for (auto &range : unicode_ranges_nfd) { // start, last, nfd for (auto &range : unicode_ranges_nfd) { // start, last, nfd
cpt_flags[std::get<2>(range)].is_nfd = true; cpt_flags[range.nfd].is_nfd = true;
} }
return cpt_flags; return cpt_flags;
@ -580,15 +580,14 @@ std::string unicode_cpt_to_utf8(uint32_t cp) {
} }
std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts) { std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts) {
// unicode_ranges_nfd[i] -> tuple(first, last, nfd) auto comp = [] (const uint32_t cpt, const range_nfd & range) {
auto comp = +[] (const uint32_t cpt, const decltype(unicode_ranges_nfd)::value_type & triple) { return cpt < range.first;
return cpt < std::get<0>(triple);
}; };
std::vector<uint32_t> result(cpts.size()); std::vector<uint32_t> result(cpts.size());
for (size_t i = 0; i < cpts.size(); ++i) { for (size_t i = 0; i < cpts.size(); ++i) {
const uint32_t cpt = cpts[i]; const uint32_t cpt = cpts[i];
auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1; auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1;
result[i] = (std::get<0>(*it) <= cpt && cpt <= std::get<1>(*it)) ? std::get<2>(*it) : cpt; result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
} }
return result; return result;
} }