From a28dfdc394dad6e7aee165cd7dbe1ab18a87b42b Mon Sep 17 00:00:00 2001
From: jaime-m-p <>
Date: Fri, 17 May 2024 19:04:08 +0200
Subject: [PATCH] Using range_nfd instead of std::tuple

---
 scripts/gen-unicode-data.py | 2 +-
 unicode-data.cpp            | 2 +-
 unicode-data.h              | 8 +++++++-
 unicode.cpp                 | 9 ++++-----
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/scripts/gen-unicode-data.py b/scripts/gen-unicode-data.py
index 06fab5372..744873c2a 100644
--- a/scripts/gen-unicode-data.py
+++ b/scripts/gen-unicode-data.py
@@ -128,7 +128,7 @@ for tuple in table_uppercase:
     out("{0x%06X, 0x%06X}," % tuple)
 out("};\n")
 
-out("const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd = { // start, last, nfd")
+out("const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd")
 for triple in ranges_nfd:
     out("{0x%06X, 0x%06X, 0x%06X}," % triple)
 out("};\n")
diff --git a/unicode-data.cpp b/unicode-data.cpp
index 007161f1b..d7c1c898d 100644
--- a/unicode-data.cpp
+++ b/unicode-data.cpp
@@ -5150,7 +5150,7 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
 {0x01E943, 0x01E921},
 };
 
-const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd = { // start, last, nfd
+const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd
 {0x000000, 0x000000, 0x000000},
 {0x0000C0, 0x0000C5, 0x000041},
 {0x0000C7, 0x0000C7, 0x000043},
diff --git a/unicode-data.h b/unicode-data.h
index ce26748d0..e27fe1770 100644
--- a/unicode-data.h
+++ b/unicode-data.h
@@ -5,10 +5,16 @@
 #include <unordered_map>
 #include <unordered_set>
 
+struct range_nfd {
+    uint32_t first;
+    uint32_t last;
+    uint32_t nfd;
+};
+
 static const uint32_t MAX_CODEPOINTS = 0x110000;
 
 extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
 extern const std::unordered_set<uint32_t> unicode_set_whitespace;
 extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase;
 extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase;
-extern const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd;
+extern const std::vector<range_nfd> unicode_ranges_nfd;
diff --git a/unicode.cpp b/unicode.cpp
index e47d1cc2e..056a4c741 100644
--- a/unicode.cpp
+++ b/unicode.cpp
@@ -135,7 +135,7 @@ static std::vector<codepoint_flags> unicode_cpt_flags_array() {
     }
 
     for (auto &range : unicode_ranges_nfd) { // start, last, nfd
-        cpt_flags[std::get<2>(range)].is_nfd = true;
+        cpt_flags[range.nfd].is_nfd = true;
     }
 
     return cpt_flags;
@@ -580,15 +580,14 @@ std::string unicode_cpt_to_utf8(uint32_t cp) {
 }
 
 std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts) {
-    // unicode_ranges_nfd[i] -> tuple(first, last, nfd)
-    auto comp = +[] (const uint32_t cpt, const decltype(unicode_ranges_nfd)::value_type & triple) {
-        return cpt < std::get<0>(triple);
+    auto comp = [] (const uint32_t cpt, const range_nfd & range) {
+        return cpt < range.first;
     };
     std::vector<uint32_t> result(cpts.size());
     for (size_t i = 0; i < cpts.size(); ++i) {
         const uint32_t cpt = cpts[i];
         auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1;
-        result[i] = (std::get<0>(*it) <= cpt && cpt <= std::get<1>(*it)) ? std::get<2>(*it) : cpt;
+        result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
     }
     return result;
 }