From ecebfc0c718c81b7f0d6d6552e81c71fe9bf2053 Mon Sep 17 00:00:00 2001 From: jaime-m-p <> Date: Fri, 26 Jul 2024 00:16:24 +0200 Subject: [PATCH] Update unicode data: sorted whitespaces --- scripts/gen-unicode-data.py | 2 +- src/unicode-data.cpp | 3 +-- src/unicode-data.h | 3 +-- src/unicode.cpp | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/gen-unicode-data.py b/scripts/gen-unicode-data.py index 542a9edba..d774fcabe 100644 --- a/scripts/gen-unicode-data.py +++ b/scripts/gen-unicode-data.py @@ -170,7 +170,7 @@ for rle in codepoint_categs_runs: out("0x%04X," % rle) out("};\n") -out("const std::unordered_set unicode_set_whitespace = {") +out("const std::vector unicode_vec_whitespace = {") for codepoint in table_whitespace: out("0x%06X," % codepoint) out("};\n") diff --git a/src/unicode-data.cpp b/src/unicode-data.cpp index 4a0c0547c..2591723ce 100644 --- a/src/unicode-data.cpp +++ b/src/unicode-data.cpp @@ -5,7 +5,6 @@ #include #include #include -#include const std::vector unicode_rle_codepoints_categs = { // run length encoding, 5 bits categ + 11 bits length 0x03E1, @@ -4527,7 +4526,7 @@ const std::vector unicode_rle_codepoints_categs = { // run length enc 0x0020, }; -const std::unordered_set unicode_set_whitespace = { +const std::vector unicode_vec_whitespace = { 0x000009, 0x00000A, 0x00000B, diff --git a/src/unicode-data.h b/src/unicode-data.h index cd6a6451a..682f79c37 100644 --- a/src/unicode-data.h +++ b/src/unicode-data.h @@ -3,7 +3,6 @@ #include #include #include -#include struct range_nfd { uint32_t first; @@ -14,7 +13,7 @@ struct range_nfd { static const uint32_t MAX_CODEPOINTS = 0x110000; extern const std::vector unicode_rle_codepoints_categs; -extern const std::unordered_set unicode_set_whitespace; +extern const std::vector unicode_vec_whitespace; extern const std::unordered_map unicode_map_lowercase; extern const std::unordered_map unicode_map_uppercase; extern const std::vector unicode_ranges_nfd; diff --git a/src/unicode.cpp b/src/unicode.cpp index 4c3335974..dd413c809 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -591,7 +591,7 @@ codepoint_categ unicode_cpt_category(const uint32_t cp) { } assert (cpt == MAX_CODEPOINTS); - for (auto cpt : unicode_set_whitespace) { + for (auto cpt : unicode_vec_whitespace) { cpt_categs[cpt].set_flag(codepoint_categ::WHITESPACE); }