Update unicode data: sorted whitespaces

This commit is contained in:
jaime-m-p 2024-07-26 00:16:24 +02:00
parent 23cf064e3b
commit ecebfc0c71
4 changed files with 4 additions and 6 deletions

View file

@@ -170,7 +170,7 @@ for rle in codepoint_categs_runs:
out("0x%04X," % rle)
out("};\n")
-out("const std::unordered_set<uint32_t> unicode_set_whitespace = {")
+out("const std::vector<uint32_t> unicode_vec_whitespace = {")
for codepoint in table_whitespace:
out("0x%06X," % codepoint)
out("};\n")

View file

@@ -5,7 +5,6 @@
#include <cstdint>
#include <vector>
#include <unordered_map>
-#include <unordered_set>
const std::vector<uint16_t> unicode_rle_codepoints_categs = { // run length encoding, 5 bits categ + 11 bits length
0x03E1,
@@ -4527,7 +4526,7 @@ const std::vector<uint16_t> unicode_rle_codepoints_categs = { // run length enc
0x0020,
};
-const std::unordered_set<uint32_t> unicode_set_whitespace = {
+const std::vector<uint32_t> unicode_vec_whitespace = {
0x000009,
0x00000A,
0x00000B,

View file

@@ -3,7 +3,6 @@
#include <cstdint>
#include <vector>
#include <unordered_map>
-#include <unordered_set>
struct range_nfd {
uint32_t first;
@@ -14,7 +13,7 @@ struct range_nfd {
static const uint32_t MAX_CODEPOINTS = 0x110000;
extern const std::vector<uint16_t> unicode_rle_codepoints_categs;
-extern const std::unordered_set<uint32_t> unicode_set_whitespace;
+extern const std::vector<uint32_t> unicode_vec_whitespace;
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase;
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase;
extern const std::vector<range_nfd> unicode_ranges_nfd;

View file

@@ -591,7 +591,7 @@ codepoint_categ unicode_cpt_category(const uint32_t cp) {
}
assert (cpt == MAX_CODEPOINTS);
-for (auto cpt : unicode_set_whitespace) {
+for (auto cpt : unicode_vec_whitespace) {
cpt_categs[cpt].set_flag(codepoint_categ::WHITESPACE);
}