Update unicode data: sorted whitespaces
This commit is contained in:
parent
23cf064e3b
commit
ecebfc0c71
4 changed files with 4 additions and 6 deletions
|
@ -170,7 +170,7 @@ for rle in codepoint_categs_runs:
|
|||
out("0x%04X," % rle)
|
||||
out("};\n")
|
||||
|
||||
out("const std::unordered_set<uint32_t> unicode_set_whitespace = {")
|
||||
out("const std::vector<uint32_t> unicode_vec_whitespace = {")
|
||||
for codepoint in table_whitespace:
|
||||
out("0x%06X," % codepoint)
|
||||
out("};\n")
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
const std::vector<uint16_t> unicode_rle_codepoints_categs = { // run length encoding, 5 bits categ + 11 bits length
|
||||
0x03E1,
|
||||
|
@ -4527,7 +4526,7 @@ const std::vector<uint16_t> unicode_rle_codepoints_categs = { // run length enc
|
|||
0x0020,
|
||||
};
|
||||
|
||||
const std::unordered_set<uint32_t> unicode_set_whitespace = {
|
||||
const std::vector<uint32_t> unicode_vec_whitespace = {
|
||||
0x000009,
|
||||
0x00000A,
|
||||
0x00000B,
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
struct range_nfd {
|
||||
uint32_t first;
|
||||
|
@ -14,7 +13,7 @@ struct range_nfd {
|
|||
static const uint32_t MAX_CODEPOINTS = 0x110000;
|
||||
|
||||
extern const std::vector<uint16_t> unicode_rle_codepoints_categs;
|
||||
extern const std::unordered_set<uint32_t> unicode_set_whitespace;
|
||||
extern const std::vector<uint32_t> unicode_vec_whitespace;
|
||||
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase;
|
||||
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase;
|
||||
extern const std::vector<range_nfd> unicode_ranges_nfd;
|
||||
|
|
|
@ -591,7 +591,7 @@ codepoint_categ unicode_cpt_category(const uint32_t cp) {
|
|||
}
|
||||
assert (cpt == MAX_CODEPOINTS);
|
||||
|
||||
for (auto cpt : unicode_set_whitespace) {
|
||||
for (auto cpt : unicode_vec_whitespace) {
|
||||
cpt_categs[cpt].set_flag(codepoint_categ::WHITESPACE);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue