Fix unicode_ranges_nfd

This commit is contained in:
jaime-m-p 2024-05-14 22:04:07 +02:00
parent 641944a3a3
commit 1714e1a775
3 changed files with 26 additions and 13 deletions

View file

@ -83,7 +83,7 @@ ranges_flags.append((MAX_CODEPOINTS, CoodepointFlags()))
ranges_nfd = [(0, 0, 0)] # start, last, nfd ranges_nfd = [(0, 0, 0)] # start, last, nfd
for codepoint, norm in table_nfd: for codepoint, norm in table_nfd:
start = ranges_nfd[-1][0] start = ranges_nfd[-1][0]
if norm != ranges_nfd[-1][2]: if ranges_nfd[-1] != (start, codepoint - 1, norm):
ranges_nfd.append(None) ranges_nfd.append(None)
start = codepoint start = codepoint
ranges_nfd[-1] = (start, codepoint, norm) ranges_nfd[-1] = (start, codepoint, norm)

View file

@ -5167,7 +5167,8 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
{0x0000F1, 0x0000F1, 0x00006E}, {0x0000F1, 0x0000F1, 0x00006E},
{0x0000F2, 0x0000F6, 0x00006F}, {0x0000F2, 0x0000F6, 0x00006F},
{0x0000F9, 0x0000FC, 0x000075}, {0x0000F9, 0x0000FC, 0x000075},
{0x0000FD, 0x0000FF, 0x000079}, {0x0000FD, 0x0000FD, 0x000079},
{0x0000FF, 0x0000FF, 0x000079},
{0x000100, 0x000100, 0x000041}, {0x000100, 0x000100, 0x000041},
{0x000101, 0x000101, 0x000061}, {0x000101, 0x000101, 0x000061},
{0x000102, 0x000102, 0x000041}, {0x000102, 0x000102, 0x000041},
@ -5474,7 +5475,8 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
{0x000A5A, 0x000A5A, 0x000A17}, {0x000A5A, 0x000A5A, 0x000A17},
{0x000A5B, 0x000A5B, 0x000A1C}, {0x000A5B, 0x000A5B, 0x000A1C},
{0x000A5E, 0x000A5E, 0x000A2B}, {0x000A5E, 0x000A5E, 0x000A2B},
{0x000B48, 0x000B4C, 0x000B47}, {0x000B48, 0x000B48, 0x000B47},
{0x000B4B, 0x000B4C, 0x000B47},
{0x000B5C, 0x000B5C, 0x000B21}, {0x000B5C, 0x000B5C, 0x000B21},
{0x000B5D, 0x000B5D, 0x000B22}, {0x000B5D, 0x000B5D, 0x000B22},
{0x000B94, 0x000B94, 0x000B92}, {0x000B94, 0x000B94, 0x000B92},
@ -5483,18 +5485,21 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
{0x000BCC, 0x000BCC, 0x000BC6}, {0x000BCC, 0x000BCC, 0x000BC6},
{0x000C48, 0x000C48, 0x000C46}, {0x000C48, 0x000C48, 0x000C46},
{0x000CC0, 0x000CC0, 0x000CBF}, {0x000CC0, 0x000CC0, 0x000CBF},
{0x000CC7, 0x000CCB, 0x000CC6}, {0x000CC7, 0x000CC8, 0x000CC6},
{0x000CCA, 0x000CCB, 0x000CC6},
{0x000D4A, 0x000D4A, 0x000D46}, {0x000D4A, 0x000D4A, 0x000D46},
{0x000D4B, 0x000D4B, 0x000D47}, {0x000D4B, 0x000D4B, 0x000D47},
{0x000D4C, 0x000D4C, 0x000D46}, {0x000D4C, 0x000D4C, 0x000D46},
{0x000DDA, 0x000DDE, 0x000DD9}, {0x000DDA, 0x000DDA, 0x000DD9},
{0x000DDC, 0x000DDE, 0x000DD9},
{0x000F43, 0x000F43, 0x000F42}, {0x000F43, 0x000F43, 0x000F42},
{0x000F4D, 0x000F4D, 0x000F4C}, {0x000F4D, 0x000F4D, 0x000F4C},
{0x000F52, 0x000F52, 0x000F51}, {0x000F52, 0x000F52, 0x000F51},
{0x000F57, 0x000F57, 0x000F56}, {0x000F57, 0x000F57, 0x000F56},
{0x000F5C, 0x000F5C, 0x000F5B}, {0x000F5C, 0x000F5C, 0x000F5B},
{0x000F69, 0x000F69, 0x000F40}, {0x000F69, 0x000F69, 0x000F40},
{0x000F73, 0x000F75, 0x000F71}, {0x000F73, 0x000F73, 0x000F71},
{0x000F75, 0x000F75, 0x000F71},
{0x000F76, 0x000F76, 0x000FB2}, {0x000F76, 0x000F76, 0x000FB2},
{0x000F78, 0x000F78, 0x000FB3}, {0x000F78, 0x000F78, 0x000FB3},
{0x000F81, 0x000F81, 0x000F71}, {0x000F81, 0x000F81, 0x000F71},
@ -5772,7 +5777,10 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
{0x001F40, 0x001F45, 0x0003BF}, {0x001F40, 0x001F45, 0x0003BF},
{0x001F48, 0x001F4D, 0x00039F}, {0x001F48, 0x001F4D, 0x00039F},
{0x001F50, 0x001F57, 0x0003C5}, {0x001F50, 0x001F57, 0x0003C5},
{0x001F59, 0x001F5F, 0x0003A5}, {0x001F59, 0x001F59, 0x0003A5},
{0x001F5B, 0x001F5B, 0x0003A5},
{0x001F5D, 0x001F5D, 0x0003A5},
{0x001F5F, 0x001F5F, 0x0003A5},
{0x001F60, 0x001F67, 0x0003C9}, {0x001F60, 0x001F67, 0x0003C9},
{0x001F68, 0x001F6F, 0x0003A9}, {0x001F68, 0x001F6F, 0x0003A9},
{0x001F70, 0x001F71, 0x0003B1}, {0x001F70, 0x001F71, 0x0003B1},
@ -5788,15 +5796,18 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
{0x001F98, 0x001F9F, 0x000397}, {0x001F98, 0x001F9F, 0x000397},
{0x001FA0, 0x001FA7, 0x0003C9}, {0x001FA0, 0x001FA7, 0x0003C9},
{0x001FA8, 0x001FAF, 0x0003A9}, {0x001FA8, 0x001FAF, 0x0003A9},
{0x001FB0, 0x001FB7, 0x0003B1}, {0x001FB0, 0x001FB4, 0x0003B1},
{0x001FB6, 0x001FB7, 0x0003B1},
{0x001FB8, 0x001FBC, 0x000391}, {0x001FB8, 0x001FBC, 0x000391},
{0x001FBE, 0x001FBE, 0x0003B9}, {0x001FBE, 0x001FBE, 0x0003B9},
{0x001FC1, 0x001FC1, 0x0000A8}, {0x001FC1, 0x001FC1, 0x0000A8},
{0x001FC2, 0x001FC7, 0x0003B7}, {0x001FC2, 0x001FC4, 0x0003B7},
{0x001FC6, 0x001FC7, 0x0003B7},
{0x001FC8, 0x001FC9, 0x000395}, {0x001FC8, 0x001FC9, 0x000395},
{0x001FCA, 0x001FCC, 0x000397}, {0x001FCA, 0x001FCC, 0x000397},
{0x001FCD, 0x001FCF, 0x001FBF}, {0x001FCD, 0x001FCF, 0x001FBF},
{0x001FD0, 0x001FD7, 0x0003B9}, {0x001FD0, 0x001FD3, 0x0003B9},
{0x001FD6, 0x001FD7, 0x0003B9},
{0x001FD8, 0x001FDB, 0x000399}, {0x001FD8, 0x001FDB, 0x000399},
{0x001FDD, 0x001FDF, 0x001FFE}, {0x001FDD, 0x001FDF, 0x001FFE},
{0x001FE0, 0x001FE3, 0x0003C5}, {0x001FE0, 0x001FE3, 0x0003C5},
@ -5806,7 +5817,8 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
{0x001FEC, 0x001FEC, 0x0003A1}, {0x001FEC, 0x001FEC, 0x0003A1},
{0x001FED, 0x001FEE, 0x0000A8}, {0x001FED, 0x001FEE, 0x0000A8},
{0x001FEF, 0x001FEF, 0x000060}, {0x001FEF, 0x001FEF, 0x000060},
{0x001FF2, 0x001FF7, 0x0003C9}, {0x001FF2, 0x001FF4, 0x0003C9},
{0x001FF6, 0x001FF7, 0x0003C9},
{0x001FF8, 0x001FF9, 0x00039F}, {0x001FF8, 0x001FF9, 0x00039F},
{0x001FFA, 0x001FFC, 0x0003A9}, {0x001FFA, 0x001FFC, 0x0003A9},
{0x001FFD, 0x001FFD, 0x0000B4}, {0x001FFD, 0x001FFD, 0x0000B4},
@ -6422,7 +6434,8 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
{0x01112E, 0x01112E, 0x011131}, {0x01112E, 0x01112E, 0x011131},
{0x01112F, 0x01112F, 0x011132}, {0x01112F, 0x01112F, 0x011132},
{0x01134B, 0x01134C, 0x011347}, {0x01134B, 0x01134C, 0x011347},
{0x0114BB, 0x0114BE, 0x0114B9}, {0x0114BB, 0x0114BC, 0x0114B9},
{0x0114BE, 0x0114BE, 0x0114B9},
{0x0115BA, 0x0115BA, 0x0115B8}, {0x0115BA, 0x0115BA, 0x0115B8},
{0x0115BB, 0x0115BB, 0x0115B9}, {0x0115BB, 0x0115BB, 0x0115B9},
{0x011938, 0x011938, 0x011935}, {0x011938, 0x011938, 0x011935},

View file

@ -5,7 +5,7 @@
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
static const size_t MAX_CODEPOINTS = 0x110000; static const uint32_t MAX_CODEPOINTS = 0x110000;
extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags; extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
extern const std::unordered_set<uint32_t> unicode_set_whitespace; extern const std::unordered_set<uint32_t> unicode_set_whitespace;