diff --git a/scripts/gen-unicode-data.py b/scripts/gen-unicode-data.py index 4091c05ee..e5ec1797c 100644 --- a/scripts/gen-unicode-data.py +++ b/scripts/gen-unicode-data.py @@ -83,7 +83,7 @@ ranges_flags.append((MAX_CODEPOINTS, CoodepointFlags())) ranges_nfd = [(0, 0, 0)] # start, last, nfd for codepoint, norm in table_nfd: start = ranges_nfd[-1][0] - if norm != ranges_nfd[-1][2]: + if ranges_nfd[-1] != (start, codepoint - 1, norm): ranges_nfd.append(None) start = codepoint ranges_nfd[-1] = (start, codepoint, norm) diff --git a/unicode-data.cpp b/unicode-data.cpp index 96f318831..007161f1b 100644 --- a/unicode-data.cpp +++ b/unicode-data.cpp @@ -5167,7 +5167,8 @@ const std::vector> unicode_ranges_nfd = {0x0000F1, 0x0000F1, 0x00006E}, {0x0000F2, 0x0000F6, 0x00006F}, {0x0000F9, 0x0000FC, 0x000075}, -{0x0000FD, 0x0000FF, 0x000079}, +{0x0000FD, 0x0000FD, 0x000079}, +{0x0000FF, 0x0000FF, 0x000079}, {0x000100, 0x000100, 0x000041}, {0x000101, 0x000101, 0x000061}, {0x000102, 0x000102, 0x000041}, @@ -5474,7 +5475,8 @@ const std::vector> unicode_ranges_nfd = {0x000A5A, 0x000A5A, 0x000A17}, {0x000A5B, 0x000A5B, 0x000A1C}, {0x000A5E, 0x000A5E, 0x000A2B}, -{0x000B48, 0x000B4C, 0x000B47}, +{0x000B48, 0x000B48, 0x000B47}, +{0x000B4B, 0x000B4C, 0x000B47}, {0x000B5C, 0x000B5C, 0x000B21}, {0x000B5D, 0x000B5D, 0x000B22}, {0x000B94, 0x000B94, 0x000B92}, @@ -5483,18 +5485,21 @@ const std::vector> unicode_ranges_nfd = {0x000BCC, 0x000BCC, 0x000BC6}, {0x000C48, 0x000C48, 0x000C46}, {0x000CC0, 0x000CC0, 0x000CBF}, -{0x000CC7, 0x000CCB, 0x000CC6}, +{0x000CC7, 0x000CC8, 0x000CC6}, +{0x000CCA, 0x000CCB, 0x000CC6}, {0x000D4A, 0x000D4A, 0x000D46}, {0x000D4B, 0x000D4B, 0x000D47}, {0x000D4C, 0x000D4C, 0x000D46}, -{0x000DDA, 0x000DDE, 0x000DD9}, +{0x000DDA, 0x000DDA, 0x000DD9}, +{0x000DDC, 0x000DDE, 0x000DD9}, {0x000F43, 0x000F43, 0x000F42}, {0x000F4D, 0x000F4D, 0x000F4C}, {0x000F52, 0x000F52, 0x000F51}, {0x000F57, 0x000F57, 0x000F56}, {0x000F5C, 0x000F5C, 0x000F5B}, {0x000F69, 0x000F69, 0x000F40}, -{0x000F73, 0x000F75, 0x000F71}, +{0x000F73, 0x000F73, 0x000F71}, +{0x000F75, 0x000F75, 0x000F71}, {0x000F76, 0x000F76, 0x000FB2}, {0x000F78, 0x000F78, 0x000FB3}, {0x000F81, 0x000F81, 0x000F71}, @@ -5772,7 +5777,10 @@ const std::vector> unicode_ranges_nfd = {0x001F40, 0x001F45, 0x0003BF}, {0x001F48, 0x001F4D, 0x00039F}, {0x001F50, 0x001F57, 0x0003C5}, -{0x001F59, 0x001F5F, 0x0003A5}, +{0x001F59, 0x001F59, 0x0003A5}, +{0x001F5B, 0x001F5B, 0x0003A5}, +{0x001F5D, 0x001F5D, 0x0003A5}, +{0x001F5F, 0x001F5F, 0x0003A5}, {0x001F60, 0x001F67, 0x0003C9}, {0x001F68, 0x001F6F, 0x0003A9}, {0x001F70, 0x001F71, 0x0003B1}, @@ -5788,15 +5796,18 @@ const std::vector> unicode_ranges_nfd = {0x001F98, 0x001F9F, 0x000397}, {0x001FA0, 0x001FA7, 0x0003C9}, {0x001FA8, 0x001FAF, 0x0003A9}, -{0x001FB0, 0x001FB7, 0x0003B1}, +{0x001FB0, 0x001FB4, 0x0003B1}, +{0x001FB6, 0x001FB7, 0x0003B1}, {0x001FB8, 0x001FBC, 0x000391}, {0x001FBE, 0x001FBE, 0x0003B9}, {0x001FC1, 0x001FC1, 0x0000A8}, -{0x001FC2, 0x001FC7, 0x0003B7}, +{0x001FC2, 0x001FC4, 0x0003B7}, +{0x001FC6, 0x001FC7, 0x0003B7}, {0x001FC8, 0x001FC9, 0x000395}, {0x001FCA, 0x001FCC, 0x000397}, {0x001FCD, 0x001FCF, 0x001FBF}, -{0x001FD0, 0x001FD7, 0x0003B9}, +{0x001FD0, 0x001FD3, 0x0003B9}, +{0x001FD6, 0x001FD7, 0x0003B9}, {0x001FD8, 0x001FDB, 0x000399}, {0x001FDD, 0x001FDF, 0x001FFE}, {0x001FE0, 0x001FE3, 0x0003C5}, @@ -5806,7 +5817,8 @@ const std::vector> unicode_ranges_nfd = {0x001FEC, 0x001FEC, 0x0003A1}, {0x001FED, 0x001FEE, 0x0000A8}, {0x001FEF, 0x001FEF, 0x000060}, -{0x001FF2, 0x001FF7, 0x0003C9}, +{0x001FF2, 0x001FF4, 0x0003C9}, +{0x001FF6, 0x001FF7, 0x0003C9}, {0x001FF8, 0x001FF9, 0x00039F}, {0x001FFA, 0x001FFC, 0x0003A9}, {0x001FFD, 0x001FFD, 0x0000B4}, @@ -6422,7 +6434,8 @@ const std::vector> unicode_ranges_nfd = {0x01112E, 0x01112E, 0x011131}, {0x01112F, 0x01112F, 0x011132}, {0x01134B, 0x01134C, 0x011347}, -{0x0114BB, 0x0114BE, 0x0114B9}, +{0x0114BB, 0x0114BC, 0x0114B9}, +{0x0114BE, 0x0114BE, 0x0114B9}, {0x0115BA, 0x0115BA, 0x0115B8}, {0x0115BB, 0x0115BB, 0x0115B9}, {0x011938, 0x011938, 0x011935}, diff --git a/unicode-data.h b/unicode-data.h index 2afd06f23..ce26748d0 100644 --- a/unicode-data.h +++ b/unicode-data.h @@ -5,7 +5,7 @@ #include #include -static const size_t MAX_CODEPOINTS = 0x110000; +static const uint32_t MAX_CODEPOINTS = 0x110000; extern const std::vector> unicode_ranges_flags; extern const std::unordered_set unicode_set_whitespace;