/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/str/str.h"

/**
 * Inclusive {first, last} code point ranges, all at or below 0xffff,
 * that iswseparator() reports as NOT being separators.
 *
 * Rows must remain sorted in ascending order and must not overlap,
 * because iswseparator() binary searches this table on the second
 * column (the last code point of each range). Bounds are stored as
 * unsigned short, so no entry may exceed 0xffff.
 *
 * The trailing comment on each row gives the number of code points the
 * range spans, followed by informal labels for what the range covers.
 */
static const unsigned short kCodes[][2] = {
    {0x00aa, 0x00aa}, /*     1x English */
    {0x00b2, 0x00b3}, /*     2x English Arabic */
    {0x00b5, 0x00b5}, /*     1x Greek */
    {0x00b9, 0x00ba}, /*     2x English Arabic */
    {0x00bc, 0x00be}, /*     3x Vulgar English Arabic */
    {0x00c0, 0x00d6}, /*    23x Watin */
    {0x00d8, 0x00f6}, /*    31x Watin */
    {0x0100, 0x02c1}, /*   450x Watin-AB,IPA,Spacemod */
    {0x02c6, 0x02d1}, /*    12x Spacemod */
    {0x02e0, 0x02e4}, /*     5x Spacemod */
    {0x02ec, 0x02ec}, /*     1x Spacemod */
    {0x02ee, 0x02ee}, /*     1x Spacemod */
    {0x0370, 0x0374}, /*     5x Greek */
    {0x0376, 0x0377}, /*     2x Greek */
    {0x037a, 0x037d}, /*     4x Greek */
    {0x037f, 0x037f}, /*     1x Greek */
    {0x0386, 0x0386}, /*     1x Greek */
    {0x0388, 0x038a}, /*     3x Greek */
    {0x038c, 0x038c}, /*     1x Greek */
    {0x038e, 0x03a1}, /*    20x Greek */
    {0x03a3, 0x03f5}, /*    83x Greek */
    {0x03f7, 0x0481}, /*   139x Greek */
    {0x048a, 0x052f}, /*   166x Cyrillic */
    {0x0531, 0x0556}, /*    38x Armenian */
    {0x0560, 0x0588}, /*    41x Armenian */
    {0x05d0, 0x05ea}, /*    27x Hebrew */
    {0x0620, 0x064a}, /*    43x Arabic */
    {0x0660, 0x0669}, /*    10x Arabic */
    {0x0671, 0x06d3}, /*    99x Arabic */
    {0x06ee, 0x06fc}, /*    15x Arabic */
    {0x0712, 0x072f}, /*    30x Syriac */
    {0x074d, 0x07a5}, /*    89x Syriac,Arabic2,Thaana */
    {0x07c0, 0x07ea}, /*    43x NKo */
    {0x0800, 0x0815}, /*    22x Samaritan */
    {0x0840, 0x0858}, /*    25x Mandaic */
    {0x0904, 0x0939}, /*    54x Devanagari */
    {0x0993, 0x09a8}, /*    22x Bengali */
    {0x09e6, 0x09f1}, /*    12x Bengali */
    {0x0a13, 0x0a28}, /*    22x Gurmukhi */
    {0x0a66, 0x0a6f}, /*    10x Gurmukhi */
    {0x0a93, 0x0aa8}, /*    22x Gujarati */
    {0x0b13, 0x0b28}, /*    22x Oriya */
    {0x0c92, 0x0ca8}, /*    23x Kannada */
    {0x0caa, 0x0cb3}, /*    10x Kannada */
    {0x0ce6, 0x0cef}, /*    10x Kannada */
    {0x0d12, 0x0d3a}, /*    41x Malayalam */
    {0x0d85, 0x0d96}, /*    18x Sinhala */
    {0x0d9a, 0x0db1}, /*    24x Sinhala */
    {0x0de6, 0x0def}, /*    10x Sinhala */
    {0x0e01, 0x0e30}, /*    48x Thai */
    {0x0e8c, 0x0ea3}, /*    24x Lao */
    {0x0f20, 0x0f33}, /*    20x Tibetan */
    {0x0f49, 0x0f6c}, /*    36x Tibetan */
    {0x109e, 0x10c5}, /*    40x Myanmar,Georgian */
    {0x10d0, 0x10fa}, /*    43x Georgian */
    {0x10fc, 0x1248}, /*   333x Georgian,Hangul,Ethiopic */
    {0x13a0, 0x13f5}, /*    86x Cherokee */
    {0x1401, 0x166d}, /*   621x Aboriginal */
    {0x16a0, 0x16ea}, /*    75x Runic */
    {0x1700, 0x170c}, /*    13x Tagalog */
    {0x1780, 0x17b3}, /*    52x Khmer */
    {0x1820, 0x1878}, /*    89x Mongolian */
    {0x1a00, 0x1a16}, /*    23x Buginese */
    {0x1a20, 0x1a54}, /*    53x Tai Tham */
    {0x1a80, 0x1a89}, /*    10x Tai Tham */
    {0x1a90, 0x1a99}, /*    10x Tai Tham */
    {0x1b05, 0x1b33}, /*    47x Balinese */
    {0x1b50, 0x1b59}, /*    10x Balinese */
    {0x1b83, 0x1ba0}, /*    30x Sundanese */
    {0x1bae, 0x1be5}, /*    56x Sundanese */
    {0x1c90, 0x1cba}, /*    43x Georgian2 */
    {0x1cbd, 0x1cbf}, /*     3x Georgian2 */
    {0x1e00, 0x1f15}, /*   278x Watin-C,Greek2 */
    {0x2070, 0x2071}, /*     2x Supersub */
    {0x2074, 0x2079}, /*     6x Supersub */
    {0x207f, 0x2089}, /*    11x Supersub */
    {0x2090, 0x209c}, /*    13x Supersub */
    {0x2100, 0x2117}, /*    24x Letterlike */
    {0x2119, 0x213f}, /*    39x Letterlike */
    {0x2145, 0x214a}, /*     6x Letterlike */
    {0x214c, 0x218b}, /*    64x Letterlike,Numbery */
    {0x21af, 0x21cd}, /*    31x Arrows */
    {0x21d5, 0x21f3}, /*    31x Arrows */
    {0x230c, 0x231f}, /*    20x Technical */
    {0x232b, 0x237b}, /*    81x Technical */
    {0x237d, 0x239a}, /*    30x Technical */
    {0x23b4, 0x23db}, /*    40x Technical */
    {0x23e2, 0x2426}, /*    69x Technical,ControlPictures */
    {0x2460, 0x25b6}, /*   343x Enclosed,Boxes,Blocks,Shapes */
    {0x25c2, 0x25f7}, /*    54x Shapes */
    {0x2600, 0x266e}, /*   111x Symbols */
    {0x2670, 0x2767}, /*   248x Symbols,Dingbats */
    {0x2776, 0x27bf}, /*    74x Dingbats */
    {0x2800, 0x28ff}, /*   256x Braille */
    {0x2c00, 0x2c2e}, /*    47x Glagolitic */
    {0x2c30, 0x2c5e}, /*    47x Glagolitic */
    {0x2c60, 0x2ce4}, /*   133x Watin-D */
    {0x2d00, 0x2d25}, /*    38x Georgian2 */
    {0x2d30, 0x2d67}, /*    56x Tifinagh */
    {0x2d80, 0x2d96}, /*    23x Ethiopic2 */
    {0x2e2f, 0x2e2f}, /*     1x Punctuation2 */
    {0x3005, 0x3007}, /*     3x CJK Symbols & Punctuation */
    {0x3021, 0x3029}, /*     9x CJK Symbols & Punctuation */
    {0x3031, 0x3035}, /*     5x CJK Symbols & Punctuation */
    {0x3038, 0x303c}, /*     5x CJK Symbols & Punctuation */
    {0x3041, 0x3096}, /*    86x Hiragana */
    {0x30a1, 0x30fa}, /*    90x Katakana */
    {0x3105, 0x312f}, /*    43x Bopomofo */
    {0x3131, 0x318e}, /*    94x Hangul Compatibility Jamo */
    {0x31a0, 0x31ba}, /*    27x Bopomofo Extended */
    {0x31f0, 0x31ff}, /*    16x Katakana Phonetic Extensions */
    {0x3220, 0x3229}, /*    10x Enclosed CJK Letters & Months */
    {0x3248, 0x324f}, /*     8x Enclosed CJK Letters & Months */
    {0x3251, 0x325f}, /*    15x Enclosed CJK Letters & Months */
    {0x3280, 0x3289}, /*    10x Enclosed CJK Letters & Months */
    {0x32b1, 0x32bf}, /*    15x Enclosed CJK Letters & Months */
    {0x3400, 0x4db5}, /*  6582x CJK Unified Ideographs Extension A */
    {0x4dc0, 0x9fef}, /* 21040x Yijing Hexagram, CJK Unified Ideographs */
    {0xa000, 0xa48c}, /*  1165x Yi Syllables */
    {0xa4d0, 0xa4fd}, /*    46x Lisu */
    {0xa500, 0xa60c}, /*   269x Vai */
    {0xa610, 0xa62b}, /*    28x Vai */
    {0xa6a0, 0xa6ef}, /*    80x Bamum */
    {0xa80c, 0xa822}, /*    23x Syloti Nagri */
    {0xa840, 0xa873}, /*    52x Phags-pa */
    {0xa882, 0xa8b3}, /*    50x Saurashtra */
    {0xa8d0, 0xa8d9}, /*    10x Saurashtra */
    {0xa900, 0xa925}, /*    38x Kayah Li */
    {0xa930, 0xa946}, /*    23x Rejang */
    {0xa960, 0xa97c}, /*    29x Hangul Jamo Extended-A */
    {0xa984, 0xa9b2}, /*    47x Javanese */
    {0xa9cf, 0xa9d9}, /*    11x Javanese */
    {0xaa00, 0xaa28}, /*    41x Cham */
    {0xaa50, 0xaa59}, /*    10x Cham */
    {0xabf0, 0xabf9}, /*    10x Meetei Mayek */
    {0xac00, 0xd7a3}, /* 11172x Hangul Syllables */
    {0xf900, 0xfa6d}, /*   366x CJK Compatibility Ideographs */
    {0xfa70, 0xfad9}, /*   106x CJK Compatibility Ideographs */
    {0xfb1f, 0xfb28}, /*    10x Alphabetic Presentation Forms */
    {0xfb2a, 0xfb36}, /*    13x Alphabetic Presentation Forms */
    {0xfb46, 0xfbb1}, /*   108x Alphabetic Presentation Forms */
    {0xfbd3, 0xfd3d}, /*   363x Arabic Presentation Forms-A */
    {0xfe76, 0xfefc}, /*   135x Arabic Presentation Forms-B */
    {0xff10, 0xff19}, /*    10x Dubs */
    {0xff21, 0xff3a}, /*    26x Dubs */
    {0xff41, 0xff5a}, /*    26x Dubs */
    {0xff66, 0xffbe}, /*    89x Dubs */
    {0xffc2, 0xffc7}, /*     6x Dubs */
    {0xffca, 0xffcf}, /*     6x Dubs */
    {0xffd2, 0xffd7}, /*     6x Dubs */
    {0xffda, 0xffdc}, /*     3x Dubs */
};

/**
 * Inclusive {first, last} code point ranges above 0xffff that
 * iswseparator() reports as NOT being separators.
 *
 * Rows must remain sorted in ascending order and must not overlap,
 * because iswseparator() binary searches this table on the second
 * column (the last code point of each range).
 *
 * The trailing comment on each row gives the number of code points the
 * range spans, followed by informal labels for what the range covers.
 */
static const unsigned kAstralCodes[][2] = {
    {0x10107, 0x10133}, /*    45x Aegean */
    {0x10140, 0x10178}, /*    57x Ancient Greek Numbers */
    {0x1018a, 0x1018b}, /*     2x Ancient Greek Numbers */
    {0x10280, 0x1029c}, /*    29x Lycian */
    {0x102a0, 0x102d0}, /*    49x Carian */
    {0x102e1, 0x102fb}, /*    27x Coptic Epact Numbers */
    {0x10300, 0x10323}, /*    36x Old Italic */
    {0x1032d, 0x1034a}, /*    30x Old Italic, Gothic */
    {0x10350, 0x10375}, /*    38x Old Permic */
    {0x10380, 0x1039d}, /*    30x Ugaritic */
    {0x103a0, 0x103c3}, /*    36x Old Persian */
    {0x103c8, 0x103cf}, /*     8x Old Persian */
    {0x103d1, 0x103d5}, /*     5x Old Persian */
    {0x10400, 0x1049d}, /*   158x Deseret, Shavian, Osmanya */
    {0x104b0, 0x104d3}, /*    36x Osage */
    {0x104d8, 0x104fb}, /*    36x Osage */
    {0x10500, 0x10527}, /*    40x Elbasan */
    {0x10530, 0x10563}, /*    52x Caucasian Albanian */
    {0x10600, 0x10736}, /*   311x Linear A */
    {0x10800, 0x10805}, /*     6x Cypriot Syllabary */
    {0x1080a, 0x10835}, /*    44x Cypriot Syllabary */
    {0x10837, 0x10838}, /*     2x Cypriot Syllabary */
    {0x1083f, 0x1089e}, /*    96x Cypriot,ImperialAramaic,Palmyrene,Nabataean */
    {0x108e0, 0x108f2}, /*    19x Hatran */
    {0x108f4, 0x108f5}, /*     2x Hatran */
    {0x108fb, 0x1091b}, /*    33x Hatran */
    {0x10920, 0x10939}, /*    26x Lydian */
    {0x10980, 0x109b7}, /*    56x Meroitic Hieromarks */
    {0x109bc, 0x109cf}, /*    20x Meroitic Cursive */
    {0x109d2, 0x10a00}, /*    47x Meroitic Cursive */
    {0x10a10, 0x10a13}, /*     4x Kharoshthi */
    {0x10a15, 0x10a17}, /*     3x Kharoshthi */
    {0x10a19, 0x10a35}, /*    29x Kharoshthi */
    {0x10a40, 0x10a48}, /*     9x Kharoshthi */
    {0x10a60, 0x10a7e}, /*    31x Old South Arabian */
    {0x10a80, 0x10a9f}, /*    32x Old North Arabian */
    {0x10ac0, 0x10ac7}, /*     8x Manichaean */
    {0x10ac9, 0x10ae4}, /*    28x Manichaean */
    {0x10aeb, 0x10aef}, /*     5x Manichaean */
    {0x10b00, 0x10b35}, /*    54x Avestan */
    {0x10b40, 0x10b55}, /*    22x Inscriptional Parthian */
    {0x10b58, 0x10b72}, /*    27x Inscriptional Parthian and Pahlavi */
    {0x10b78, 0x10b91}, /*    26x Inscriptional Pahlavi, Psalter Pahlavi */
    {0x10c00, 0x10c48}, /*    73x Old Turkic */
    {0x10c80, 0x10cb2}, /*    51x Old Hungarian */
    {0x10cc0, 0x10cf2}, /*    51x Old Hungarian */
    {0x10cfa, 0x10d23}, /*    42x Old Hungarian, Hanifi Rohingya */
    {0x10d30, 0x10d39}, /*    10x Hanifi Rohingya */
    {0x10e60, 0x10e7e}, /*    31x Rumi Numeral Symbols */
    {0x10f00, 0x10f27}, /*    40x Old Sogdian */
    {0x10f30, 0x10f45}, /*    22x Sogdian */
    {0x10f51, 0x10f54}, /*     4x Sogdian */
    {0x10fe0, 0x10ff6}, /*    23x Elymaic */
    {0x11003, 0x11037}, /*    53x Brahmi */
    {0x11052, 0x1106f}, /*    30x Brahmi */
    {0x11083, 0x110af}, /*    45x Kaithi */
    {0x110d0, 0x110e8}, /*    25x Sora Sompeng */
    {0x110f0, 0x110f9}, /*    10x Sora Sompeng */
    {0x11103, 0x11126}, /*    36x Chakma */
    {0x11136, 0x1113f}, /*    10x Chakma */
    {0x11144, 0x11144}, /*     1x Chakma */
    {0x11150, 0x11172}, /*    35x Mahajani */
    {0x11176, 0x11176}, /*     1x Mahajani */
    {0x11183, 0x111b2}, /*    48x Sharada */
    {0x111c1, 0x111c4}, /*     4x Sharada */
    {0x111d0, 0x111da}, /*    11x Sharada */
    {0x111dc, 0x111dc}, /*     1x Sharada */
    {0x111e1, 0x111f4}, /*    20x Sinhala Archaic Numbers */
    {0x11200, 0x11211}, /*    18x Khojki */
    {0x11213, 0x1122b}, /*    25x Khojki */
    {0x11280, 0x11286}, /*     7x Multani */
    {0x11288, 0x11288}, /*     1x Multani */
    {0x1128a, 0x1128d}, /*     4x Multani */
    {0x1128f, 0x1129d}, /*    15x Multani */
    {0x1129f, 0x112a8}, /*    10x Multani */
    {0x112b0, 0x112de}, /*    47x Khudawadi */
    {0x112f0, 0x112f9}, /*    10x Khudawadi */
    {0x11305, 0x1130c}, /*     8x Grantha */
    {0x1130f, 0x11310}, /*     2x Grantha */
    {0x11313, 0x11328}, /*    22x Grantha */
    {0x1132a, 0x11330}, /*     7x Grantha */
    {0x11332, 0x11333}, /*     2x Grantha */
    {0x11335, 0x11339}, /*     5x Grantha */
    {0x1133d, 0x1133d}, /*     1x Grantha */
    {0x11350, 0x11350}, /*     1x Grantha */
    {0x1135d, 0x11361}, /*     5x Grantha */
    {0x11400, 0x11434}, /*    53x Newa */
    {0x11447, 0x1144a}, /*     4x Newa */
    {0x11450, 0x11459}, /*    10x Newa */
    {0x1145f, 0x1145f}, /*     1x Newa */
    {0x11480, 0x114af}, /*    48x Tirhuta */
    {0x114c4, 0x114c5}, /*     2x Tirhuta */
    {0x114c7, 0x114c7}, /*     1x Tirhuta */
    {0x114d0, 0x114d9}, /*    10x Tirhuta */
    {0x11580, 0x115ae}, /*    47x Siddham */
    {0x115d8, 0x115db}, /*     4x Siddham */
    {0x11600, 0x1162f}, /*    48x Modi */
    {0x11644, 0x11644}, /*     1x Modi */
    {0x11650, 0x11659}, /*    10x Modi */
    {0x11680, 0x116aa}, /*    43x Takri */
    {0x116b8, 0x116b8}, /*     1x Takri */
    {0x116c0, 0x116c9}, /*    10x Takri */
    {0x11700, 0x1171a}, /*    27x Ahom */
    {0x11730, 0x1173b}, /*    12x Ahom */
    {0x11800, 0x1182b}, /*    44x Dogra */
    {0x118a0, 0x118f2}, /*    83x Warang Citi */
    {0x118ff, 0x118ff}, /*     1x Warang Citi */
    {0x119a0, 0x119a7}, /*     8x Nandinagari */
    {0x119aa, 0x119d0}, /*    39x Nandinagari */
    {0x119e1, 0x119e1}, /*     1x Nandinagari */
    {0x119e3, 0x119e3}, /*     1x Nandinagari */
    {0x11a00, 0x11a00}, /*     1x Zanabazar Square */
    {0x11a0b, 0x11a32}, /*    40x Zanabazar Square */
    {0x11a3a, 0x11a3a}, /*     1x Zanabazar Square */
    {0x11a50, 0x11a50}, /*     1x Soyombo */
    {0x11a5c, 0x11a89}, /*    46x Soyombo */
    {0x11a9d, 0x11a9d}, /*     1x Soyombo */
    {0x11ac0, 0x11af8}, /*    57x Pau Cin Hau */
    {0x11c00, 0x11c08}, /*     9x Bhaiksuki */
    {0x11c0a, 0x11c2e}, /*    37x Bhaiksuki */
    {0x11c40, 0x11c40}, /*     1x Bhaiksuki */
    {0x11c50, 0x11c6c}, /*    29x Bhaiksuki */
    {0x11c72, 0x11c8f}, /*    30x Marchen */
    {0x11d00, 0x11d06}, /*     7x Masaram Gondi */
    {0x11d08, 0x11d09}, /*     2x Masaram Gondi */
    {0x11d0b, 0x11d30}, /*    38x Masaram Gondi */
    {0x11d46, 0x11d46}, /*     1x Masaram Gondi */
    {0x11d50, 0x11d59}, /*    10x Masaram Gondi */
    {0x11d60, 0x11d65}, /*     6x Gunjala Gondi */
    {0x11d67, 0x11d68}, /*     2x Gunjala Gondi */
    {0x11d6a, 0x11d89}, /*    32x Gunjala Gondi */
    {0x11d98, 0x11d98}, /*     1x Gunjala Gondi */
    {0x11da0, 0x11da9}, /*    10x Gunjala Gondi */
    {0x11ee0, 0x11ef2}, /*    19x Makasar */
    {0x11fc0, 0x11fd4}, /*    21x Tamil Supplement */
    {0x12000, 0x12399}, /*   922x Cuneiform */
    {0x12400, 0x1246e}, /*   111x Cuneiform Numbers & Punctuation */
    {0x12480, 0x12543}, /*   196x Early Dynastic Cuneiform */
    {0x13000, 0x1342e}, /*  1071x Egyptian Hieromarks */
    {0x14400, 0x14646}, /*   583x Anatolian Hieromarks */
    {0x16800, 0x16a38}, /*   569x Bamum Supplement */
    {0x16a40, 0x16a5e}, /*    31x Mro */
    {0x16a60, 0x16a69}, /*    10x Mro */
    {0x16ad0, 0x16aed}, /*    30x Bassa Vah */
    {0x16b00, 0x16b2f}, /*    48x Pahawh Hmong */
    {0x16b40, 0x16b43}, /*     4x Pahawh Hmong */
    {0x16b50, 0x16b59}, /*    10x Pahawh Hmong */
    {0x16b5b, 0x16b61}, /*     7x Pahawh Hmong */
    {0x16b63, 0x16b77}, /*    21x Pahawh Hmong */
    {0x16b7d, 0x16b8f}, /*    19x Pahawh Hmong */
    {0x16e40, 0x16e96}, /*    87x Medefaidrin */
    {0x16f00, 0x16f4a}, /*    75x Miao */
    {0x16f50, 0x16f50}, /*     1x Miao */
    {0x16f93, 0x16f9f}, /*    13x Miao */
    {0x16fe0, 0x16fe1}, /*     2x Ideographic Symbols & Punctuation */
    {0x16fe3, 0x16fe3}, /*     1x Ideographic Symbols & Punctuation */
    {0x17000, 0x187f7}, /*  6136x Tangut */
    {0x18800, 0x18af2}, /*   755x Tangut Components */
    {0x1b000, 0x1b11e}, /*   287x Kana Supplement */
    {0x1b150, 0x1b152}, /*     3x Small Kana Extension */
    {0x1b164, 0x1b167}, /*     4x Small Kana Extension */
    {0x1b170, 0x1b2fb}, /*   396x Nushu */
    {0x1bc00, 0x1bc6a}, /*   107x Duployan */
    {0x1bc70, 0x1bc7c}, /*    13x Duployan */
    {0x1bc80, 0x1bc88}, /*     9x Duployan */
    {0x1bc90, 0x1bc99}, /*    10x Duployan */
    {0x1d2e0, 0x1d2f3}, /*    20x Mayan Numerals */
    {0x1d360, 0x1d378}, /*    25x Counting Rod Numerals */
    {0x1d400, 0x1d454}, /*    85x 𝐀..𝑔 Math */
    {0x1d456, 0x1d49c}, /*    71x 𝑖..𝒜 Math */
    {0x1d49e, 0x1d49f}, /*     2x 𝒞..𝒟 Math */
    {0x1d4a2, 0x1d4a2}, /*     1x 𝒢..𝒢 Math */
    {0x1d4a5, 0x1d4a6}, /*     2x 𝒥..𝒦 Math */
    {0x1d4a9, 0x1d4ac}, /*     4x 𝒩..𝒬 Math */
    {0x1d4ae, 0x1d4b9}, /*    12x 𝒮..𝒹 Math */
    {0x1d4bb, 0x1d4bb}, /*     1x 𝒻..𝒻 Math */
    {0x1d4bd, 0x1d4c3}, /*     7x 𝒽..𝓃 Math */
    {0x1d4c5, 0x1d505}, /*    65x 𝓅..𝔅 Math */
    {0x1d507, 0x1d50a}, /*     4x 𝔇..𝔊 Math */
    {0x1d50d, 0x1d514}, /*     8x 𝔍..𝔔 Math */
    {0x1d516, 0x1d51c}, /*     7x 𝔖..𝔜 Math */
    {0x1d51e, 0x1d539}, /*    28x 𝔞..𝔹 Math */
    {0x1d53b, 0x1d53e}, /*     4x 𝔻..𝔾 Math */
    {0x1d540, 0x1d544}, /*     5x 𝕀..𝕄 Math */
    {0x1d546, 0x1d546}, /*     1x 𝕆..𝕆 Math */
    {0x1d54a, 0x1d550}, /*     7x 𝕊..𝕐 Math */
    {0x1d552, 0x1d6a5}, /*   340x 𝕒..𝚥 Math */
    {0x1d6a8, 0x1d6c0}, /*    25x 𝚨..𝛀 Math */
    {0x1d6c2, 0x1d6da}, /*    25x 𝛂..𝛚 Math */
    {0x1d6dc, 0x1d6fa}, /*    31x 𝛜..𝛺 Math */
    {0x1d6fc, 0x1d714}, /*    25x 𝛼..𝜔 Math */
    {0x1d716, 0x1d734}, /*    31x 𝜖..𝜴 Math */
    {0x1d736, 0x1d74e}, /*    25x 𝜶..𝝎 Math */
    {0x1d750, 0x1d76e}, /*    31x 𝝐..𝝮 Math */
    {0x1d770, 0x1d788}, /*    25x 𝝰..𝞈 Math */
    {0x1d78a, 0x1d7a8}, /*    31x 𝞊..𝞨 Math */
    {0x1d7aa, 0x1d7c2}, /*    25x 𝞪..𝟂 Math */
    {0x1d7c4, 0x1d7cb}, /*     8x 𝟄..𝟋 Math */
    {0x1d7ce, 0x1d9ff}, /*   562x Math, Sutton SignWriting */
    {0x1f100, 0x1f10c}, /*    13x Enclosed Alphanumeric Supplement */
    {0x20000, 0x2a6d6}, /* 42711x CJK Unified Ideographs Extension B */
    {0x2a700, 0x2b734}, /*  4149x CJK Unified Ideographs Extension C */
    {0x2b740, 0x2b81d}, /*   222x CJK Unified Ideographs Extension D */
    {0x2b820, 0x2cea1}, /*  5762x CJK Unified Ideographs Extension E */
    {0x2ceb0, 0x2ebe0}, /*  7473x CJK Unified Ideographs Extension F */
    {0x2f800, 0x2fa1d}, /*   542x CJK Compatibility Ideographs Supplement */
};

/**
 * Checks whether 𝑐 is a separator, i.e. not an alphanumeric character.
 *
 * Line reading interfaces generally define separators as UNICODE
 * characters that are outside the letter categories (Lu, Ll, Lt, Lm,
 * Lo) and outside the number categories (Nd, Nl, No). A few other
 * things, such as blocks and emoji (So), are also treated as
 * non-separators here.
 *
 * @param c is the code point to classify
 * @return 0 if 𝑐 is an ASCII letter or digit, or lies inside one of
 *     the ranges listed in kCodes or kAstralCodes, otherwise 1
 */
int iswseparator(wint_t c) {
  int lo, hi, mid, count;

  // ascii fast path: only [0-9A-Za-z] are non-separators
  if (c < 0200) {
    if ('0' <= c && c <= '9') {
      return 0;
    }
    if ('A' <= c && c <= 'Z') {
      return 0;
    }
    if ('a' <= c && c <= 'z') {
      return 0;
    }
    return 1;
  }

  if (c > 0xffff) {
    // lower bound search: first astral range whose last member is >= c
    count = sizeof(kAstralCodes) / sizeof(kAstralCodes[0]);
    for (lo = 0, hi = count; lo < hi;) {
      mid = lo + (hi - lo) / 2;
      if (c > kAstralCodes[mid][1]) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }
    if (lo >= count) {
      return 1;  // c lies beyond the final range
    }
    return c < kAstralCodes[lo][0] || kAstralCodes[lo][1] < c;
  }

  // lower bound search: first bmp range whose last member is >= c
  count = sizeof(kCodes) / sizeof(kCodes[0]);
  for (lo = 0, hi = count; lo < hi;) {
    mid = lo + (hi - lo) / 2;
    if (c > kCodes[mid][1]) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  if (lo >= count) {
    return 1;  // c lies beyond the final range
  }
  return c < kCodes[lo][0] || kCodes[lo][1] < c;
}