mirror of
				https://github.com/jart/cosmopolitan.git
				synced 2025-10-26 19:16:41 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			423 lines
		
	
	
	
		
			20 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			423 lines
		
	
	
	
		
			20 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
| #include "libc/wctype.h"
 | ||
| 
 | ||
/*
 * Basic Multilingual Plane code point ranges that are NOT separators.
 *
 * Each row is an inclusive {first, last} pair. Rows are kept in strictly
 * ascending order and never overlap, because the lookup that consumes
 * this table is a binary search keyed on the upper bound of each row.
 * The trailing annotation on each row is the range size followed by an
 * informal name for the script or block it covers.
 */
static const unsigned short kCodes[][2] = {
    {0x00aa, 0x00aa}, /*     1x English */
    {0x00b2, 0x00b3}, /*     2x English Arabic */
    {0x00b5, 0x00b5}, /*     1x Greek */
    {0x00b9, 0x00ba}, /*     2x English Arabic */
    {0x00bc, 0x00be}, /*     3x Vulgar English Arabic */
    {0x00c0, 0x00d6}, /*    23x Watin */
    {0x00d8, 0x00f6}, /*    31x Watin */
    {0x0100, 0x02c1}, /*   450x Watin-AB,IPA,Spacemod */
    {0x02c6, 0x02d1}, /*    12x Spacemod */
    {0x02e0, 0x02e4}, /*     5x Spacemod */
    {0x02ec, 0x02ec}, /*     1x Spacemod */
    {0x02ee, 0x02ee}, /*     1x Spacemod */
    {0x0370, 0x0374}, /*     5x Greek */
    {0x0376, 0x0377}, /*     2x Greek */
    {0x037a, 0x037d}, /*     4x Greek */
    {0x037f, 0x037f}, /*     1x Greek */
    {0x0386, 0x0386}, /*     1x Greek */
    {0x0388, 0x038a}, /*     3x Greek */
    {0x038c, 0x038c}, /*     1x Greek */
    {0x038e, 0x03a1}, /*    20x Greek */
    {0x03a3, 0x03f5}, /*    83x Greek */
    {0x03f7, 0x0481}, /*   139x Greek */
    {0x048a, 0x052f}, /*   166x Cyrillic */
    {0x0531, 0x0556}, /*    38x Armenian */
    {0x0560, 0x0588}, /*    41x Armenian */
    {0x05d0, 0x05ea}, /*    27x Hebrew */
    {0x0620, 0x064a}, /*    43x Arabic */
    {0x0660, 0x0669}, /*    10x Arabic */
    {0x0671, 0x06d3}, /*    99x Arabic */
    {0x06ee, 0x06fc}, /*    15x Arabic */
    {0x0712, 0x072f}, /*    30x Syriac */
    {0x074d, 0x07a5}, /*    89x Syriac,Arabic2,Thaana */
    {0x07c0, 0x07ea}, /*    43x NKo */
    {0x0800, 0x0815}, /*    22x Samaritan */
    {0x0840, 0x0858}, /*    25x Mandaic */
    {0x0904, 0x0939}, /*    54x Devanagari */
    {0x0993, 0x09a8}, /*    22x Bengali */
    {0x09e6, 0x09f1}, /*    12x Bengali */
    {0x0a13, 0x0a28}, /*    22x Gurmukhi */
    {0x0a66, 0x0a6f}, /*    10x Gurmukhi */
    {0x0a93, 0x0aa8}, /*    22x Gujarati */
    {0x0b13, 0x0b28}, /*    22x Oriya */
    {0x0c92, 0x0ca8}, /*    23x Kannada */
    {0x0caa, 0x0cb3}, /*    10x Kannada */
    {0x0ce6, 0x0cef}, /*    10x Kannada */
    {0x0d12, 0x0d3a}, /*    41x Malayalam */
    {0x0d85, 0x0d96}, /*    18x Sinhala */
    {0x0d9a, 0x0db1}, /*    24x Sinhala */
    {0x0de6, 0x0def}, /*    10x Sinhala */
    {0x0e01, 0x0e30}, /*    48x Thai */
    {0x0e8c, 0x0ea3}, /*    24x Lao */
    {0x0f20, 0x0f33}, /*    20x Tibetan */
    {0x0f49, 0x0f6c}, /*    36x Tibetan */
    {0x109e, 0x10c5}, /*    40x Myanmar,Georgian */
    {0x10d0, 0x10fa}, /*    43x Georgian */
    {0x10fc, 0x1248}, /*   333x Georgian,Hangul,Ethiopic */
    {0x13a0, 0x13f5}, /*    86x Cherokee */
    {0x1401, 0x166d}, /*   621x Aboriginal */
    {0x16a0, 0x16ea}, /*    75x Runic */
    {0x1700, 0x170c}, /*    13x Tagalog */
    {0x1780, 0x17b3}, /*    52x Khmer */
    {0x1820, 0x1878}, /*    89x Mongolian */
    {0x1a00, 0x1a16}, /*    23x Buginese */
    {0x1a20, 0x1a54}, /*    53x Tai Tham */
    {0x1a80, 0x1a89}, /*    10x Tai Tham */
    {0x1a90, 0x1a99}, /*    10x Tai Tham */
    {0x1b05, 0x1b33}, /*    47x Balinese */
    {0x1b50, 0x1b59}, /*    10x Balinese */
    {0x1b83, 0x1ba0}, /*    30x Sundanese */
    {0x1bae, 0x1be5}, /*    56x Sundanese */
    {0x1c90, 0x1cba}, /*    43x Georgian2 */
    {0x1cbd, 0x1cbf}, /*     3x Georgian2 */
    {0x1e00, 0x1f15}, /*   278x Watin-C,Greek2 */
    {0x2070, 0x2071}, /*     2x Supersub */
    {0x2074, 0x2079}, /*     6x Supersub */
    {0x207f, 0x2089}, /*    11x Supersub */
    {0x2090, 0x209c}, /*    13x Supersub */
    {0x2100, 0x2117}, /*    24x Letterlike */
    {0x2119, 0x213f}, /*    39x Letterlike */
    {0x2145, 0x214a}, /*     6x Letterlike */
    {0x214c, 0x218b}, /*    64x Letterlike,Numbery */
    {0x21af, 0x21cd}, /*    31x Arrows */
    {0x21d5, 0x21f3}, /*    31x Arrows */
    {0x230c, 0x231f}, /*    20x Technical */
    {0x232b, 0x237b}, /*    81x Technical */
    {0x237d, 0x239a}, /*    30x Technical */
    {0x23b4, 0x23db}, /*    40x Technical */
    {0x23e2, 0x2426}, /*    69x Technical,ControlPictures */
    {0x2460, 0x25b6}, /*   343x Enclosed,Boxes,Blocks,Shapes */
    {0x25c2, 0x25f7}, /*    54x Shapes */
    {0x2600, 0x266e}, /*   111x Symbols */
    {0x2670, 0x2767}, /*   248x Symbols,Dingbats */
    {0x2776, 0x27bf}, /*    74x Dingbats */
    {0x2800, 0x28ff}, /*   256x Braille */
    {0x2c00, 0x2c2e}, /*    47x Glagolitic */
    {0x2c30, 0x2c5e}, /*    47x Glagolitic */
    {0x2c60, 0x2ce4}, /*   133x Watin-D */
    {0x2d00, 0x2d25}, /*    38x Georgian2 */
    {0x2d30, 0x2d67}, /*    56x Tifinagh */
    {0x2d80, 0x2d96}, /*    23x Ethiopic2 */
    {0x2e2f, 0x2e2f}, /*     1x Punctuation2 */
    {0x3005, 0x3007}, /*     3x CJK Symbols & Punctuation */
    {0x3021, 0x3029}, /*     9x CJK Symbols & Punctuation */
    {0x3031, 0x3035}, /*     5x CJK Symbols & Punctuation */
    {0x3038, 0x303c}, /*     5x CJK Symbols & Punctuation */
    {0x3041, 0x3096}, /*    86x Hiragana */
    {0x30a1, 0x30fa}, /*    90x Katakana */
    {0x3105, 0x312f}, /*    43x Bopomofo */
    {0x3131, 0x318e}, /*    94x Hangul Compatibility Jamo */
    {0x31a0, 0x31ba}, /*    27x Bopomofo Extended */
    {0x31f0, 0x31ff}, /*    16x Katakana Phonetic Extensions */
    {0x3220, 0x3229}, /*    10x Enclosed CJK Letters & Months */
    {0x3248, 0x324f}, /*     8x Enclosed CJK Letters & Months */
    {0x3251, 0x325f}, /*    15x Enclosed CJK Letters & Months */
    {0x3280, 0x3289}, /*    10x Enclosed CJK Letters & Months */
    {0x32b1, 0x32bf}, /*    15x Enclosed CJK Letters & Months */
    {0x3400, 0x4db5}, /*  6582x CJK Unified Ideographs Extension A */
    {0x4dc0, 0x9fef}, /* 21040x Yijing Hexagram, CJK Unified Ideographs */
    {0xa000, 0xa48c}, /*  1165x Yi Syllables */
    {0xa4d0, 0xa4fd}, /*    46x Lisu */
    {0xa500, 0xa60c}, /*   269x Vai */
    {0xa610, 0xa62b}, /*    28x Vai */
    {0xa6a0, 0xa6ef}, /*    80x Bamum */
    {0xa80c, 0xa822}, /*    23x Syloti Nagri */
    {0xa840, 0xa873}, /*    52x Phags-pa */
    {0xa882, 0xa8b3}, /*    50x Saurashtra */
    {0xa8d0, 0xa8d9}, /*    10x Saurashtra */
    {0xa900, 0xa925}, /*    38x Kayah Li */
    {0xa930, 0xa946}, /*    23x Rejang */
    {0xa960, 0xa97c}, /*    29x Hangul Jamo Extended-A */
    {0xa984, 0xa9b2}, /*    47x Javanese */
    {0xa9cf, 0xa9d9}, /*    11x Javanese */
    {0xaa00, 0xaa28}, /*    41x Cham */
    {0xaa50, 0xaa59}, /*    10x Cham */
    {0xabf0, 0xabf9}, /*    10x Meetei Mayek */
    {0xac00, 0xd7a3}, /* 11172x Hangul Syllables */
    {0xf900, 0xfa6d}, /*   366x CJK Compatibility Ideographs */
    {0xfa70, 0xfad9}, /*   106x CJK Compatibility Ideographs */
    {0xfb1f, 0xfb28}, /*    10x Alphabetic Presentation Forms */
    {0xfb2a, 0xfb36}, /*    13x Alphabetic Presentation Forms */
    {0xfb46, 0xfbb1}, /*   108x Alphabetic Presentation Forms */
    {0xfbd3, 0xfd3d}, /*   363x Arabic Presentation Forms-A */
    {0xfe76, 0xfefc}, /*   135x Arabic Presentation Forms-B */
    {0xff10, 0xff19}, /*    10x Dubs */
    {0xff21, 0xff3a}, /*    26x Dubs */
    {0xff41, 0xff5a}, /*    26x Dubs */
    {0xff66, 0xffbe}, /*    89x Dubs */
    {0xffc2, 0xffc7}, /*     6x Dubs */
    {0xffca, 0xffcf}, /*     6x Dubs */
    {0xffd2, 0xffd7}, /*     6x Dubs */
    {0xffda, 0xffdc}, /*     3x Dubs */
};
 | ||
| 
 | ||
/*
 * Supplementary-plane (above U+FFFF) code point ranges that are NOT
 * separators.
 *
 * Same layout as the BMP table: inclusive {first, last} rows, strictly
 * ascending and non-overlapping so they can be binary searched on the
 * upper bound. Values need more than 16 bits, hence the wider element
 * type.
 */
static const unsigned kAstralCodes[][2] = {
    {0x10107, 0x10133}, /*    45x Aegean */
    {0x10140, 0x10178}, /*    57x Ancient Greek Numbers */
    {0x1018a, 0x1018b}, /*     2x Ancient Greek Numbers */
    {0x10280, 0x1029c}, /*    29x Lycian */
    {0x102a0, 0x102d0}, /*    49x Carian */
    {0x102e1, 0x102fb}, /*    27x Coptic Epact Numbers */
    {0x10300, 0x10323}, /*    36x Old Italic */
    {0x1032d, 0x1034a}, /*    30x Old Italic, Gothic */
    {0x10350, 0x10375}, /*    38x Old Permic */
    {0x10380, 0x1039d}, /*    30x Ugaritic */
    {0x103a0, 0x103c3}, /*    36x Old Persian */
    {0x103c8, 0x103cf}, /*     8x Old Persian */
    {0x103d1, 0x103d5}, /*     5x Old Persian */
    {0x10400, 0x1049d}, /*   158x Deseret, Shavian, Osmanya */
    {0x104b0, 0x104d3}, /*    36x Osage */
    {0x104d8, 0x104fb}, /*    36x Osage */
    {0x10500, 0x10527}, /*    40x Elbasan */
    {0x10530, 0x10563}, /*    52x Caucasian Albanian */
    {0x10600, 0x10736}, /*   311x Linear A */
    {0x10800, 0x10805}, /*     6x Cypriot Syllabary */
    {0x1080a, 0x10835}, /*    44x Cypriot Syllabary */
    {0x10837, 0x10838}, /*     2x Cypriot Syllabary */
    {0x1083f, 0x1089e}, /*    86x Cypriot,ImperialAramaic,Palmyrene,Nabataean */
    {0x108e0, 0x108f2}, /*    19x Hatran */
    {0x108f4, 0x108f5}, /*     2x Hatran */
    {0x108fb, 0x1091b}, /*    33x Hatran */
    {0x10920, 0x10939}, /*    26x Lydian */
    {0x10980, 0x109b7}, /*    56x Meroitic Hieromarks */
    {0x109bc, 0x109cf}, /*    20x Meroitic Cursive */
    {0x109d2, 0x10a00}, /*    47x Meroitic Cursive */
    {0x10a10, 0x10a13}, /*     4x Kharoshthi */
    {0x10a15, 0x10a17}, /*     3x Kharoshthi */
    {0x10a19, 0x10a35}, /*    29x Kharoshthi */
    {0x10a40, 0x10a48}, /*     9x Kharoshthi */
    {0x10a60, 0x10a7e}, /*    31x Old South Arabian */
    {0x10a80, 0x10a9f}, /*    32x Old North Arabian */
    {0x10ac0, 0x10ac7}, /*     8x Manichaean */
    {0x10ac9, 0x10ae4}, /*    28x Manichaean */
    {0x10aeb, 0x10aef}, /*     5x Manichaean */
    {0x10b00, 0x10b35}, /*    54x Avestan */
    {0x10b40, 0x10b55}, /*    22x Inscriptional Parthian */
    {0x10b58, 0x10b72}, /*    27x Inscriptional Parthian and Pahlavi */
    {0x10b78, 0x10b91}, /*    26x Inscriptional Pahlavi, Psalter Pahlavi */
    {0x10c00, 0x10c48}, /*    73x Old Turkic */
    {0x10c80, 0x10cb2}, /*    51x Old Hungarian */
    {0x10cc0, 0x10cf2}, /*    51x Old Hungarian */
    {0x10cfa, 0x10d23}, /*    42x Old Hungarian, Hanifi Rohingya */
    {0x10d30, 0x10d39}, /*    10x Hanifi Rohingya */
    {0x10e60, 0x10e7e}, /*    31x Rumi Numeral Symbols */
    {0x10f00, 0x10f27}, /*    40x Old Sogdian */
    {0x10f30, 0x10f45}, /*    22x Sogdian */
    {0x10f51, 0x10f54}, /*     4x Sogdian */
    {0x10fe0, 0x10ff6}, /*    23x Elymaic */
    {0x11003, 0x11037}, /*    53x Brahmi */
    {0x11052, 0x1106f}, /*    30x Brahmi */
    {0x11083, 0x110af}, /*    45x Kaithi */
    {0x110d0, 0x110e8}, /*    25x Sora Sompeng */
    {0x110f0, 0x110f9}, /*    10x Sora Sompeng */
    {0x11103, 0x11126}, /*    36x Chakma */
    {0x11136, 0x1113f}, /*    10x Chakma */
    {0x11144, 0x11144}, /*     1x Chakma */
    {0x11150, 0x11172}, /*    35x Mahajani */
    {0x11176, 0x11176}, /*     1x Mahajani */
    {0x11183, 0x111b2}, /*    48x Sharada */
    {0x111c1, 0x111c4}, /*     4x Sharada */
    {0x111d0, 0x111da}, /*    11x Sharada */
    {0x111dc, 0x111dc}, /*     1x Sharada */
    {0x111e1, 0x111f4}, /*    20x Sinhala Archaic Numbers */
    {0x11200, 0x11211}, /*    18x Khojki */
    {0x11213, 0x1122b}, /*    25x Khojki */
    {0x11280, 0x11286}, /*     7x Multani */
    {0x11288, 0x11288}, /*     1x Multani */
    {0x1128a, 0x1128d}, /*     4x Multani */
    {0x1128f, 0x1129d}, /*    15x Multani */
    {0x1129f, 0x112a8}, /*    10x Multani */
    {0x112b0, 0x112de}, /*    47x Khudawadi */
    {0x112f0, 0x112f9}, /*    10x Khudawadi */
    {0x11305, 0x1130c}, /*     8x Grantha */
    {0x1130f, 0x11310}, /*     2x Grantha */
    {0x11313, 0x11328}, /*    22x Grantha */
    {0x1132a, 0x11330}, /*     7x Grantha */
    {0x11332, 0x11333}, /*     2x Grantha */
    {0x11335, 0x11339}, /*     5x Grantha */
    {0x1133d, 0x1133d}, /*     1x Grantha */
    {0x11350, 0x11350}, /*     1x Grantha */
    {0x1135d, 0x11361}, /*     5x Grantha */
    {0x11400, 0x11434}, /*    53x Newa */
    {0x11447, 0x1144a}, /*     4x Newa */
    {0x11450, 0x11459}, /*    10x Newa */
    {0x1145f, 0x1145f}, /*     1x Newa */
    {0x11480, 0x114af}, /*    48x Tirhuta */
    {0x114c4, 0x114c5}, /*     2x Tirhuta */
    {0x114c7, 0x114c7}, /*     1x Tirhuta */
    {0x114d0, 0x114d9}, /*    10x Tirhuta */
    {0x11580, 0x115ae}, /*    47x Siddham */
    {0x115d8, 0x115db}, /*     4x Siddham */
    {0x11600, 0x1162f}, /*    48x Modi */
    {0x11644, 0x11644}, /*     1x Modi */
    {0x11650, 0x11659}, /*    10x Modi */
    {0x11680, 0x116aa}, /*    43x Takri */
    {0x116b8, 0x116b8}, /*     1x Takri */
    {0x116c0, 0x116c9}, /*    10x Takri */
    {0x11700, 0x1171a}, /*    27x Ahom */
    {0x11730, 0x1173b}, /*    12x Ahom */
    {0x11800, 0x1182b}, /*    44x Dogra */
    {0x118a0, 0x118f2}, /*    83x Warang Citi */
    {0x118ff, 0x118ff}, /*     1x Warang Citi */
    {0x119a0, 0x119a7}, /*     8x Nandinagari */
    {0x119aa, 0x119d0}, /*    39x Nandinagari */
    {0x119e1, 0x119e1}, /*     1x Nandinagari */
    {0x119e3, 0x119e3}, /*     1x Nandinagari */
    {0x11a00, 0x11a00}, /*     1x Zanabazar Square */
    {0x11a0b, 0x11a32}, /*    40x Zanabazar Square */
    {0x11a3a, 0x11a3a}, /*     1x Zanabazar Square */
    {0x11a50, 0x11a50}, /*     1x Soyombo */
    {0x11a5c, 0x11a89}, /*    46x Soyombo */
    {0x11a9d, 0x11a9d}, /*     1x Soyombo */
    {0x11ac0, 0x11af8}, /*    57x Pau Cin Hau */
    {0x11c00, 0x11c08}, /*     9x Bhaiksuki */
    {0x11c0a, 0x11c2e}, /*    37x Bhaiksuki */
    {0x11c40, 0x11c40}, /*     1x Bhaiksuki */
    {0x11c50, 0x11c6c}, /*    29x Bhaiksuki */
    {0x11c72, 0x11c8f}, /*    30x Marchen */
    {0x11d00, 0x11d06}, /*     7x Masaram Gondi */
    {0x11d08, 0x11d09}, /*     2x Masaram Gondi */
    {0x11d0b, 0x11d30}, /*    38x Masaram Gondi */
    {0x11d46, 0x11d46}, /*     1x Masaram Gondi */
    {0x11d50, 0x11d59}, /*    10x Masaram Gondi */
    {0x11d60, 0x11d65}, /*     6x Gunjala Gondi */
    {0x11d67, 0x11d68}, /*     2x Gunjala Gondi */
    {0x11d6a, 0x11d89}, /*    32x Gunjala Gondi */
    {0x11d98, 0x11d98}, /*     1x Gunjala Gondi */
    {0x11da0, 0x11da9}, /*    10x Gunjala Gondi */
    {0x11ee0, 0x11ef2}, /*    19x Makasar */
    {0x11fc0, 0x11fd4}, /*    21x Tamil Supplement */
    {0x12000, 0x12399}, /*   922x Cuneiform */
    {0x12400, 0x1246e}, /*   111x Cuneiform Numbers & Punctuation */
    {0x12480, 0x12543}, /*   196x Early Dynastic Cuneiform */
    {0x13000, 0x1342e}, /*  1071x Egyptian Hieromarks */
    {0x14400, 0x14646}, /*   583x Anatolian Hieromarks */
    {0x16800, 0x16a38}, /*   569x Bamum Supplement */
    {0x16a40, 0x16a5e}, /*    31x Mro */
    {0x16a60, 0x16a69}, /*    10x Mro */
    {0x16ad0, 0x16aed}, /*    30x Bassa Vah */
    {0x16b00, 0x16b2f}, /*    48x Pahawh Hmong */
    {0x16b40, 0x16b43}, /*     4x Pahawh Hmong */
    {0x16b50, 0x16b59}, /*    10x Pahawh Hmong */
    {0x16b5b, 0x16b61}, /*     7x Pahawh Hmong */
    {0x16b63, 0x16b77}, /*    21x Pahawh Hmong */
    {0x16b7d, 0x16b8f}, /*    19x Pahawh Hmong */
    {0x16e40, 0x16e96}, /*    87x Medefaidrin */
    {0x16f00, 0x16f4a}, /*    75x Miao */
    {0x16f50, 0x16f50}, /*     1x Miao */
    {0x16f93, 0x16f9f}, /*    13x Miao */
    {0x16fe0, 0x16fe1}, /*     2x Ideographic Symbols & Punctuation */
    {0x16fe3, 0x16fe3}, /*     1x Ideographic Symbols & Punctuation */
    {0x17000, 0x187f7}, /*  6136x Tangut */
    {0x18800, 0x18af2}, /*   755x Tangut Components */
    {0x1b000, 0x1b11e}, /*   287x Kana Supplement */
    {0x1b150, 0x1b152}, /*     3x Small Kana Extension */
    {0x1b164, 0x1b167}, /*     4x Small Kana Extension */
    {0x1b170, 0x1b2fb}, /*   396x Nushu */
    {0x1bc00, 0x1bc6a}, /*   107x Duployan */
    {0x1bc70, 0x1bc7c}, /*    13x Duployan */
    {0x1bc80, 0x1bc88}, /*     9x Duployan */
    {0x1bc90, 0x1bc99}, /*    10x Duployan */
    {0x1d2e0, 0x1d2f3}, /*    20x Mayan Numerals */
    {0x1d360, 0x1d378}, /*    25x Counting Rod Numerals */
    {0x1d400, 0x1d454}, /*    85x 𝐀..𝑔 Math */
    {0x1d456, 0x1d49c}, /*    71x 𝑖..𝒜 Math */
    {0x1d49e, 0x1d49f}, /*     2x 𝒞..𝒟 Math */
    {0x1d4a2, 0x1d4a2}, /*     1x 𝒢..𝒢 Math */
    {0x1d4a5, 0x1d4a6}, /*     2x 𝒥..𝒦 Math */
    {0x1d4a9, 0x1d4ac}, /*     4x 𝒩..𝒬 Math */
    {0x1d4ae, 0x1d4b9}, /*    12x 𝒮..𝒹 Math */
    {0x1d4bb, 0x1d4bb}, /*     1x 𝒻..𝒻 Math */
    {0x1d4bd, 0x1d4c3}, /*     7x 𝒽..𝓃 Math */
    {0x1d4c5, 0x1d505}, /*    65x 𝓅..𝔅 Math */
    {0x1d507, 0x1d50a}, /*     4x 𝔇..𝔊 Math */
    {0x1d50d, 0x1d514}, /*     8x 𝔍..𝔔 Math */
    {0x1d516, 0x1d51c}, /*     7x 𝔖..𝔜 Math */
    {0x1d51e, 0x1d539}, /*    28x 𝔞..𝔹 Math */
    {0x1d53b, 0x1d53e}, /*     4x 𝔻..𝔾 Math */
    {0x1d540, 0x1d544}, /*     5x 𝕀..𝕄 Math */
    {0x1d546, 0x1d546}, /*     1x 𝕆..𝕆 Math */
    {0x1d54a, 0x1d550}, /*     7x 𝕊..𝕐 Math */
    {0x1d552, 0x1d6a5}, /*   340x 𝕒..𝚥 Math */
    {0x1d6a8, 0x1d6c0}, /*    25x 𝚨..𝛀 Math */
    {0x1d6c2, 0x1d6da}, /*    25x 𝛂..𝛚 Math */
    {0x1d6dc, 0x1d6fa}, /*    31x 𝛜..𝛺 Math */
    {0x1d6fc, 0x1d714}, /*    25x 𝛼..𝜔 Math */
    {0x1d716, 0x1d734}, /*    31x 𝜖..𝜴 Math */
    {0x1d736, 0x1d74e}, /*    25x 𝜶..𝝎 Math */
    {0x1d750, 0x1d76e}, /*    31x 𝝐..𝝮 Math */
    {0x1d770, 0x1d788}, /*    25x 𝝰..𝞈 Math */
    {0x1d78a, 0x1d7a8}, /*    31x 𝞊..𝞨 Math */
    {0x1d7aa, 0x1d7c2}, /*    25x 𝞪..𝟂 Math */
    {0x1d7c4, 0x1d7cb}, /*     8x 𝟄..𝟋 Math */
    {0x1d7ce, 0x1d9ff}, /*   562x Math, Sutton SignWriting */
    {0x1f100, 0x1f10c}, /*    13x Enclosed Alphanumeric Supplement */
    {0x20000, 0x2a6d6}, /* 42711x CJK Unified Ideographs Extension B */
    {0x2a700, 0x2b734}, /*  4149x CJK Unified Ideographs Extension C */
    {0x2b740, 0x2b81d}, /*   222x CJK Unified Ideographs Extension D */
    {0x2b820, 0x2cea1}, /*  5762x CJK Unified Ideographs Extension E */
    {0x2ceb0, 0x2ebe0}, /*  7473x CJK Unified Ideographs Extension F */
    {0x2f800, 0x2fa1d}, /*   542x CJK Compatibility Ideographs Supplement */
};
 | ||
| 
 | ||
| /**
 | ||
|  * Returns nonzero if 𝑐 isn't alphanumeric.
 | ||
|  *
 | ||
|  * Line reading interfaces generally define this operation as UNICODE
 | ||
|  * characters that aren't in the letter category (Lu, Ll, Lt, Lm, Lo)
 | ||
|  * and aren't in the number categorie (Nd, Nl, No). We also add a few
 | ||
|  * other things like blocks and emoji (So).
 | ||
|  */
 | ||
| int iswseparator(wint_t c) {
 | ||
|   int m, l, r, n;
 | ||
|   if (c < 0200) {
 | ||
|     return !(('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') ||
 | ||
|              ('a' <= c && c <= 'z'));
 | ||
|   }
 | ||
|   if (c <= 0xffff) {
 | ||
|     l = 0;
 | ||
|     r = n = sizeof(kCodes) / sizeof(kCodes[0]);
 | ||
|     while (l < r) {
 | ||
|       m = (l & r) + ((l ^ r) >> 1);  // floor((a+b)/2)
 | ||
|       if (kCodes[m][1] < c) {
 | ||
|         l = m + 1;
 | ||
|       } else {
 | ||
|         r = m;
 | ||
|       }
 | ||
|     }
 | ||
|     return !(l < n && kCodes[l][0] <= c && c <= kCodes[l][1]);
 | ||
|   } else {
 | ||
|     l = 0;
 | ||
|     r = n = sizeof(kAstralCodes) / sizeof(kAstralCodes[0]);
 | ||
|     while (l < r) {
 | ||
|       m = (l & r) + ((l ^ r) >> 1);  // floor((a+b)/2)
 | ||
|       if (kAstralCodes[m][1] < c) {
 | ||
|         l = m + 1;
 | ||
|       } else {
 | ||
|         r = m;
 | ||
|       }
 | ||
|     }
 | ||
|     return !(l < n && kAstralCodes[l][0] <= c && c <= kAstralCodes[l][1]);
 | ||
|   }
 | ||
| }
 |