diff --git a/ctl/shared_ptr.h b/ctl/shared_ptr.h index 40e7a1a7d..e85429c48 100644 --- a/ctl/shared_ptr.h +++ b/ctl/shared_ptr.h @@ -335,6 +335,7 @@ class shared_ptr return p; } +#if 0 // TODO(mrdomino): find a different way template bool owner_before(const shared_ptr& r) const noexcept { @@ -346,6 +347,7 @@ class shared_ptr { return !r.owner_before(*this); } +#endif private: template diff --git a/libc/fmt/BUILD.mk b/libc/fmt/BUILD.mk index 4114c6ba7..8fdbfeb14 100644 --- a/libc/fmt/BUILD.mk +++ b/libc/fmt/BUILD.mk @@ -40,7 +40,7 @@ LIBC_FMT_A_DIRECTDEPS = \ LIBC_STR \ LIBC_SYSV \ LIBC_TINYMATH \ - THIRD_PARTY_COMPILER_RT + THIRD_PARTY_COMPILER_RT \ LIBC_FMT_A_DEPS := \ $(call uniq,$(foreach x,$(LIBC_FMT_A_DIRECTDEPS),$($(x)))) diff --git a/libc/intrin/pthread_pause_np.c b/libc/intrin/pthread_pause_np.c index ceb85d242..8f5c399c1 100644 --- a/libc/intrin/pthread_pause_np.c +++ b/libc/intrin/pthread_pause_np.c @@ -16,15 +16,23 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/thread/thread.h" +#ifdef _MSC_VER +#include +#else +#include +#endif /** * Yields hyperthread. */ void pthread_pause_np(void) { #if defined(__GNUC__) && defined(__aarch64__) - __asm__ volatile("yield"); -#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) - __asm__ volatile("pause"); + __asm__("yield"); +#elif defined(__x86_64__) || defined(__i386__) + _mm_pause(); +#elif defined(__GNUC__) && (defined(__PPC__) || defined(__PPC64__)) + __asm__("or 27,27,27"); +#else + // do nothing #endif } diff --git a/libc/mem/alg.h b/libc/mem/alg.h index c9bdb8f53..8a887a524 100644 --- a/libc/mem/alg.h +++ b/libc/mem/alg.h @@ -4,9 +4,6 @@ COSMOPOLITAN_C_START_ void *bsearch(const void *, const void *, size_t, size_t, int (*)(const void *, const void *)) paramsnonnull() nosideeffect; -void *bsearch_r(const void *, const void *, size_t, size_t, - int (*)(const void *, const void *, void *), void *) - paramsnonnull((1, 2, 5)) nosideeffect; void qsort3(void *, size_t, size_t, int (*)(const void *, const void *)) paramsnonnull(); void qsort(void *, size_t, size_t, int (*)(const void *, const void *)) diff --git a/libc/mem/bisect.internal.h b/libc/mem/bisect.internal.h deleted file mode 100644 index 2365f82cd..000000000 --- a/libc/mem/bisect.internal.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_ALG_BISECT_H_ -#define COSMOPOLITAN_LIBC_ALG_BISECT_H_ -COSMOPOLITAN_C_START_ - -forceinline void *bisect(const void *k, const void *data, size_t n, size_t size, - int cmp(const void *a, const void *b, void *arg), - void *arg) { - int c; - const char *p; - ssize_t m, l, r; - if (n) { - l = 0; - r = n - 1; - p = data; - while (l <= r) { - m = (l & r) + ((l ^ r) >> 1); - c = cmp(k, p + m * size, arg); - if (c > 0) { - l = m + 1; - } else if (c < 0) { - r = m - 1; - } else { - return (char *)p + m * size; - } - } - } - return NULL; -} - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_ALG_BISECT_H_ */ diff --git a/libc/mem/bsearch_r.c b/libc/mem/bsearch_r.c deleted file mode 100644 index 832d79edd..000000000 --- a/libc/mem/bsearch_r.c +++ /dev/null @@ -1,29 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/alg.h" -#include "libc/mem/bisect.internal.h" - -/** - * Searches sorted array for exact item in logarithmic time. - * @see bsearch() - */ -void *bsearch_r(const void *key, const void *base, size_t nmemb, size_t size, - int cmp(const void *a, const void *b, void *arg), void *arg) { - return bisect(key, base, nmemb, size, cmp, arg); -} diff --git a/libc/nexgen32e/BUILD.mk b/libc/nexgen32e/BUILD.mk index cf50a81e7..d84d8d853 100644 --- a/libc/nexgen32e/BUILD.mk +++ b/libc/nexgen32e/BUILD.mk @@ -71,8 +71,6 @@ o/$(MODE)/libc/nexgen32e/ksha512.o: libc/nexgen32e/ksha512.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< o/$(MODE)/libc/nexgen32e/kcp437.o: libc/nexgen32e/kcp437.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< -o/$(MODE)/libc/nexgen32e/kreversebits.o: libc/nexgen32e/kreversebits.S - @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< o/$(MODE)/libc/nexgen32e/ktensindex.o: libc/nexgen32e/ktensindex.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< o/$(MODE)/libc/nexgen32e/longjmp.o: libc/nexgen32e/longjmp.S diff --git a/libc/nexgen32e/x86info.h b/libc/nexgen32e/x86info.h index 14eed9fd3..5e07d0e9e 100644 --- a/libc/nexgen32e/x86info.h +++ b/libc/nexgen32e/x86info.h @@ -65,7 +65,6 @@ struct X86ProcessorModel { unsigned char grade; }; -extern const size_t kX86ProcessorModelCount; extern const struct X86ProcessorModel kX86ProcessorModels[]; const struct X86ProcessorModel *getx86processormodel(short) nosideeffect; diff --git a/libc/runtime/getsymbol.c b/libc/runtime/getsymbol.c index 855e13611..e7ced6883 100644 --- a/libc/runtime/getsymbol.c +++ b/libc/runtime/getsymbol.c @@ -33,24 +33,22 @@ privileged int __get_symbol(struct SymbolTable *t, intptr_t a) { // we don't want function tracing because: // function tracing depends on this function via kprintf unsigned l, m, r, n, k; - if (!t && __symtab) { + if (!t && __symtab) t = __symtab; - } if (t) { l = 0; r = n = t->count; k = a - t->addr_base; while (l < r) { m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - if (t->symbols[m].y < k) { + if (k < t->symbols[m].x) { + r = m; + } else if (k > t->symbols[m].y) { l = m + 1; } else { - r = m; + return m; } } - if (l < n && t->symbols[l].x <= k && k <= t->symbols[l].y) { - return l; - } } return -1; } diff --git a/libc/str/BUILD.mk b/libc/str/BUILD.mk index d7a655dea..5e10f4ace 100644 --- a/libc/str/BUILD.mk +++ b/libc/str/BUILD.mk @@ -12,16 +12,19 @@ LIBC_STR_A_INCS = $(filter %.inc,$(LIBC_STR_A_FILES)) LIBC_STR_A_SRCS_A = $(filter %.s,$(LIBC_STR_A_FILES)) LIBC_STR_A_SRCS_S = $(filter %.S,$(LIBC_STR_A_FILES)) LIBC_STR_A_SRCS_C = $(filter %.c,$(LIBC_STR_A_FILES)) +LIBC_STR_A_SRCS_CC = $(filter %.cc,$(LIBC_STR_A_FILES)) LIBC_STR_A_SRCS = \ $(LIBC_STR_A_SRCS_A) \ $(LIBC_STR_A_SRCS_S) \ - $(LIBC_STR_A_SRCS_C) + $(LIBC_STR_A_SRCS_C) \ + $(LIBC_STR_A_SRCS_CC) LIBC_STR_A_OBJS = \ $(LIBC_STR_A_SRCS_A:%.s=o/$(MODE)/%.o) \ $(LIBC_STR_A_SRCS_S:%.S=o/$(MODE)/%.o) \ - $(LIBC_STR_A_SRCS_C:%.c=o/$(MODE)/%.o) + $(LIBC_STR_A_SRCS_C:%.c=o/$(MODE)/%.o) \ + $(LIBC_STR_A_SRCS_CC:%.cc=o/$(MODE)/%.o) LIBC_STR_A_CHECKS = \ $(LIBC_STR_A).pkg \ diff --git a/libc/str/eastasianwidth.txt b/libc/str/eastasianwidth.txt index 8e2a738fe..02df4df47 100644 --- a/libc/str/eastasianwidth.txt +++ b/libc/str/eastasianwidth.txt @@ -1,11 +1,11 @@ -# EastAsianWidth-15.0.0.txt -# Date: 2022-01-28, 13:07:15 GMT [KW, LI] -# © 2022 Unicode®, Inc. +# EastAsianWidth-15.1.0.txt +# Date: 2023-07-28, 23:34:08 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see https://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # # East_Asian_Width Property # @@ -30,2590 +30,2592 @@ # Character ranges are specified as for other property files in the # Unicode Character Database. # -# For legacy reasons, there are no spaces before or after the semicolon -# which separates the two fields. The comments following the number sign -# "#" list the General_Category property value or the L& alias of the -# derived value LC, the Unicode character name or names, and, in lines -# with ranges of code points, the code point count in square brackets. +# The comments following the number sign "#" list the General_Category +# property value or the L& alias of the derived value LC, the Unicode +# character name or names, and, in lines with ranges of code points, +# the code point count in square brackets. # # For more information, see UAX #11: East Asian Width, # at https://www.unicode.org/reports/tr11/ # # @missing: 0000..10FFFF; N -0000..001F;N # Cc [32] .. -0020;Na # Zs SPACE -0021..0023;Na # Po [3] EXCLAMATION MARK..NUMBER SIGN -0024;Na # Sc DOLLAR SIGN -0025..0027;Na # Po [3] PERCENT SIGN..APOSTROPHE -0028;Na # Ps LEFT PARENTHESIS -0029;Na # Pe RIGHT PARENTHESIS -002A;Na # Po ASTERISK -002B;Na # Sm PLUS SIGN -002C;Na # Po COMMA -002D;Na # Pd HYPHEN-MINUS -002E..002F;Na # Po [2] FULL STOP..SOLIDUS -0030..0039;Na # Nd [10] DIGIT ZERO..DIGIT NINE -003A..003B;Na # Po [2] COLON..SEMICOLON -003C..003E;Na # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN -003F..0040;Na # Po [2] QUESTION MARK..COMMERCIAL AT -0041..005A;Na # Lu [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z -005B;Na # Ps LEFT SQUARE BRACKET -005C;Na # Po REVERSE SOLIDUS -005D;Na # Pe RIGHT SQUARE BRACKET -005E;Na # Sk CIRCUMFLEX ACCENT -005F;Na # Pc LOW LINE -0060;Na # Sk GRAVE ACCENT -0061..007A;Na # Ll [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -007B;Na # Ps LEFT CURLY BRACKET -007C;Na # Sm VERTICAL LINE -007D;Na # Pe RIGHT CURLY BRACKET -007E;Na # Sm TILDE -007F;N # Cc -0080..009F;N # Cc [32] .. -00A0;N # Zs NO-BREAK SPACE -00A1;A # Po INVERTED EXCLAMATION MARK -00A2..00A3;Na # Sc [2] CENT SIGN..POUND SIGN -00A4;A # Sc CURRENCY SIGN -00A5;Na # Sc YEN SIGN -00A6;Na # So BROKEN BAR -00A7;A # Po SECTION SIGN -00A8;A # Sk DIAERESIS -00A9;N # So COPYRIGHT SIGN -00AA;A # Lo FEMININE ORDINAL INDICATOR -00AB;N # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -00AC;Na # Sm NOT SIGN -00AD;A # Cf SOFT HYPHEN -00AE;A # So REGISTERED SIGN -00AF;Na # Sk MACRON -00B0;A # So DEGREE SIGN -00B1;A # Sm PLUS-MINUS SIGN -00B2..00B3;A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE -00B4;A # Sk ACUTE ACCENT -00B5;N # Ll MICRO SIGN -00B6..00B7;A # Po [2] PILCROW SIGN..MIDDLE DOT -00B8;A # Sk CEDILLA -00B9;A # No SUPERSCRIPT ONE -00BA;A # Lo MASCULINE ORDINAL INDICATOR -00BB;N # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -00BC..00BE;A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS -00BF;A # Po INVERTED QUESTION MARK -00C0..00C5;N # Lu [6] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER A WITH RING ABOVE -00C6;A # Lu LATIN CAPITAL LETTER AE -00C7..00CF;N # Lu [9] LATIN CAPITAL LETTER C WITH CEDILLA..LATIN CAPITAL LETTER I WITH DIAERESIS -00D0;A # Lu LATIN CAPITAL LETTER ETH -00D1..00D6;N # Lu [6] LATIN CAPITAL LETTER N WITH TILDE..LATIN CAPITAL LETTER O WITH DIAERESIS -00D7;A # Sm MULTIPLICATION SIGN -00D8;A # Lu LATIN CAPITAL LETTER O WITH STROKE -00D9..00DD;N # Lu [5] LATIN CAPITAL LETTER U WITH GRAVE..LATIN CAPITAL LETTER Y WITH ACUTE -00DE..00E1;A # L& [4] LATIN CAPITAL LETTER THORN..LATIN SMALL LETTER A WITH ACUTE -00E2..00E5;N # Ll [4] LATIN SMALL LETTER A WITH CIRCUMFLEX..LATIN SMALL LETTER A WITH RING ABOVE -00E6;A # Ll LATIN SMALL LETTER AE -00E7;N # Ll LATIN SMALL LETTER C WITH CEDILLA -00E8..00EA;A # Ll [3] LATIN SMALL LETTER E WITH GRAVE..LATIN SMALL LETTER E WITH CIRCUMFLEX -00EB;N # Ll LATIN SMALL LETTER E WITH DIAERESIS -00EC..00ED;A # Ll [2] LATIN SMALL LETTER I WITH GRAVE..LATIN SMALL LETTER I WITH ACUTE -00EE..00EF;N # Ll [2] LATIN SMALL LETTER I WITH CIRCUMFLEX..LATIN SMALL LETTER I WITH DIAERESIS -00F0;A # Ll LATIN SMALL LETTER ETH -00F1;N # Ll LATIN SMALL LETTER N WITH TILDE -00F2..00F3;A # Ll [2] LATIN SMALL LETTER O WITH GRAVE..LATIN SMALL LETTER O WITH ACUTE -00F4..00F6;N # Ll [3] LATIN SMALL LETTER O WITH CIRCUMFLEX..LATIN SMALL LETTER O WITH DIAERESIS -00F7;A # Sm DIVISION SIGN -00F8..00FA;A # Ll [3] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER U WITH ACUTE -00FB;N # Ll LATIN SMALL LETTER U WITH CIRCUMFLEX -00FC;A # Ll LATIN SMALL LETTER U WITH DIAERESIS -00FD;N # Ll LATIN SMALL LETTER Y WITH ACUTE -00FE;A # Ll LATIN SMALL LETTER THORN -00FF;N # Ll LATIN SMALL LETTER Y WITH DIAERESIS -0100;N # Lu LATIN CAPITAL LETTER A WITH MACRON -0101;A # Ll LATIN SMALL LETTER A WITH MACRON -0102..0110;N # L& [15] LATIN CAPITAL LETTER A WITH BREVE..LATIN CAPITAL LETTER D WITH STROKE -0111;A # Ll LATIN SMALL LETTER D WITH STROKE -0112;N # Lu LATIN CAPITAL LETTER E WITH MACRON -0113;A # Ll LATIN SMALL LETTER E WITH MACRON -0114..011A;N # L& [7] LATIN CAPITAL LETTER E WITH BREVE..LATIN CAPITAL LETTER E WITH CARON -011B;A # Ll LATIN SMALL LETTER E WITH CARON -011C..0125;N # L& [10] LATIN CAPITAL LETTER G WITH CIRCUMFLEX..LATIN SMALL LETTER H WITH CIRCUMFLEX -0126..0127;A # L& [2] LATIN CAPITAL LETTER H WITH STROKE..LATIN SMALL LETTER H WITH STROKE -0128..012A;N # L& [3] LATIN CAPITAL LETTER I WITH TILDE..LATIN CAPITAL LETTER I WITH MACRON -012B;A # Ll LATIN SMALL LETTER I WITH MACRON -012C..0130;N # L& [5] LATIN CAPITAL LETTER I WITH BREVE..LATIN CAPITAL LETTER I WITH DOT ABOVE -0131..0133;A # L& [3] LATIN SMALL LETTER DOTLESS I..LATIN SMALL LIGATURE IJ -0134..0137;N # L& [4] LATIN CAPITAL LETTER J WITH CIRCUMFLEX..LATIN SMALL LETTER K WITH CEDILLA -0138;A # Ll LATIN SMALL LETTER KRA -0139..013E;N # L& [6] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER L WITH CARON -013F..0142;A # L& [4] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH STROKE -0143;N # Lu LATIN CAPITAL LETTER N WITH ACUTE -0144;A # Ll LATIN SMALL LETTER N WITH ACUTE -0145..0147;N # L& [3] LATIN CAPITAL LETTER N WITH CEDILLA..LATIN CAPITAL LETTER N WITH CARON -0148..014B;A # L& [4] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER ENG -014C;N # Lu LATIN CAPITAL LETTER O WITH MACRON -014D;A # Ll LATIN SMALL LETTER O WITH MACRON -014E..0151;N # L& [4] LATIN CAPITAL LETTER O WITH BREVE..LATIN SMALL LETTER O WITH DOUBLE ACUTE -0152..0153;A # L& [2] LATIN CAPITAL LIGATURE OE..LATIN SMALL LIGATURE OE -0154..0165;N # L& [18] LATIN CAPITAL LETTER R WITH ACUTE..LATIN SMALL LETTER T WITH CARON -0166..0167;A # L& [2] LATIN CAPITAL LETTER T WITH STROKE..LATIN SMALL LETTER T WITH STROKE -0168..016A;N # L& [3] LATIN CAPITAL LETTER U WITH TILDE..LATIN CAPITAL LETTER U WITH MACRON -016B;A # Ll LATIN SMALL LETTER U WITH MACRON -016C..017F;N # L& [20] LATIN CAPITAL LETTER U WITH BREVE..LATIN SMALL LETTER LONG S -0180..01BA;N # L& [59] LATIN SMALL LETTER B WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL -01BB;N # Lo LATIN LETTER TWO WITH STROKE -01BC..01BF;N # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN -01C0..01C3;N # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK -01C4..01CD;N # L& [10] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER A WITH CARON -01CE;A # Ll LATIN SMALL LETTER A WITH CARON -01CF;N # Lu LATIN CAPITAL LETTER I WITH CARON -01D0;A # Ll LATIN SMALL LETTER I WITH CARON -01D1;N # Lu LATIN CAPITAL LETTER O WITH CARON -01D2;A # Ll LATIN SMALL LETTER O WITH CARON -01D3;N # Lu LATIN CAPITAL LETTER U WITH CARON -01D4;A # Ll LATIN SMALL LETTER U WITH CARON -01D5;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON -01D6;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND MACRON -01D7;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE -01D8;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE -01D9;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON -01DA;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND CARON -01DB;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE -01DC;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE -01DD..024F;N # L& [115] LATIN SMALL LETTER TURNED E..LATIN SMALL LETTER Y WITH STROKE -0250;N # Ll LATIN SMALL LETTER TURNED A -0251;A # Ll LATIN SMALL LETTER ALPHA -0252..0260;N # Ll [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK -0261;A # Ll LATIN SMALL LETTER SCRIPT G -0262..0293;N # Ll [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL -0294;N # Lo LATIN LETTER GLOTTAL STOP -0295..02AF;N # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL -02B0..02C1;N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP -02C2..02C3;N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD -02C4;A # Sk MODIFIER LETTER UP ARROWHEAD -02C5;N # Sk MODIFIER LETTER DOWN ARROWHEAD -02C6;N # Lm MODIFIER LETTER CIRCUMFLEX ACCENT -02C7;A # Lm CARON -02C8;N # Lm MODIFIER LETTER VERTICAL LINE -02C9..02CB;A # Lm [3] MODIFIER LETTER MACRON..MODIFIER LETTER GRAVE ACCENT -02CC;N # Lm MODIFIER LETTER LOW VERTICAL LINE -02CD;A # Lm MODIFIER LETTER LOW MACRON -02CE..02CF;N # Lm [2] MODIFIER LETTER LOW GRAVE ACCENT..MODIFIER LETTER LOW ACUTE ACCENT -02D0;A # Lm MODIFIER LETTER TRIANGULAR COLON -02D1;N # Lm MODIFIER LETTER HALF TRIANGULAR COLON -02D2..02D7;N # Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN -02D8..02DB;A # Sk [4] BREVE..OGONEK -02DC;N # Sk SMALL TILDE -02DD;A # Sk DOUBLE ACUTE ACCENT -02DE;N # Sk MODIFIER LETTER RHOTIC HOOK -02DF;A # Sk MODIFIER LETTER CROSS ACCENT -02E0..02E4;N # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP -02E5..02EB;N # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK -02EC;N # Lm MODIFIER LETTER VOICING -02ED;N # Sk MODIFIER LETTER UNASPIRATED -02EE;N # Lm MODIFIER LETTER DOUBLE APOSTROPHE -02EF..02FF;N # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW -0300..036F;A # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X -0370..0373;N # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI -0374;N # Lm GREEK NUMERAL SIGN -0375;N # Sk GREEK LOWER NUMERAL SIGN -0376..0377;N # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA -037A;N # Lm GREEK YPOGEGRAMMENI -037B..037D;N # Ll [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL -037E;N # Po GREEK QUESTION MARK -037F;N # Lu GREEK CAPITAL LETTER YOT -0384..0385;N # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS -0386;N # Lu GREEK CAPITAL LETTER ALPHA WITH TONOS -0387;N # Po GREEK ANO TELEIA -0388..038A;N # Lu [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS -038C;N # Lu GREEK CAPITAL LETTER OMICRON WITH TONOS -038E..0390;N # L& [3] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS -0391..03A1;A # Lu [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO -03A3..03A9;A # Lu [7] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER OMEGA -03AA..03B0;N # L& [7] GREEK CAPITAL LETTER IOTA WITH DIALYTIKA..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS -03B1..03C1;A # Ll [17] GREEK SMALL LETTER ALPHA..GREEK SMALL LETTER RHO -03C2;N # Ll GREEK SMALL LETTER FINAL SIGMA -03C3..03C9;A # Ll [7] GREEK SMALL LETTER SIGMA..GREEK SMALL LETTER OMEGA -03CA..03F5;N # L& [44] GREEK SMALL LETTER IOTA WITH DIALYTIKA..GREEK LUNATE EPSILON SYMBOL -03F6;N # Sm GREEK REVERSED LUNATE EPSILON SYMBOL -03F7..03FF;N # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL -0400;N # Lu CYRILLIC CAPITAL LETTER IE WITH GRAVE -0401;A # Lu CYRILLIC CAPITAL LETTER IO -0402..040F;N # Lu [14] CYRILLIC CAPITAL LETTER DJE..CYRILLIC CAPITAL LETTER DZHE -0410..044F;A # L& [64] CYRILLIC CAPITAL LETTER A..CYRILLIC SMALL LETTER YA -0450;N # Ll CYRILLIC SMALL LETTER IE WITH GRAVE -0451;A # Ll CYRILLIC SMALL LETTER IO -0452..0481;N # L& [48] CYRILLIC SMALL LETTER DJE..CYRILLIC SMALL LETTER KOPPA -0482;N # So CYRILLIC THOUSANDS SIGN -0483..0487;N # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE -0488..0489;N # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN -048A..04FF;N # L& [118] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER HA WITH STROKE -0500..052F;N # L& [48] CYRILLIC CAPITAL LETTER KOMI DE..CYRILLIC SMALL LETTER EL WITH DESCENDER -0531..0556;N # Lu [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH -0559;N # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING -055A..055F;N # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK -0560..0588;N # Ll [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE -0589;N # Po ARMENIAN FULL STOP -058A;N # Pd ARMENIAN HYPHEN -058D..058E;N # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN -058F;N # Sc ARMENIAN DRAM SIGN -0591..05BD;N # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG -05BE;N # Pd HEBREW PUNCTUATION MAQAF -05BF;N # Mn HEBREW POINT RAFE -05C0;N # Po HEBREW PUNCTUATION PASEQ -05C1..05C2;N # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT -05C3;N # Po HEBREW PUNCTUATION SOF PASUQ -05C4..05C5;N # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C6;N # Po HEBREW PUNCTUATION NUN HAFUKHA -05C7;N # Mn HEBREW POINT QAMATS QATAN -05D0..05EA;N # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV -05EF..05F2;N # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD -05F3..05F4;N # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM -0600..0605;N # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE -0606..0608;N # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY -0609..060A;N # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN -060B;N # Sc AFGHANI SIGN -060C..060D;N # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR -060E..060F;N # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA -0610..061A;N # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA -061B;N # Po ARABIC SEMICOLON -061C;N # Cf ARABIC LETTER MARK -061D..061F;N # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK -0620..063F;N # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE -0640;N # Lm ARABIC TATWEEL -0641..064A;N # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH -064B..065F;N # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW -0660..0669;N # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE -066A..066D;N # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR -066E..066F;N # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF -0670;N # Mn ARABIC LETTER SUPERSCRIPT ALEF -0671..06D3;N # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE -06D4;N # Po ARABIC FULL STOP -06D5;N # Lo ARABIC LETTER AE -06D6..06DC;N # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN -06DD;N # Cf ARABIC END OF AYAH -06DE;N # So ARABIC START OF RUB EL HIZB -06DF..06E4;N # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA -06E5..06E6;N # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH -06E7..06E8;N # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON -06E9;N # So ARABIC PLACE OF SAJDAH -06EA..06ED;N # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM -06EE..06EF;N # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V -06F0..06F9;N # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE -06FA..06FC;N # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW -06FD..06FE;N # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN -06FF;N # Lo ARABIC LETTER HEH WITH INVERTED V -0700..070D;N # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS -070F;N # Cf SYRIAC ABBREVIATION MARK -0710;N # Lo SYRIAC LETTER ALAPH -0711;N # Mn SYRIAC LETTER SUPERSCRIPT ALAPH -0712..072F;N # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH -0730..074A;N # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH -074D..074F;N # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE -0750..077F;N # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE -0780..07A5;N # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU -07A6..07B0;N # Mn [11] THAANA ABAFILI..THAANA SUKUN -07B1;N # Lo THAANA LETTER NAA -07C0..07C9;N # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE -07CA..07EA;N # Lo [33] NKO LETTER A..NKO LETTER JONA RA -07EB..07F3;N # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -07F4..07F5;N # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE -07F6;N # So NKO SYMBOL OO DENNEN -07F7..07F9;N # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK -07FA;N # Lm NKO LAJANYALAN -07FD;N # Mn NKO DANTAYALAN -07FE..07FF;N # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN -0800..0815;N # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF -0816..0819;N # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH -081A;N # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT -081B..0823;N # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A -0824;N # Lm SAMARITAN MODIFIER LETTER SHORT A -0825..0827;N # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U -0828;N # Lm SAMARITAN MODIFIER LETTER I -0829..082D;N # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA -0830..083E;N # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU -0840..0858;N # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN -0859..085B;N # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -085E;N # Po MANDAIC PUNCTUATION -0860..086A;N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA -0870..0887;N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0888;N # Sk ARABIC RAISED ROUND DOT -0889..088E;N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL -0890..0891;N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE -0898..089F;N # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA -08A0..08C8;N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF -08C9;N # Lm ARABIC SMALL FARSI YEH -08CA..08E1;N # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA -08E2;N # Cf ARABIC DISPUTED END OF AYAH -08E3..08FF;N # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA -0900..0902;N # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA -0903;N # Mc DEVANAGARI SIGN VISARGA -0904..0939;N # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA -093A;N # Mn DEVANAGARI VOWEL SIGN OE -093B;N # Mc DEVANAGARI VOWEL SIGN OOE -093C;N # Mn DEVANAGARI SIGN NUKTA -093D;N # Lo DEVANAGARI SIGN AVAGRAHA -093E..0940;N # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II -0941..0948;N # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI -0949..094C;N # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU -094D;N # Mn DEVANAGARI SIGN VIRAMA -094E..094F;N # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW -0950;N # Lo DEVANAGARI OM -0951..0957;N # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE -0958..0961;N # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL -0962..0963;N # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL -0964..0965;N # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA -0966..096F;N # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE -0970;N # Po DEVANAGARI ABBREVIATION SIGN -0971;N # Lm DEVANAGARI SIGN HIGH SPACING DOT -0972..097F;N # Lo [14] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER BBA -0980;N # Lo BENGALI ANJI -0981;N # Mn BENGALI SIGN CANDRABINDU -0982..0983;N # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA -0985..098C;N # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L -098F..0990;N # Lo [2] BENGALI LETTER E..BENGALI LETTER AI -0993..09A8;N # Lo [22] BENGALI LETTER O..BENGALI LETTER NA -09AA..09B0;N # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA -09B2;N # Lo BENGALI LETTER LA -09B6..09B9;N # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA -09BC;N # Mn BENGALI SIGN NUKTA -09BD;N # Lo BENGALI SIGN AVAGRAHA -09BE..09C0;N # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II -09C1..09C4;N # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR -09C7..09C8;N # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI -09CB..09CC;N # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU -09CD;N # Mn BENGALI SIGN VIRAMA -09CE;N # Lo BENGALI LETTER KHANDA TA -09D7;N # Mc BENGALI AU LENGTH MARK -09DC..09DD;N # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA -09DF..09E1;N # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL -09E2..09E3;N # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL -09E6..09EF;N # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE -09F0..09F1;N # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL -09F2..09F3;N # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN -09F4..09F9;N # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN -09FA;N # So BENGALI ISSHAR -09FB;N # Sc BENGALI GANDA MARK -09FC;N # Lo BENGALI LETTER VEDIC ANUSVARA -09FD;N # Po BENGALI ABBREVIATION SIGN -09FE;N # Mn BENGALI SANDHI MARK -0A01..0A02;N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI -0A03;N # Mc GURMUKHI SIGN VISARGA -0A05..0A0A;N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU -0A0F..0A10;N # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI -0A13..0A28;N # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA -0A2A..0A30;N # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA -0A32..0A33;N # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA -0A35..0A36;N # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA -0A38..0A39;N # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA -0A3C;N # Mn GURMUKHI SIGN NUKTA -0A3E..0A40;N # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II -0A41..0A42;N # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU -0A47..0A48;N # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI -0A4B..0A4D;N # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA -0A51;N # Mn GURMUKHI SIGN UDAAT -0A59..0A5C;N # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA -0A5E;N # Lo GURMUKHI LETTER FA -0A66..0A6F;N # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE -0A70..0A71;N # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK -0A72..0A74;N # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR -0A75;N # Mn GURMUKHI SIGN YAKASH -0A76;N # Po GURMUKHI ABBREVIATION SIGN -0A81..0A82;N # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA -0A83;N # Mc GUJARATI SIGN VISARGA -0A85..0A8D;N # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E -0A8F..0A91;N # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O -0A93..0AA8;N # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA -0AAA..0AB0;N # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA -0AB2..0AB3;N # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA -0AB5..0AB9;N # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA -0ABC;N # Mn GUJARATI SIGN NUKTA -0ABD;N # Lo GUJARATI SIGN AVAGRAHA -0ABE..0AC0;N # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II -0AC1..0AC5;N # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E -0AC7..0AC8;N # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI -0AC9;N # Mc GUJARATI VOWEL SIGN CANDRA O -0ACB..0ACC;N # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU -0ACD;N # Mn GUJARATI SIGN VIRAMA -0AD0;N # Lo GUJARATI OM -0AE0..0AE1;N # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL -0AE2..0AE3;N # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL -0AE6..0AEF;N # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE -0AF0;N # Po GUJARATI ABBREVIATION SIGN -0AF1;N # Sc GUJARATI RUPEE SIGN -0AF9;N # Lo GUJARATI LETTER ZHA -0AFA..0AFF;N # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE -0B01;N # Mn ORIYA SIGN CANDRABINDU -0B02..0B03;N # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA -0B05..0B0C;N # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L -0B0F..0B10;N # Lo [2] ORIYA LETTER E..ORIYA LETTER AI -0B13..0B28;N # Lo [22] ORIYA LETTER O..ORIYA LETTER NA -0B2A..0B30;N # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA -0B32..0B33;N # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA -0B35..0B39;N # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA -0B3C;N # Mn ORIYA SIGN NUKTA -0B3D;N # Lo ORIYA SIGN AVAGRAHA -0B3E;N # Mc ORIYA VOWEL SIGN AA -0B3F;N # Mn ORIYA VOWEL SIGN I -0B40;N # Mc ORIYA VOWEL SIGN II -0B41..0B44;N # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR -0B47..0B48;N # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI -0B4B..0B4C;N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU -0B4D;N # Mn ORIYA SIGN VIRAMA -0B55..0B56;N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK -0B57;N # Mc ORIYA AU LENGTH MARK -0B5C..0B5D;N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA -0B5F..0B61;N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL -0B62..0B63;N # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL -0B66..0B6F;N # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE -0B70;N # So ORIYA ISSHAR -0B71;N # Lo ORIYA LETTER WA -0B72..0B77;N # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS -0B82;N # Mn TAMIL SIGN ANUSVARA -0B83;N # Lo TAMIL SIGN VISARGA -0B85..0B8A;N # Lo [6] TAMIL LETTER A..TAMIL LETTER UU -0B8E..0B90;N # Lo [3] TAMIL LETTER E..TAMIL LETTER AI -0B92..0B95;N # Lo [4] TAMIL LETTER O..TAMIL LETTER KA -0B99..0B9A;N # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA -0B9C;N # Lo TAMIL LETTER JA -0B9E..0B9F;N # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA -0BA3..0BA4;N # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA -0BA8..0BAA;N # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA -0BAE..0BB9;N # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA -0BBE..0BBF;N # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I -0BC0;N # Mn TAMIL VOWEL SIGN II -0BC1..0BC2;N # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU -0BC6..0BC8;N # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI -0BCA..0BCC;N # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU -0BCD;N # Mn TAMIL SIGN VIRAMA -0BD0;N # Lo TAMIL OM -0BD7;N # Mc TAMIL AU LENGTH MARK -0BE6..0BEF;N # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE -0BF0..0BF2;N # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND -0BF3..0BF8;N # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN -0BF9;N # Sc TAMIL RUPEE SIGN -0BFA;N # So TAMIL NUMBER SIGN -0C00;N # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE -0C01..0C03;N # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA -0C04;N # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE -0C05..0C0C;N # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L -0C0E..0C10;N # Lo [3] TELUGU LETTER E..TELUGU LETTER AI -0C12..0C28;N # Lo [23] TELUGU LETTER O..TELUGU LETTER NA -0C2A..0C39;N # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA -0C3C;N # Mn TELUGU SIGN NUKTA -0C3D;N # Lo TELUGU SIGN AVAGRAHA -0C3E..0C40;N # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II -0C41..0C44;N # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR -0C46..0C48;N # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI -0C4A..0C4D;N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA -0C55..0C56;N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK -0C58..0C5A;N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D;N # Lo TELUGU LETTER NAKAARA POLLU -0C60..0C61;N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL -0C62..0C63;N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL -0C66..0C6F;N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE -0C77;N # Po TELUGU SIGN SIDDHAM -0C78..0C7E;N # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR -0C7F;N # So TELUGU SIGN TUUMU -0C80;N # Lo KANNADA SIGN SPACING CANDRABINDU -0C81;N # Mn KANNADA SIGN CANDRABINDU -0C82..0C83;N # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA -0C84;N # Po KANNADA SIGN SIDDHAM -0C85..0C8C;N # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L -0C8E..0C90;N # Lo [3] KANNADA LETTER E..KANNADA LETTER AI -0C92..0CA8;N # Lo [23] KANNADA LETTER O..KANNADA LETTER NA -0CAA..0CB3;N # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA -0CB5..0CB9;N # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA -0CBC;N # Mn KANNADA SIGN NUKTA -0CBD;N # Lo KANNADA SIGN AVAGRAHA -0CBE;N # Mc KANNADA VOWEL SIGN AA -0CBF;N # Mn KANNADA VOWEL SIGN I -0CC0..0CC4;N # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR -0CC6;N # Mn KANNADA VOWEL SIGN E -0CC7..0CC8;N # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI -0CCA..0CCB;N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO -0CCC..0CCD;N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA -0CD5..0CD6;N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE;N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA -0CE0..0CE1;N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL -0CE2..0CE3;N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0CE6..0CEF;N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE -0CF1..0CF2;N # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA -0CF3;N # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT -0D00..0D01;N # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU -0D02..0D03;N # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA -0D04..0D0C;N # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L -0D0E..0D10;N # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI -0D12..0D3A;N # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA -0D3B..0D3C;N # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA -0D3D;N # Lo MALAYALAM SIGN AVAGRAHA -0D3E..0D40;N # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II -0D41..0D44;N # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR -0D46..0D48;N # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI -0D4A..0D4C;N # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU -0D4D;N # Mn MALAYALAM SIGN VIRAMA -0D4E;N # Lo MALAYALAM LETTER DOT REPH -0D4F;N # So MALAYALAM SIGN PARA -0D54..0D56;N # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL -0D57;N # Mc MALAYALAM AU LENGTH MARK -0D58..0D5E;N # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH -0D5F..0D61;N # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL -0D62..0D63;N # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL -0D66..0D6F;N # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE -0D70..0D78;N # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS -0D79;N # So MALAYALAM DATE MARK -0D7A..0D7F;N # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K -0D81;N # Mn SINHALA SIGN CANDRABINDU -0D82..0D83;N # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA -0D85..0D96;N # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA -0D9A..0DB1;N # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA -0DB3..0DBB;N # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA -0DBD;N # Lo SINHALA LETTER DANTAJA LAYANNA -0DC0..0DC6;N # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA -0DCA;N # Mn SINHALA SIGN AL-LAKUNA -0DCF..0DD1;N # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA -0DD2..0DD4;N # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA -0DD6;N # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA -0DD8..0DDF;N # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA -0DE6..0DEF;N # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE -0DF2..0DF3;N # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA -0DF4;N # Po SINHALA PUNCTUATION KUNDDALIYA -0E01..0E30;N # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A -0E31;N # Mn THAI CHARACTER MAI HAN-AKAT -0E32..0E33;N # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM -0E34..0E3A;N # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU -0E3F;N # Sc THAI CURRENCY SYMBOL BAHT -0E40..0E45;N # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO -0E46;N # Lm THAI CHARACTER MAIYAMOK -0E47..0E4E;N # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN -0E4F;N # Po THAI CHARACTER FONGMAN -0E50..0E59;N # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE -0E5A..0E5B;N # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT -0E81..0E82;N # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG -0E84;N # Lo LAO LETTER KHO TAM -0E86..0E8A;N # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM -0E8C..0EA3;N # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING -0EA5;N # Lo LAO LETTER LO LOOT -0EA7..0EB0;N # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A -0EB1;N # Mn LAO VOWEL SIGN MAI KAN -0EB2..0EB3;N # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM -0EB4..0EBC;N # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO -0EBD;N # Lo LAO SEMIVOWEL SIGN NYO -0EC0..0EC4;N # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI -0EC6;N # Lm LAO KO LA -0EC8..0ECE;N # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN -0ED0..0ED9;N # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDF;N # Lo [4] LAO HO NO..LAO LETTER KHMU NYO -0F00;N # Lo TIBETAN SYLLABLE OM -0F01..0F03;N # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA -0F04..0F12;N # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13;N # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN -0F14;N # Po TIBETAN MARK GTER TSHEG -0F15..0F17;N # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS -0F18..0F19;N # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS -0F1A..0F1F;N # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG -0F20..0F29;N # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE -0F2A..0F33;N # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO -0F34;N # So TIBETAN MARK BSDUS RTAGS -0F35;N # Mn TIBETAN MARK NGAS BZUNG NYI ZLA -0F36;N # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN -0F37;N # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS -0F38;N # So TIBETAN MARK CHE MGO -0F39;N # Mn TIBETAN MARK TSA -PHRU -0F3A;N # Ps TIBETAN MARK GUG RTAGS GYON -0F3B;N # Pe TIBETAN MARK GUG RTAGS GYAS -0F3C;N # Ps TIBETAN MARK ANG KHANG GYON -0F3D;N # Pe TIBETAN MARK ANG KHANG GYAS -0F3E..0F3F;N # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES -0F40..0F47;N # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA -0F49..0F6C;N # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA -0F71..0F7E;N # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO -0F7F;N # Mc TIBETAN SIGN RNAM BCAD -0F80..0F84;N # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA -0F85;N # Po TIBETAN MARK PALUTA -0F86..0F87;N # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS -0F88..0F8C;N # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN -0F8D..0F97;N # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA -0F99..0FBC;N # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA -0FBE..0FC5;N # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE -0FC6;N # Mn TIBETAN SYMBOL PADMA GDAN -0FC7..0FCC;N # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL -0FCE..0FCF;N # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM -0FD0..0FD4;N # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA -0FD5..0FD8;N # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS -0FD9..0FDA;N # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS -1000..102A;N # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU -102B..102C;N # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA -102D..1030;N # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU -1031;N # Mc MYANMAR VOWEL SIGN E -1032..1037;N # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW -1038;N # Mc MYANMAR SIGN VISARGA -1039..103A;N # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT -103B..103C;N # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA -103D..103E;N # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA -103F;N # Lo MYANMAR LETTER GREAT SA -1040..1049;N # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE -104A..104F;N # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE -1050..1055;N # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL -1056..1057;N # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR -1058..1059;N # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL -105A..105D;N # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE -105E..1060;N # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA -1061;N # Lo MYANMAR LETTER SGAW KAREN SHA -1062..1064;N # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO -1065..1066;N # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA -1067..106D;N # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 -106E..1070;N # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA -1071..1074;N # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE -1075..1081;N # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA -1082;N # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA -1083..1084;N # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E -1085..1086;N # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y -1087..108C;N # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 -108D;N # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE -108E;N # Lo MYANMAR LETTER RUMAI PALAUNG FA -108F;N # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 -1090..1099;N # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE -109A..109C;N # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A -109D;N # Mn MYANMAR VOWEL SIGN AITON AI -109E..109F;N # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION -10A0..10C5;N # Lu [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE -10C7;N # Lu GEORGIAN CAPITAL LETTER YN -10CD;N # Lu GEORGIAN CAPITAL LETTER AEN -10D0..10FA;N # Ll [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN -10FB;N # Po GEORGIAN PARAGRAPH SEPARATOR -10FC;N # Lm MODIFIER LETTER GEORGIAN NAR -10FD..10FF;N # Ll [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN -1100..115F;W # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER -1160..11FF;N # Lo [160] HANGUL JUNGSEONG FILLER..HANGUL JONGSEONG SSANGNIEUN -1200..1248;N # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA -124A..124D;N # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE -1250..1256;N # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO -1258;N # Lo ETHIOPIC SYLLABLE QHWA -125A..125D;N # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE -1260..1288;N # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA -128A..128D;N # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE -1290..12B0;N # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA -12B2..12B5;N # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE -12B8..12BE;N # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO -12C0;N # Lo ETHIOPIC SYLLABLE KXWA -12C2..12C5;N # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE -12C8..12D6;N # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O -12D8..1310;N # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA -1312..1315;N # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE -1318..135A;N # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -135D..135F;N # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK -1360..1368;N # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR -1369..137C;N # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND -1380..138F;N # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE -1390..1399;N # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT -13A0..13F5;N # Lu [86] CHEROKEE LETTER A..CHEROKEE LETTER MV -13F8..13FD;N # Ll [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV -1400;N # Pd CANADIAN SYLLABICS HYPHEN -1401..166C;N # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA -166D;N # So CANADIAN SYLLABICS CHI SIGN -166E;N # Po CANADIAN SYLLABICS FULL STOP -166F..167F;N # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W -1680;N # Zs OGHAM SPACE MARK -1681..169A;N # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH -169B;N # Ps OGHAM FEATHER MARK -169C;N # Pe OGHAM REVERSED FEATHER MARK -16A0..16EA;N # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X -16EB..16ED;N # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION -16EE..16F0;N # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL -16F1..16F8;N # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC -1700..1711;N # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA -1712..1714;N # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA -1715;N # Mc TAGALOG SIGN PAMUDPOD -171F;N # Lo TAGALOG LETTER ARCHAIC RA -1720..1731;N # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA -1732..1733;N # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U -1734;N # Mc HANUNOO SIGN PAMUDPOD -1735..1736;N # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION -1740..1751;N # Lo [18] BUHID LETTER A..BUHID LETTER HA -1752..1753;N # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U -1760..176C;N # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA -176E..1770;N # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA -1772..1773;N # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -1780..17B3;N # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5;N # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA -17B6;N # Mc KHMER VOWEL SIGN AA -17B7..17BD;N # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA -17BE..17C5;N # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU -17C6;N # Mn KHMER SIGN NIKAHIT -17C7..17C8;N # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU -17C9..17D3;N # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT -17D4..17D6;N # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH -17D7;N # Lm KHMER SIGN LEK TOO -17D8..17DA;N # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT -17DB;N # Sc KHMER CURRENCY SYMBOL RIEL -17DC;N # Lo KHMER SIGN AVAKRAHASANYA -17DD;N # Mn KHMER SIGN ATTHACAN -17E0..17E9;N # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE -17F0..17F9;N # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON -1800..1805;N # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS -1806;N # Pd MONGOLIAN TODO SOFT HYPHEN -1807..180A;N # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU -180B..180D;N # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE -180E;N # Cf MONGOLIAN VOWEL SEPARATOR -180F;N # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR -1810..1819;N # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE -1820..1842;N # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI -1843;N # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN -1844..1878;N # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS -1880..1884;N # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA -1885..1886;N # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA -1887..18A8;N # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA -18A9;N # Mn MONGOLIAN LETTER ALI GALI DAGALGA -18AA;N # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA -18B0..18F5;N # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S -1900..191E;N # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA -1920..1922;N # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U -1923..1926;N # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU -1927..1928;N # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O -1929..192B;N # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA -1930..1931;N # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA -1932;N # Mn LIMBU SMALL LETTER ANUSVARA -1933..1938;N # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA -1939..193B;N # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I -1940;N # So LIMBU SIGN LOO -1944..1945;N # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK -1946..194F;N # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE -1950..196D;N # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI -1970..1974;N # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 -1980..19AB;N # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA -19B0..19C9;N # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 -19D0..19D9;N # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE -19DA;N # No NEW TAI LUE THAM DIGIT ONE -19DE..19DF;N # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV -19E0..19FF;N # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC -1A00..1A16;N # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA -1A17..1A18;N # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U -1A19..1A1A;N # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O -1A1B;N # Mn BUGINESE VOWEL SIGN AE -1A1E..1A1F;N # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION -1A20..1A54;N # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA -1A55;N # Mc TAI THAM CONSONANT SIGN MEDIAL RA -1A56;N # Mn TAI THAM CONSONANT SIGN MEDIAL LA -1A57;N # Mc TAI THAM CONSONANT SIGN LA TANG LAI -1A58..1A5E;N # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA -1A60;N # Mn TAI THAM SIGN SAKOT -1A61;N # Mc TAI THAM VOWEL SIGN A -1A62;N # Mn TAI THAM VOWEL SIGN MAI SAT -1A63..1A64;N # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA -1A65..1A6C;N # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW -1A6D..1A72;N # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI -1A73..1A7C;N # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN -1A7F;N # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT -1A80..1A89;N # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE -1A90..1A99;N # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE -1AA0..1AA6;N # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA -1AA7;N # Lm TAI THAM SIGN MAI YAMOK -1AA8..1AAD;N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG -1AB0..1ABD;N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABE;N # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE;N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T -1B00..1B03;N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG -1B04;N # Mc BALINESE SIGN BISAH -1B05..1B33;N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA -1B34;N # Mn BALINESE SIGN REREKAN -1B35;N # Mc BALINESE VOWEL SIGN TEDUNG -1B36..1B3A;N # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA -1B3B;N # Mc BALINESE VOWEL SIGN RA REPA TEDUNG -1B3C;N # Mn BALINESE VOWEL SIGN LA LENGA -1B3D..1B41;N # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG -1B42;N # Mn BALINESE VOWEL SIGN PEPET -1B43..1B44;N # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG -1B45..1B4C;N # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA -1B50..1B59;N # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE -1B5A..1B60;N # Po [7] BALINESE PANTI..BALINESE PAMENENG -1B61..1B6A;N # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE -1B6B..1B73;N # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG -1B74..1B7C;N # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING -1B7D..1B7E;N # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG -1B80..1B81;N # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR -1B82;N # Mc SUNDANESE SIGN PANGWISAD -1B83..1BA0;N # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA -1BA1;N # Mc SUNDANESE CONSONANT SIGN PAMINGKAL -1BA2..1BA5;N # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU -1BA6..1BA7;N # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG -1BA8..1BA9;N # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG -1BAA;N # Mc SUNDANESE SIGN PAMAAEH -1BAB..1BAD;N # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA -1BAE..1BAF;N # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BB0..1BB9;N # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BBA..1BBF;N # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M -1BC0..1BE5;N # Lo [38] BATAK LETTER A..BATAK LETTER U -1BE6;N # Mn BATAK SIGN TOMPI -1BE7;N # Mc BATAK VOWEL SIGN E -1BE8..1BE9;N # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE -1BEA..1BEC;N # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O -1BED;N # Mn BATAK VOWEL SIGN KARO O -1BEE;N # Mc BATAK VOWEL SIGN U -1BEF..1BF1;N # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H -1BF2..1BF3;N # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN -1BFC..1BFF;N # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT -1C00..1C23;N # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A -1C24..1C2B;N # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU -1C2C..1C33;N # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T -1C34..1C35;N # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG -1C36..1C37;N # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA -1C3B..1C3F;N # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK -1C40..1C49;N # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE -1C4D..1C4F;N # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA -1C50..1C59;N # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE -1C5A..1C77;N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH -1C78..1C7D;N # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C7E..1C7F;N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD -1C80..1C88;N # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK -1C90..1CBA;N # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN -1CBD..1CBF;N # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN -1CC0..1CC7;N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA -1CD0..1CD2;N # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA -1CD3;N # Po VEDIC SIGN NIHSHVASA -1CD4..1CE0;N # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA -1CE1;N # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA -1CE2..1CE8;N # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL -1CE9..1CEC;N # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL -1CED;N # Mn VEDIC SIGN TIRYAK -1CEE..1CF3;N # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA -1CF4;N # Mn VEDIC TONE CANDRA ABOVE -1CF5..1CF6;N # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA -1CF7;N # Mc VEDIC SIGN ATIKRAMA -1CF8..1CF9;N # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE -1CFA;N # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA -1D00..1D2B;N # Ll [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D6A;N # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI -1D6B..1D77;N # Ll [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G -1D78;N # Lm MODIFIER LETTER CYRILLIC EN -1D79..1D7F;N # Ll [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE -1D80..1D9A;N # Ll [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK -1D9B..1DBF;N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA -1DC0..1DFF;N # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW -1E00..1EFF;N # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP -1F00..1F15;N # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA -1F18..1F1D;N # Lu [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA -1F20..1F45;N # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA -1F48..1F4D;N # Lu [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA -1F50..1F57;N # Ll [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI -1F59;N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA -1F5B;N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA -1F5D;N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA -1F5F..1F7D;N # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA -1F80..1FB4;N # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI -1FB6..1FBC;N # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI -1FBD;N # Sk GREEK KORONIS -1FBE;N # Ll GREEK PROSGEGRAMMENI -1FBF..1FC1;N # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI -1FC2..1FC4;N # Ll [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI -1FC6..1FCC;N # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI -1FCD..1FCF;N # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI -1FD0..1FD3;N # Ll [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA -1FD6..1FDB;N # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA -1FDD..1FDF;N # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI -1FE0..1FEC;N # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA -1FED..1FEF;N # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA -1FF2..1FF4;N # Ll [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI -1FF6..1FFC;N # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI -1FFD..1FFE;N # Sk [2] GREEK OXIA..GREEK DASIA -2000..200A;N # Zs [11] EN QUAD..HAIR SPACE -200B..200F;N # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK -2010;A # Pd HYPHEN -2011..2012;N # Pd [2] NON-BREAKING HYPHEN..FIGURE DASH -2013..2015;A # Pd [3] EN DASH..HORIZONTAL BAR -2016;A # Po DOUBLE VERTICAL LINE -2017;N # Po DOUBLE LOW LINE -2018;A # Pi LEFT SINGLE QUOTATION MARK -2019;A # Pf RIGHT SINGLE QUOTATION MARK -201A;N # Ps SINGLE LOW-9 QUOTATION MARK -201B;N # Pi SINGLE HIGH-REVERSED-9 QUOTATION MARK -201C;A # Pi LEFT DOUBLE QUOTATION MARK -201D;A # Pf RIGHT DOUBLE QUOTATION MARK -201E;N # Ps DOUBLE LOW-9 QUOTATION MARK -201F;N # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK -2020..2022;A # Po [3] DAGGER..BULLET -2023;N # Po TRIANGULAR BULLET -2024..2027;A # Po [4] ONE DOT LEADER..HYPHENATION POINT -2028;N # Zl LINE SEPARATOR -2029;N # Zp PARAGRAPH SEPARATOR -202A..202E;N # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE -202F;N # Zs NARROW NO-BREAK SPACE -2030;A # Po PER MILLE SIGN -2031;N # Po PER TEN THOUSAND SIGN -2032..2033;A # Po [2] PRIME..DOUBLE PRIME -2034;N # Po TRIPLE PRIME -2035;A # Po REVERSED PRIME -2036..2038;N # Po [3] REVERSED DOUBLE PRIME..CARET -2039;N # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK -203A;N # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -203B;A # Po REFERENCE MARK -203C..203D;N # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG -203E;A # Po OVERLINE -203F..2040;N # Pc [2] UNDERTIE..CHARACTER TIE -2041..2043;N # Po [3] CARET INSERTION POINT..HYPHEN BULLET -2044;N # Sm FRACTION SLASH -2045;N # Ps LEFT SQUARE BRACKET WITH QUILL -2046;N # Pe RIGHT SQUARE BRACKET WITH QUILL -2047..2051;N # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY -2052;N # Sm COMMERCIAL MINUS SIGN -2053;N # Po SWUNG DASH -2054;N # Pc INVERTED UNDERTIE -2055..205E;N # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS -205F;N # Zs MEDIUM MATHEMATICAL SPACE -2060..2064;N # Cf [5] WORD JOINER..INVISIBLE PLUS -2066..206F;N # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES -2070;N # No SUPERSCRIPT ZERO -2071;N # Lm SUPERSCRIPT LATIN SMALL LETTER I -2074;A # No SUPERSCRIPT FOUR -2075..2079;N # No [5] SUPERSCRIPT FIVE..SUPERSCRIPT NINE -207A..207C;N # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN -207D;N # Ps SUPERSCRIPT LEFT PARENTHESIS -207E;N # Pe SUPERSCRIPT RIGHT PARENTHESIS -207F;A # Lm SUPERSCRIPT LATIN SMALL LETTER N -2080;N # No SUBSCRIPT ZERO -2081..2084;A # No [4] SUBSCRIPT ONE..SUBSCRIPT FOUR -2085..2089;N # No [5] SUBSCRIPT FIVE..SUBSCRIPT NINE -208A..208C;N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN -208D;N # Ps SUBSCRIPT LEFT PARENTHESIS -208E;N # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C;N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T -20A0..20A8;N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN -20A9;H # Sc WON SIGN -20AA..20AB;N # Sc [2] NEW SHEQEL SIGN..DONG SIGN -20AC;A # Sc EURO SIGN -20AD..20C0;N # Sc [20] KIP SIGN..SOM SIGN -20D0..20DC;N # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE -20DD..20E0;N # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH -20E1;N # Mn COMBINING LEFT RIGHT ARROW ABOVE -20E2..20E4;N # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE -20E5..20F0;N # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE -2100..2101;N # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT -2102;N # Lu DOUBLE-STRUCK CAPITAL C -2103;A # So DEGREE CELSIUS -2104;N # So CENTRE LINE SYMBOL -2105;A # So CARE OF -2106;N # So CADA UNA -2107;N # Lu EULER CONSTANT -2108;N # So SCRUPLE -2109;A # So DEGREE FAHRENHEIT -210A..2112;N # L& [9] SCRIPT SMALL G..SCRIPT CAPITAL L -2113;A # Ll SCRIPT SMALL L -2114;N # So L B BAR SYMBOL -2115;N # Lu DOUBLE-STRUCK CAPITAL N -2116;A # So NUMERO SIGN -2117;N # So SOUND RECORDING COPYRIGHT -2118;N # Sm SCRIPT CAPITAL P -2119..211D;N # Lu [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R -211E..2120;N # So [3] PRESCRIPTION TAKE..SERVICE MARK -2121..2122;A # So [2] TELEPHONE SIGN..TRADE MARK SIGN -2123;N # So VERSICLE -2124;N # Lu DOUBLE-STRUCK CAPITAL Z -2125;N # So OUNCE SIGN -2126;A # Lu OHM SIGN -2127;N # So INVERTED OHM SIGN -2128;N # Lu BLACK-LETTER CAPITAL Z -2129;N # So TURNED GREEK SMALL LETTER IOTA -212A;N # Lu KELVIN SIGN -212B;A # Lu ANGSTROM SIGN -212C..212D;N # Lu [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C -212E;N # So ESTIMATED SYMBOL -212F..2134;N # L& [6] SCRIPT SMALL E..SCRIPT SMALL O -2135..2138;N # Lo [4] ALEF SYMBOL..DALET SYMBOL -2139;N # Ll INFORMATION SOURCE -213A..213B;N # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN -213C..213F;N # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI -2140..2144;N # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y -2145..2149;N # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J -214A;N # So PROPERTY LINE -214B;N # Sm TURNED AMPERSAND -214C..214D;N # So [2] PER SIGN..AKTIESELSKAB -214E;N # Ll TURNED SMALL F -214F;N # So SYMBOL FOR SAMARITAN SOURCE -2150..2152;N # No [3] VULGAR FRACTION ONE SEVENTH..VULGAR FRACTION ONE TENTH -2153..2154;A # No [2] VULGAR FRACTION ONE THIRD..VULGAR FRACTION TWO THIRDS -2155..215A;N # No [6] VULGAR FRACTION ONE FIFTH..VULGAR FRACTION FIVE SIXTHS -215B..215E;A # No [4] VULGAR FRACTION ONE EIGHTH..VULGAR FRACTION SEVEN EIGHTHS -215F;N # No FRACTION NUMERATOR ONE -2160..216B;A # Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE -216C..216F;N # Nl [4] ROMAN NUMERAL FIFTY..ROMAN NUMERAL ONE THOUSAND -2170..2179;A # Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN -217A..2182;N # Nl [9] SMALL ROMAN NUMERAL ELEVEN..ROMAN NUMERAL TEN THOUSAND -2183..2184;N # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C -2185..2188;N # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND -2189;A # No VULGAR FRACTION ZERO THIRDS -218A..218B;N # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE -2190..2194;A # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW -2195..2199;A # So [5] UP DOWN ARROW..SOUTH WEST ARROW -219A..219B;N # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE -219C..219F;N # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW -21A0;N # Sm RIGHTWARDS TWO HEADED ARROW -21A1..21A2;N # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL -21A3;N # Sm RIGHTWARDS ARROW WITH TAIL -21A4..21A5;N # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR -21A6;N # Sm RIGHTWARDS ARROW FROM BAR -21A7..21AD;N # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW -21AE;N # Sm LEFT RIGHT ARROW WITH STROKE -21AF..21B7;N # So [9] DOWNWARDS ZIGZAG ARROW..CLOCKWISE TOP SEMICIRCLE ARROW -21B8..21B9;A # So [2] NORTH WEST ARROW TO LONG BAR..LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR -21BA..21CD;N # So [20] ANTICLOCKWISE OPEN CIRCLE ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE -21CE..21CF;N # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE -21D0..21D1;N # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW -21D2;A # Sm RIGHTWARDS DOUBLE ARROW -21D3;N # So DOWNWARDS DOUBLE ARROW -21D4;A # Sm LEFT RIGHT DOUBLE ARROW -21D5..21E6;N # So [18] UP DOWN DOUBLE ARROW..LEFTWARDS WHITE ARROW -21E7;A # So UPWARDS WHITE ARROW -21E8..21F3;N # So [12] RIGHTWARDS WHITE ARROW..UP DOWN WHITE ARROW -21F4..21FF;N # Sm [12] RIGHT ARROW WITH SMALL CIRCLE..LEFT RIGHT OPEN-HEADED ARROW -2200;A # Sm FOR ALL -2201;N # Sm COMPLEMENT -2202..2203;A # Sm [2] PARTIAL DIFFERENTIAL..THERE EXISTS -2204..2206;N # Sm [3] THERE DOES NOT EXIST..INCREMENT -2207..2208;A # Sm [2] NABLA..ELEMENT OF -2209..220A;N # Sm [2] NOT AN ELEMENT OF..SMALL ELEMENT OF -220B;A # Sm CONTAINS AS MEMBER -220C..220E;N # Sm [3] DOES NOT CONTAIN AS MEMBER..END OF PROOF -220F;A # Sm N-ARY PRODUCT -2210;N # Sm N-ARY COPRODUCT -2211;A # Sm N-ARY SUMMATION -2212..2214;N # Sm [3] MINUS SIGN..DOT PLUS -2215;A # Sm DIVISION SLASH -2216..2219;N # Sm [4] SET MINUS..BULLET OPERATOR -221A;A # Sm SQUARE ROOT -221B..221C;N # Sm [2] CUBE ROOT..FOURTH ROOT -221D..2220;A # Sm [4] PROPORTIONAL TO..ANGLE -2221..2222;N # Sm [2] MEASURED ANGLE..SPHERICAL ANGLE -2223;A # Sm DIVIDES -2224;N # Sm DOES NOT DIVIDE -2225;A # Sm PARALLEL TO -2226;N # Sm NOT PARALLEL TO -2227..222C;A # Sm [6] LOGICAL AND..DOUBLE INTEGRAL -222D;N # Sm TRIPLE INTEGRAL -222E;A # Sm CONTOUR INTEGRAL -222F..2233;N # Sm [5] SURFACE INTEGRAL..ANTICLOCKWISE CONTOUR INTEGRAL -2234..2237;A # Sm [4] THEREFORE..PROPORTION -2238..223B;N # Sm [4] DOT MINUS..HOMOTHETIC -223C..223D;A # Sm [2] TILDE OPERATOR..REVERSED TILDE -223E..2247;N # Sm [10] INVERTED LAZY S..NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO -2248;A # Sm ALMOST EQUAL TO -2249..224B;N # Sm [3] NOT ALMOST EQUAL TO..TRIPLE TILDE -224C;A # Sm ALL EQUAL TO -224D..2251;N # Sm [5] EQUIVALENT TO..GEOMETRICALLY EQUAL TO -2252;A # Sm APPROXIMATELY EQUAL TO OR THE IMAGE OF -2253..225F;N # Sm [13] IMAGE OF OR APPROXIMATELY EQUAL TO..QUESTIONED EQUAL TO -2260..2261;A # Sm [2] NOT EQUAL TO..IDENTICAL TO -2262..2263;N # Sm [2] NOT IDENTICAL TO..STRICTLY EQUIVALENT TO -2264..2267;A # Sm [4] LESS-THAN OR EQUAL TO..GREATER-THAN OVER EQUAL TO -2268..2269;N # Sm [2] LESS-THAN BUT NOT EQUAL TO..GREATER-THAN BUT NOT EQUAL TO -226A..226B;A # Sm [2] MUCH LESS-THAN..MUCH GREATER-THAN -226C..226D;N # Sm [2] BETWEEN..NOT EQUIVALENT TO -226E..226F;A # Sm [2] NOT LESS-THAN..NOT GREATER-THAN -2270..2281;N # Sm [18] NEITHER LESS-THAN NOR EQUAL TO..DOES NOT SUCCEED -2282..2283;A # Sm [2] SUBSET OF..SUPERSET OF -2284..2285;N # Sm [2] NOT A SUBSET OF..NOT A SUPERSET OF -2286..2287;A # Sm [2] SUBSET OF OR EQUAL TO..SUPERSET OF OR EQUAL TO -2288..2294;N # Sm [13] NEITHER A SUBSET OF NOR EQUAL TO..SQUARE CUP -2295;A # Sm CIRCLED PLUS -2296..2298;N # Sm [3] CIRCLED MINUS..CIRCLED DIVISION SLASH -2299;A # Sm CIRCLED DOT OPERATOR -229A..22A4;N # Sm [11] CIRCLED RING OPERATOR..DOWN TACK -22A5;A # Sm UP TACK -22A6..22BE;N # Sm [25] ASSERTION..RIGHT ANGLE WITH ARC -22BF;A # Sm RIGHT TRIANGLE -22C0..22FF;N # Sm [64] N-ARY LOGICAL AND..Z NOTATION BAG MEMBERSHIP -2300..2307;N # So [8] DIAMETER SIGN..WAVY LINE -2308;N # Ps LEFT CEILING -2309;N # Pe RIGHT CEILING -230A;N # Ps LEFT FLOOR -230B;N # Pe RIGHT FLOOR -230C..2311;N # So [6] BOTTOM RIGHT CROP..SQUARE LOZENGE -2312;A # So ARC -2313..2319;N # So [7] SEGMENT..TURNED NOT SIGN -231A..231B;W # So [2] WATCH..HOURGLASS -231C..231F;N # So [4] TOP LEFT CORNER..BOTTOM RIGHT CORNER -2320..2321;N # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL -2322..2328;N # So [7] FROWN..KEYBOARD -2329;W # Ps LEFT-POINTING ANGLE BRACKET -232A;W # Pe RIGHT-POINTING ANGLE BRACKET -232B..237B;N # So [81] ERASE TO THE LEFT..NOT CHECK MARK -237C;N # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW -237D..239A;N # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL -239B..23B3;N # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM -23B4..23DB;N # So [40] TOP SQUARE BRACKET..FUSE -23DC..23E1;N # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET -23E2..23E8;N # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL -23E9..23EC;W # So [4] BLACK RIGHT-POINTING DOUBLE TRIANGLE..BLACK DOWN-POINTING DOUBLE TRIANGLE -23ED..23EF;N # So [3] BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR -23F0;W # So ALARM CLOCK -23F1..23F2;N # So [2] STOPWATCH..TIMER CLOCK -23F3;W # So HOURGLASS WITH FLOWING SAND -23F4..23FF;N # So [12] BLACK MEDIUM LEFT-POINTING TRIANGLE..OBSERVER EYE SYMBOL -2400..2426;N # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO -2440..244A;N # So [11] OCR HOOK..OCR DOUBLE BACKSLASH -2460..249B;A # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP -249C..24E9;A # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z -24EA;N # No CIRCLED DIGIT ZERO -24EB..24FF;A # No [21] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED DIGIT ZERO -2500..254B;A # So [76] BOX DRAWINGS LIGHT HORIZONTAL..BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL -254C..254F;N # So [4] BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL..BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL -2550..2573;A # So [36] BOX DRAWINGS DOUBLE HORIZONTAL..BOX DRAWINGS LIGHT DIAGONAL CROSS -2574..257F;N # So [12] BOX DRAWINGS LIGHT LEFT..BOX DRAWINGS HEAVY UP AND LIGHT DOWN -2580..258F;A # So [16] UPPER HALF BLOCK..LEFT ONE EIGHTH BLOCK -2590..2591;N # So [2] RIGHT HALF BLOCK..LIGHT SHADE -2592..2595;A # So [4] MEDIUM SHADE..RIGHT ONE EIGHTH BLOCK -2596..259F;N # So [10] QUADRANT LOWER LEFT..QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT -25A0..25A1;A # So [2] BLACK SQUARE..WHITE SQUARE -25A2;N # So WHITE SQUARE WITH ROUNDED CORNERS -25A3..25A9;A # So [7] WHITE SQUARE CONTAINING BLACK SMALL SQUARE..SQUARE WITH DIAGONAL CROSSHATCH FILL -25AA..25B1;N # So [8] BLACK SMALL SQUARE..WHITE PARALLELOGRAM -25B2..25B3;A # So [2] BLACK UP-POINTING TRIANGLE..WHITE UP-POINTING TRIANGLE -25B4..25B5;N # So [2] BLACK UP-POINTING SMALL TRIANGLE..WHITE UP-POINTING SMALL TRIANGLE -25B6;A # So BLACK RIGHT-POINTING TRIANGLE -25B7;A # Sm WHITE RIGHT-POINTING TRIANGLE -25B8..25BB;N # So [4] BLACK RIGHT-POINTING SMALL TRIANGLE..WHITE RIGHT-POINTING POINTER -25BC..25BD;A # So [2] BLACK DOWN-POINTING TRIANGLE..WHITE DOWN-POINTING TRIANGLE -25BE..25BF;N # So [2] BLACK DOWN-POINTING SMALL TRIANGLE..WHITE DOWN-POINTING SMALL TRIANGLE -25C0;A # So BLACK LEFT-POINTING TRIANGLE -25C1;A # Sm WHITE LEFT-POINTING TRIANGLE -25C2..25C5;N # So [4] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE LEFT-POINTING POINTER -25C6..25C8;A # So [3] BLACK DIAMOND..WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND -25C9..25CA;N # So [2] FISHEYE..LOZENGE -25CB;A # So WHITE CIRCLE -25CC..25CD;N # So [2] DOTTED CIRCLE..CIRCLE WITH VERTICAL FILL -25CE..25D1;A # So [4] BULLSEYE..CIRCLE WITH RIGHT HALF BLACK -25D2..25E1;N # So [16] CIRCLE WITH LOWER HALF BLACK..LOWER HALF CIRCLE -25E2..25E5;A # So [4] BLACK LOWER RIGHT TRIANGLE..BLACK UPPER RIGHT TRIANGLE -25E6..25EE;N # So [9] WHITE BULLET..UP-POINTING TRIANGLE WITH RIGHT HALF BLACK -25EF;A # So LARGE CIRCLE -25F0..25F7;N # So [8] WHITE SQUARE WITH UPPER LEFT QUADRANT..WHITE CIRCLE WITH UPPER RIGHT QUADRANT -25F8..25FC;N # Sm [5] UPPER LEFT TRIANGLE..BLACK MEDIUM SQUARE -25FD..25FE;W # Sm [2] WHITE MEDIUM SMALL SQUARE..BLACK MEDIUM SMALL SQUARE -25FF;N # Sm LOWER RIGHT TRIANGLE -2600..2604;N # So [5] BLACK SUN WITH RAYS..COMET -2605..2606;A # So [2] BLACK STAR..WHITE STAR -2607..2608;N # So [2] LIGHTNING..THUNDERSTORM -2609;A # So SUN -260A..260D;N # So [4] ASCENDING NODE..OPPOSITION -260E..260F;A # So [2] BLACK TELEPHONE..WHITE TELEPHONE -2610..2613;N # So [4] BALLOT BOX..SALTIRE -2614..2615;W # So [2] UMBRELLA WITH RAIN DROPS..HOT BEVERAGE -2616..261B;N # So [6] WHITE SHOGI PIECE..BLACK RIGHT POINTING INDEX -261C;A # So WHITE LEFT POINTING INDEX -261D;N # So WHITE UP POINTING INDEX -261E;A # So WHITE RIGHT POINTING INDEX -261F..263F;N # So [33] WHITE DOWN POINTING INDEX..MERCURY -2640;A # So FEMALE SIGN -2641;N # So EARTH -2642;A # So MALE SIGN -2643..2647;N # So [5] JUPITER..PLUTO -2648..2653;W # So [12] ARIES..PISCES -2654..265F;N # So [12] WHITE CHESS KING..BLACK CHESS PAWN -2660..2661;A # So [2] BLACK SPADE SUIT..WHITE HEART SUIT -2662;N # So WHITE DIAMOND SUIT -2663..2665;A # So [3] BLACK CLUB SUIT..BLACK HEART SUIT -2666;N # So BLACK DIAMOND SUIT -2667..266A;A # So [4] WHITE CLUB SUIT..EIGHTH NOTE -266B;N # So BEAMED EIGHTH NOTES -266C..266D;A # So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN -266E;N # So MUSIC NATURAL SIGN -266F;A # Sm MUSIC SHARP SIGN -2670..267E;N # So [15] WEST SYRIAC CROSS..PERMANENT PAPER SIGN -267F;W # So WHEELCHAIR SYMBOL -2680..2692;N # So [19] DIE FACE-1..HAMMER AND PICK -2693;W # So ANCHOR -2694..269D;N # So [10] CROSSED SWORDS..OUTLINED WHITE STAR -269E..269F;A # So [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT -26A0;N # So WARNING SIGN -26A1;W # So HIGH VOLTAGE SIGN -26A2..26A9;N # So [8] DOUBLED FEMALE SIGN..HORIZONTAL MALE WITH STROKE SIGN -26AA..26AB;W # So [2] MEDIUM WHITE CIRCLE..MEDIUM BLACK CIRCLE -26AC..26BC;N # So [17] MEDIUM SMALL WHITE CIRCLE..SESQUIQUADRATE -26BD..26BE;W # So [2] SOCCER BALL..BASEBALL -26BF;A # So SQUARED KEY -26C0..26C3;N # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING -26C4..26C5;W # So [2] SNOWMAN WITHOUT SNOW..SUN BEHIND CLOUD -26C6..26CD;A # So [8] RAIN..DISABLED CAR -26CE;W # So OPHIUCHUS -26CF..26D3;A # So [5] PICK..CHAINS -26D4;W # So NO ENTRY -26D5..26E1;A # So [13] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..RESTRICTED LEFT ENTRY-2 -26E2;N # So ASTRONOMICAL SYMBOL FOR URANUS -26E3;A # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE -26E4..26E7;N # So [4] PENTAGRAM..INVERTED PENTAGRAM -26E8..26E9;A # So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE -26EA;W # So CHURCH -26EB..26F1;A # So [7] CASTLE..UMBRELLA ON GROUND -26F2..26F3;W # So [2] FOUNTAIN..FLAG IN HOLE -26F4;A # So FERRY -26F5;W # So SAILBOAT -26F6..26F9;A # So [4] SQUARE FOUR CORNERS..PERSON WITH BALL -26FA;W # So TENT -26FB..26FC;A # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL -26FD;W # So FUEL PUMP -26FE..26FF;A # So [2] CUP ON BLACK SQUARE..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE -2700..2704;N # So [5] BLACK SAFETY SCISSORS..WHITE SCISSORS -2705;W # So WHITE HEAVY CHECK MARK -2706..2709;N # So [4] TELEPHONE LOCATION SIGN..ENVELOPE -270A..270B;W # So [2] RAISED FIST..RAISED HAND -270C..2727;N # So [28] VICTORY HAND..WHITE FOUR POINTED STAR -2728;W # So SPARKLES -2729..273C;N # So [20] STRESS OUTLINED WHITE STAR..OPEN CENTRE TEARDROP-SPOKED ASTERISK -273D;A # So HEAVY TEARDROP-SPOKED ASTERISK -273E..274B;N # So [14] SIX PETALLED BLACK AND WHITE FLORETTE..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK -274C;W # So CROSS MARK -274D;N # So SHADOWED WHITE CIRCLE -274E;W # So NEGATIVE SQUARED CROSS MARK -274F..2752;N # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE -2753..2755;W # So [3] BLACK QUESTION MARK ORNAMENT..WHITE EXCLAMATION MARK ORNAMENT -2756;N # So BLACK DIAMOND MINUS WHITE X -2757;W # So HEAVY EXCLAMATION MARK SYMBOL -2758..2767;N # So [16] LIGHT VERTICAL BAR..ROTATED FLORAL HEART BULLET -2768;N # Ps MEDIUM LEFT PARENTHESIS ORNAMENT -2769;N # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT -276A;N # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT -276B;N # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT -276C;N # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT -276D;N # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT -276E;N # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT -276F;N # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT -2770;N # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT -2771;N # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT -2772;N # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT -2773;N # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT -2774;N # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT -2775;N # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT -2776..277F;A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN -2780..2793;N # No [20] DINGBAT CIRCLED SANS-SERIF DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN -2794;N # So HEAVY WIDE-HEADED RIGHTWARDS ARROW -2795..2797;W # So [3] HEAVY PLUS SIGN..HEAVY DIVISION SIGN -2798..27AF;N # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW -27B0;W # So CURLY LOOP -27B1..27BE;N # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW -27BF;W # So DOUBLE CURLY LOOP -27C0..27C4;N # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET -27C5;N # Ps LEFT S-SHAPED BAG DELIMITER -27C6;N # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27E5;N # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK -27E6;Na # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET -27E7;Na # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET -27E8;Na # Ps MATHEMATICAL LEFT ANGLE BRACKET -27E9;Na # Pe MATHEMATICAL RIGHT ANGLE BRACKET -27EA;Na # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET -27EB;Na # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET -27EC;Na # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET -27ED;Na # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET -27EE;N # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS -27EF;N # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS -27F0..27FF;N # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW -2800..28FF;N # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 -2900..297F;N # Sm [128] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..DOWN FISH TAIL -2980..2982;N # Sm [3] TRIPLE VERTICAL BAR DELIMITER..Z NOTATION TYPE COLON -2983;N # Ps LEFT WHITE CURLY BRACKET -2984;N # Pe RIGHT WHITE CURLY BRACKET -2985;Na # Ps LEFT WHITE PARENTHESIS -2986;Na # Pe RIGHT WHITE PARENTHESIS -2987;N # Ps Z NOTATION LEFT IMAGE BRACKET -2988;N # Pe Z NOTATION RIGHT IMAGE BRACKET -2989;N # Ps Z NOTATION LEFT BINDING BRACKET -298A;N # Pe Z NOTATION RIGHT BINDING BRACKET -298B;N # Ps LEFT SQUARE BRACKET WITH UNDERBAR -298C;N # Pe RIGHT SQUARE BRACKET WITH UNDERBAR -298D;N # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER -298E;N # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER -298F;N # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER -2990;N # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER -2991;N # Ps LEFT ANGLE BRACKET WITH DOT -2992;N # Pe RIGHT ANGLE BRACKET WITH DOT -2993;N # Ps LEFT ARC LESS-THAN BRACKET -2994;N # Pe RIGHT ARC GREATER-THAN BRACKET -2995;N # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET -2996;N # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET -2997;N # Ps LEFT BLACK TORTOISE SHELL BRACKET -2998;N # Pe RIGHT BLACK TORTOISE SHELL BRACKET -2999..29D7;N # Sm [63] DOTTED FENCE..BLACK HOURGLASS -29D8;N # Ps LEFT WIGGLY FENCE -29D9;N # Pe RIGHT WIGGLY FENCE -29DA;N # Ps LEFT DOUBLE WIGGLY FENCE -29DB;N # Pe RIGHT DOUBLE WIGGLY FENCE -29DC..29FB;N # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS -29FC;N # Ps LEFT-POINTING CURVED ANGLE BRACKET -29FD;N # Pe RIGHT-POINTING CURVED ANGLE BRACKET -29FE..29FF;N # Sm [2] TINY..MINY -2A00..2AFF;N # Sm [256] N-ARY CIRCLED DOT OPERATOR..N-ARY WHITE VERTICAL BAR -2B00..2B1A;N # So [27] NORTH EAST WHITE ARROW..DOTTED SQUARE -2B1B..2B1C;W # So [2] BLACK LARGE SQUARE..WHITE LARGE SQUARE -2B1D..2B2F;N # So [19] BLACK VERY SMALL SQUARE..WHITE VERTICAL ELLIPSE -2B30..2B44;N # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET -2B45..2B46;N # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW -2B47..2B4C;N # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR -2B4D..2B4F;N # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW -2B50;W # So WHITE MEDIUM STAR -2B51..2B54;N # So [4] BLACK SMALL STAR..WHITE RIGHT-POINTING PENTAGON -2B55;W # So HEAVY LARGE CIRCLE -2B56..2B59;A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE -2B5A..2B73;N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95;N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF;N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL -2C00..2C5F;N # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI -2C60..2C7B;N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E -2C7C..2C7D;N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V -2C7E..2C7F;N # Lu [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL -2C80..2CE4;N # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI -2CE5..2CEA;N # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA -2CEB..2CEE;N # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA -2CEF..2CF1;N # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS -2CF2..2CF3;N # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI -2CF9..2CFC;N # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER -2CFD;N # No COPTIC FRACTION ONE HALF -2CFE..2CFF;N # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER -2D00..2D25;N # Ll [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D27;N # Ll GEORGIAN SMALL LETTER YN -2D2D;N # Ll GEORGIAN SMALL LETTER AEN -2D30..2D67;N # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO -2D6F;N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK -2D70;N # Po TIFINAGH SEPARATOR MARK -2D7F;N # Mn TIFINAGH CONSONANT JOINER -2D80..2D96;N # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE -2DA0..2DA6;N # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO -2DA8..2DAE;N # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO -2DB0..2DB6;N # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO -2DB8..2DBE;N # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO -2DC0..2DC6;N # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO -2DC8..2DCE;N # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO -2DD0..2DD6;N # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO -2DD8..2DDE;N # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO -2DE0..2DFF;N # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -2E00..2E01;N # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER -2E02;N # Pi LEFT SUBSTITUTION BRACKET -2E03;N # Pf RIGHT SUBSTITUTION BRACKET -2E04;N # Pi LEFT DOTTED SUBSTITUTION BRACKET -2E05;N # Pf RIGHT DOTTED SUBSTITUTION BRACKET -2E06..2E08;N # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER -2E09;N # Pi LEFT TRANSPOSITION BRACKET -2E0A;N # Pf RIGHT TRANSPOSITION BRACKET -2E0B;N # Po RAISED SQUARE -2E0C;N # Pi LEFT RAISED OMISSION BRACKET -2E0D;N # Pf RIGHT RAISED OMISSION BRACKET -2E0E..2E16;N # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE -2E17;N # Pd DOUBLE OBLIQUE HYPHEN -2E18..2E19;N # Po [2] INVERTED INTERROBANG..PALM BRANCH -2E1A;N # Pd HYPHEN WITH DIAERESIS -2E1B;N # Po TILDE WITH RING ABOVE -2E1C;N # Pi LEFT LOW PARAPHRASE BRACKET -2E1D;N # Pf RIGHT LOW PARAPHRASE BRACKET -2E1E..2E1F;N # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW -2E20;N # Pi LEFT VERTICAL BAR WITH QUILL -2E21;N # Pf RIGHT VERTICAL BAR WITH QUILL -2E22;N # Ps TOP LEFT HALF BRACKET -2E23;N # Pe TOP RIGHT HALF BRACKET -2E24;N # Ps BOTTOM LEFT HALF BRACKET -2E25;N # Pe BOTTOM RIGHT HALF BRACKET -2E26;N # Ps LEFT SIDEWAYS U BRACKET -2E27;N # Pe RIGHT SIDEWAYS U BRACKET -2E28;N # Ps LEFT DOUBLE PARENTHESIS -2E29;N # Pe RIGHT DOUBLE PARENTHESIS -2E2A..2E2E;N # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK -2E2F;N # Lm VERTICAL TILDE -2E30..2E39;N # Po [10] RING POINT..TOP HALF SECTION SIGN -2E3A..2E3B;N # Pd [2] TWO-EM DASH..THREE-EM DASH -2E3C..2E3F;N # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM -2E40;N # Pd DOUBLE HYPHEN -2E41;N # Po REVERSED COMMA -2E42;N # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK -2E43..2E4F;N # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER -2E50..2E51;N # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR -2E52..2E54;N # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK -2E55;N # Ps LEFT SQUARE BRACKET WITH STROKE -2E56;N # Pe RIGHT SQUARE BRACKET WITH STROKE -2E57;N # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE -2E58;N # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE -2E59;N # Ps TOP HALF LEFT PARENTHESIS -2E5A;N # Pe TOP HALF RIGHT PARENTHESIS -2E5B;N # Ps BOTTOM HALF LEFT PARENTHESIS -2E5C;N # Pe BOTTOM HALF RIGHT PARENTHESIS -2E5D;N # Pd OBLIQUE HYPHEN -2E80..2E99;W # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP -2E9B..2EF3;W # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE -2F00..2FD5;W # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE -2FF0..2FFB;W # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID -3000;F # Zs IDEOGRAPHIC SPACE -3001..3003;W # Po [3] IDEOGRAPHIC COMMA..DITTO MARK -3004;W # So JAPANESE INDUSTRIAL STANDARD SYMBOL -3005;W # Lm IDEOGRAPHIC ITERATION MARK -3006;W # Lo IDEOGRAPHIC CLOSING MARK -3007;W # Nl IDEOGRAPHIC NUMBER ZERO -3008;W # Ps LEFT ANGLE BRACKET -3009;W # Pe RIGHT ANGLE BRACKET -300A;W # Ps LEFT DOUBLE ANGLE BRACKET -300B;W # Pe RIGHT DOUBLE ANGLE BRACKET -300C;W # Ps LEFT CORNER BRACKET -300D;W # Pe RIGHT CORNER BRACKET -300E;W # Ps LEFT WHITE CORNER BRACKET -300F;W # Pe RIGHT WHITE CORNER BRACKET -3010;W # Ps LEFT BLACK LENTICULAR BRACKET -3011;W # Pe RIGHT BLACK LENTICULAR BRACKET -3012..3013;W # So [2] POSTAL MARK..GETA MARK -3014;W # Ps LEFT TORTOISE SHELL BRACKET -3015;W # Pe RIGHT TORTOISE SHELL BRACKET -3016;W # Ps LEFT WHITE LENTICULAR BRACKET -3017;W # Pe RIGHT WHITE LENTICULAR BRACKET -3018;W # Ps LEFT WHITE TORTOISE SHELL BRACKET -3019;W # Pe RIGHT WHITE TORTOISE SHELL BRACKET -301A;W # Ps LEFT WHITE SQUARE BRACKET -301B;W # Pe RIGHT WHITE SQUARE BRACKET -301C;W # Pd WAVE DASH -301D;W # Ps REVERSED DOUBLE PRIME QUOTATION MARK -301E..301F;W # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK -3020;W # So POSTAL MARK FACE -3021..3029;W # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -302A..302D;W # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK -302E..302F;W # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK -3030;W # Pd WAVY DASH -3031..3035;W # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF -3036..3037;W # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL -3038..303A;W # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY -303B;W # Lm VERTICAL IDEOGRAPHIC ITERATION MARK -303C;W # Lo MASU MARK -303D;W # Po PART ALTERNATION MARK -303E;W # So IDEOGRAPHIC VARIATION INDICATOR -303F;N # So IDEOGRAPHIC HALF FILL SPACE -3041..3096;W # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE -3099..309A;W # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK -309B..309C;W # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK -309D..309E;W # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK -309F;W # Lo HIRAGANA DIGRAPH YORI -30A0;W # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN -30A1..30FA;W # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO -30FB;W # Po KATAKANA MIDDLE DOT -30FC..30FE;W # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK -30FF;W # Lo KATAKANA DIGRAPH KOTO -3105..312F;W # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN -3131..318E;W # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE -3190..3191;W # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK -3192..3195;W # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK -3196..319F;W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK -31A0..31BF;W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH -31C0..31E3;W # So [36] CJK STROKE T..CJK STROKE Q -31F0..31FF;W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO -3200..321E;W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU -3220..3229;W # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN -322A..3247;W # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO -3248..324F;A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE -3250;W # So PARTNERSHIP SIGN -3251..325F;W # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE -3260..327F;W # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL -3280..3289;W # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN -328A..32B0;W # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT -32B1..32BF;W # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY -32C0..32FF;W # So [64] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA -3300..33FF;W # So [256] SQUARE APAATO..SQUARE GAL -3400..4DBF;W # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF -4DC0..4DFF;N # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION -4E00..9FFF;W # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF -A000..A014;W # Lo [21] YI SYLLABLE IT..YI SYLLABLE E -A015;W # Lm YI SYLLABLE WU -A016..A48C;W # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR -A490..A4C6;W # So [55] YI RADICAL QOT..YI RADICAL KE -A4D0..A4F7;N # Lo [40] LISU LETTER BA..LISU LETTER OE -A4F8..A4FD;N # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU -A4FE..A4FF;N # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP -A500..A60B;N # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG -A60C;N # Lm VAI SYLLABLE LENGTHENER -A60D..A60F;N # Po [3] VAI COMMA..VAI QUESTION MARK -A610..A61F;N # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG -A620..A629;N # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE -A62A..A62B;N # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO -A640..A66D;N # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O -A66E;N # Lo CYRILLIC LETTER MULTIOCULAR O -A66F;N # Mn COMBINING CYRILLIC VZMET -A670..A672;N # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A673;N # Po SLAVONIC ASTERISK -A674..A67D;N # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK -A67E;N # Po CYRILLIC KAVYKA -A67F;N # Lm CYRILLIC PAYEROK -A680..A69B;N # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O -A69C..A69D;N # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN -A69E..A69F;N # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E -A6A0..A6E5;N # Lo [70] BAMUM LETTER A..BAMUM LETTER KI -A6E6..A6EF;N # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM -A6F0..A6F1;N # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS -A6F2..A6F7;N # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK -A700..A716;N # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR -A717..A71F;N # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK -A720..A721;N # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE -A722..A76F;N # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON -A770;N # Lm MODIFIER LETTER US -A771..A787;N # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T -A788;N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT -A789..A78A;N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN -A78B..A78E;N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A78F;N # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CA;N # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY -A7D0..A7D1;N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3;N # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9;N # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S -A7F2..A7F4;N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q -A7F5..A7F6;N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H -A7F7;N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I -A7F8..A7F9;N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE -A7FA;N # Ll LATIN LETTER SMALL CAPITAL TURNED M -A7FB..A7FF;N # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M -A800..A801;N # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I -A802;N # Mn SYLOTI NAGRI SIGN DVISVARA -A803..A805;N # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O -A806;N # Mn SYLOTI NAGRI SIGN HASANTA -A807..A80A;N # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO -A80B;N # Mn SYLOTI NAGRI SIGN ANUSVARA -A80C..A822;N # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO -A823..A824;N # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I -A825..A826;N # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E -A827;N # Mc SYLOTI NAGRI VOWEL SIGN OO -A828..A82B;N # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 -A82C;N # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA -A830..A835;N # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS -A836..A837;N # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK -A838;N # Sc NORTH INDIC RUPEE MARK -A839;N # So NORTH INDIC QUANTITY MARK -A840..A873;N # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU -A874..A877;N # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD -A880..A881;N # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA -A882..A8B3;N # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA -A8B4..A8C3;N # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU -A8C4..A8C5;N # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU -A8CE..A8CF;N # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA -A8D0..A8D9;N # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE -A8E0..A8F1;N # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA -A8F2..A8F7;N # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA -A8F8..A8FA;N # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET -A8FB;N # Lo DEVANAGARI HEADSTROKE -A8FC;N # Po DEVANAGARI SIGN SIDDHAM -A8FD..A8FE;N # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY -A8FF;N # Mn DEVANAGARI VOWEL SIGN AY -A900..A909;N # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE -A90A..A925;N # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO -A926..A92D;N # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU -A92E..A92F;N # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA -A930..A946;N # Lo [23] REJANG LETTER KA..REJANG LETTER A -A947..A951;N # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R -A952..A953;N # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA -A95F;N # Po REJANG SECTION MARK -A960..A97C;W # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH -A980..A982;N # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR -A983;N # Mc JAVANESE SIGN WIGNYAN -A984..A9B2;N # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA -A9B3;N # Mn JAVANESE SIGN CECAK TELU -A9B4..A9B5;N # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG -A9B6..A9B9;N # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT -A9BA..A9BB;N # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE -A9BC..A9BD;N # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET -A9BE..A9C0;N # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON -A9C1..A9CD;N # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH -A9CF;N # Lm JAVANESE PANGRANGKEP -A9D0..A9D9;N # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE -A9DE..A9DF;N # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN -A9E0..A9E4;N # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA -A9E5;N # Mn MYANMAR SIGN SHAN SAW -A9E6;N # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION -A9E7..A9EF;N # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA -A9F0..A9F9;N # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE -A9FA..A9FE;N # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA -AA00..AA28;N # Lo [41] CHAM LETTER A..CHAM LETTER HA -AA29..AA2E;N # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE -AA2F..AA30;N # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI -AA31..AA32;N # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE -AA33..AA34;N # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA -AA35..AA36;N # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA -AA40..AA42;N # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG -AA43;N # Mn CHAM CONSONANT SIGN FINAL NG -AA44..AA4B;N # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS -AA4C;N # Mn CHAM CONSONANT SIGN FINAL M -AA4D;N # Mc CHAM CONSONANT SIGN FINAL H -AA50..AA59;N # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE -AA5C..AA5F;N # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA -AA60..AA6F;N # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA -AA70;N # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION -AA71..AA76;N # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM -AA77..AA79;N # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO -AA7A;N # Lo MYANMAR LETTER AITON RA -AA7B;N # Mc MYANMAR SIGN PAO KAREN TONE -AA7C;N # Mn MYANMAR SIGN TAI LAING TONE-2 -AA7D;N # Mc MYANMAR SIGN TAI LAING TONE-5 -AA7E..AA7F;N # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA -AA80..AAAF;N # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O -AAB0;N # Mn TAI VIET MAI KANG -AAB1;N # Lo TAI VIET VOWEL AA -AAB2..AAB4;N # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U -AAB5..AAB6;N # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O -AAB7..AAB8;N # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA -AAB9..AABD;N # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN -AABE..AABF;N # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK -AAC0;N # Lo TAI VIET TONE MAI NUENG -AAC1;N # Mn TAI VIET TONE MAI THO -AAC2;N # Lo TAI VIET TONE MAI SONG -AADB..AADC;N # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG -AADD;N # Lm TAI VIET SYMBOL SAM -AADE..AADF;N # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI -AAE0..AAEA;N # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA -AAEB;N # Mc MEETEI MAYEK VOWEL SIGN II -AAEC..AAED;N # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI -AAEE..AAEF;N # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU -AAF0..AAF1;N # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM -AAF2;N # Lo MEETEI MAYEK ANJI -AAF3..AAF4;N # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK -AAF5;N # Mc MEETEI MAYEK VOWEL SIGN VISARGA -AAF6;N # Mn MEETEI MAYEK VIRAMA -AB01..AB06;N # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO -AB09..AB0E;N # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO -AB11..AB16;N # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO -AB20..AB26;N # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO -AB28..AB2E;N # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO -AB30..AB5A;N # Ll [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG -AB5B;N # Sk MODIFIER BREVE WITH INVERTED BREVE -AB5C..AB5F;N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK -AB60..AB68;N # Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE -AB69;N # Lm MODIFIER LETTER SMALL TURNED W -AB6A..AB6B;N # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK -AB70..ABBF;N # Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA -ABC0..ABE2;N # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM -ABE3..ABE4;N # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP -ABE5;N # Mn MEETEI MAYEK VOWEL SIGN ANAP -ABE6..ABE7;N # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP -ABE8;N # Mn MEETEI MAYEK VOWEL SIGN UNAP -ABE9..ABEA;N # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG -ABEB;N # Po MEETEI MAYEK CHEIKHEI -ABEC;N # Mc MEETEI MAYEK LUM IYEK -ABED;N # Mn MEETEI MAYEK APUN IYEK -ABF0..ABF9;N # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE -AC00..D7A3;W # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH -D7B0..D7C6;N # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E -D7CB..D7FB;N # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -D800..DB7F;N # Cs [896] .. -DB80..DBFF;N # Cs [128] .. -DC00..DFFF;N # Cs [1024] .. -E000..F8FF;A # Co [6400] .. -F900..FA6D;W # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D -FA6E..FA6F;W # Cn [2] .. -FA70..FAD9;W # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 -FADA..FAFF;W # Cn [38] .. -FB00..FB06;N # Ll [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST -FB13..FB17;N # Ll [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH -FB1D;N # Lo HEBREW LETTER YOD WITH HIRIQ -FB1E;N # Mn HEBREW POINT JUDEO-SPANISH VARIKA -FB1F..FB28;N # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV -FB29;N # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN -FB2A..FB36;N # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH -FB38..FB3C;N # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH -FB3E;N # Lo HEBREW LETTER MEM WITH DAGESH -FB40..FB41;N # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH -FB43..FB44;N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH -FB46..FB4F;N # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED -FB50..FBB1;N # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM -FBB2..FBC2;N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE -FBD3..FD3D;N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM -FD3E;N # Pe ORNATE LEFT PARENTHESIS -FD3F;N # Ps ORNATE RIGHT PARENTHESIS -FD40..FD4F;N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH -FD50..FD8F;N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD92..FDC7;N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF;N # So ARABIC LIGATURE SALAAMUHU ALAYNAA -FDF0..FDFB;N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU -FDFC;N # Sc RIAL SIGN -FDFD..FDFF;N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL -FE00..FE0F;A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 -FE10..FE16;W # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK -FE17;W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET -FE18;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET -FE19;W # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS -FE20..FE2F;N # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF -FE30;W # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER -FE31..FE32;W # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH -FE33..FE34;W # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE -FE35;W # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS -FE36;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS -FE37;W # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET -FE38;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET -FE39;W # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET -FE3A;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET -FE3B;W # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET -FE3C;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET -FE3D;W # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET -FE3E;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET -FE3F;W # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET -FE40;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET -FE41;W # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET -FE42;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET -FE43;W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET -FE44;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET -FE45..FE46;W # Po [2] SESAME DOT..WHITE SESAME DOT -FE47;W # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET -FE48;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET -FE49..FE4C;W # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE -FE4D..FE4F;W # Pc [3] DASHED LOW LINE..WAVY LOW LINE -FE50..FE52;W # Po [3] SMALL COMMA..SMALL FULL STOP -FE54..FE57;W # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK -FE58;W # Pd SMALL EM DASH -FE59;W # Ps SMALL LEFT PARENTHESIS -FE5A;W # Pe SMALL RIGHT PARENTHESIS -FE5B;W # Ps SMALL LEFT CURLY BRACKET -FE5C;W # Pe SMALL RIGHT CURLY BRACKET -FE5D;W # Ps SMALL LEFT TORTOISE SHELL BRACKET -FE5E;W # Pe SMALL RIGHT TORTOISE SHELL BRACKET -FE5F..FE61;W # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK -FE62;W # Sm SMALL PLUS SIGN -FE63;W # Pd SMALL HYPHEN-MINUS -FE64..FE66;W # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN -FE68;W # Po SMALL REVERSE SOLIDUS -FE69;W # Sc SMALL DOLLAR SIGN -FE6A..FE6B;W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT -FE70..FE74;N # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM -FE76..FEFC;N # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM -FEFF;N # Cf ZERO WIDTH NO-BREAK SPACE -FF01..FF03;F # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN -FF04;F # Sc FULLWIDTH DOLLAR SIGN -FF05..FF07;F # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE -FF08;F # Ps FULLWIDTH LEFT PARENTHESIS -FF09;F # Pe FULLWIDTH RIGHT PARENTHESIS -FF0A;F # Po FULLWIDTH ASTERISK -FF0B;F # Sm FULLWIDTH PLUS SIGN -FF0C;F # Po FULLWIDTH COMMA -FF0D;F # Pd FULLWIDTH HYPHEN-MINUS -FF0E..FF0F;F # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS -FF10..FF19;F # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE -FF1A..FF1B;F # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON -FF1C..FF1E;F # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN -FF1F..FF20;F # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT -FF21..FF3A;F # Lu [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z -FF3B;F # Ps FULLWIDTH LEFT SQUARE BRACKET -FF3C;F # Po FULLWIDTH REVERSE SOLIDUS -FF3D;F # Pe FULLWIDTH RIGHT SQUARE BRACKET -FF3E;F # Sk FULLWIDTH CIRCUMFLEX ACCENT -FF3F;F # Pc FULLWIDTH LOW LINE -FF40;F # Sk FULLWIDTH GRAVE ACCENT -FF41..FF5A;F # Ll [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z -FF5B;F # Ps FULLWIDTH LEFT CURLY BRACKET -FF5C;F # Sm FULLWIDTH VERTICAL LINE -FF5D;F # Pe FULLWIDTH RIGHT CURLY BRACKET -FF5E;F # Sm FULLWIDTH TILDE -FF5F;F # Ps FULLWIDTH LEFT WHITE PARENTHESIS -FF60;F # Pe FULLWIDTH RIGHT WHITE PARENTHESIS -FF61;H # Po HALFWIDTH IDEOGRAPHIC FULL STOP -FF62;H # Ps HALFWIDTH LEFT CORNER BRACKET -FF63;H # Pe HALFWIDTH RIGHT CORNER BRACKET -FF64..FF65;H # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT -FF66..FF6F;H # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU -FF70;H # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK -FF71..FF9D;H # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N -FF9E..FF9F;H # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK -FFA0..FFBE;H # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH -FFC2..FFC7;H # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E -FFCA..FFCF;H # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE -FFD2..FFD7;H # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU -FFDA..FFDC;H # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I -FFE0..FFE1;F # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN -FFE2;F # Sm FULLWIDTH NOT SIGN -FFE3;F # Sk FULLWIDTH MACRON -FFE4;F # So FULLWIDTH BROKEN BAR -FFE5..FFE6;F # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -FFE8;H # So HALFWIDTH FORMS LIGHT VERTICAL -FFE9..FFEC;H # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW -FFED..FFEE;H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE -FFF9..FFFB;N # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR -FFFC;N # So OBJECT REPLACEMENT CHARACTER -FFFD;A # So REPLACEMENT CHARACTER -10000..1000B;N # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE -1000D..10026;N # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO -10028..1003A;N # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO -1003C..1003D;N # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE -1003F..1004D;N # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO -10050..1005D;N # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 -10080..100FA;N # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 -10100..10102;N # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK -10107..10133;N # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND -10137..1013F;N # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT -10140..10174;N # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS -10175..10178;N # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN -10179..10189;N # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN -1018A..1018B;N # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN -1018C..1018E;N # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN -10190..1019C;N # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL -101A0;N # So GREEK SYMBOL TAU RHO -101D0..101FC;N # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND -101FD;N # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE -10280..1029C;N # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X -102A0..102D0;N # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 -102E0;N # Mn COPTIC EPACT THOUSANDS MARK -102E1..102FB;N # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED -10300..1031F;N # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS -10320..10323;N # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY -1032D..1032F;N # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE -10330..10340;N # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA -10341;N # Nl GOTHIC LETTER NINETY -10342..10349;N # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL -1034A;N # Nl GOTHIC LETTER NINE HUNDRED -10350..10375;N # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA -10376..1037A;N # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII -10380..1039D;N # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU -1039F;N # Po UGARITIC WORD DIVIDER -103A0..103C3;N # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA -103C8..103CF;N # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH -103D0;N # Po OLD PERSIAN WORD DIVIDER -103D1..103D5;N # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED -10400..1044F;N # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW -10450..1047F;N # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW -10480..1049D;N # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO -104A0..104A9;N # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE -104B0..104D3;N # Lu [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA -104D8..104FB;N # Ll [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA -10500..10527;N # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE -10530..10563;N # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW -1056F;N # Po CAUCASIAN ALBANIAN CITATION MARK -10570..1057A;N # Lu [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA -1057C..1058A;N # Lu [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE -1058C..10592;N # Lu [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE -10594..10595;N # Lu [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE -10597..105A1;N # Ll [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA -105A3..105B1;N # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE -105B3..105B9;N # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE -105BB..105BC;N # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE -10600..10736;N # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 -10740..10755;N # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE -10760..10767;N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 -10780..10785;N # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK -10787..107B0;N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA;N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL -10800..10805;N # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA -10808;N # Lo CYPRIOT SYLLABLE JO -1080A..10835;N # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO -10837..10838;N # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE -1083C;N # Lo CYPRIOT SYLLABLE ZA -1083F;N # Lo CYPRIOT SYLLABLE ZO -10840..10855;N # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW -10857;N # Po IMPERIAL ARAMAIC SECTION SIGN -10858..1085F;N # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND -10860..10876;N # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW -10877..10878;N # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON -10879..1087F;N # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY -10880..1089E;N # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW -108A7..108AF;N # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED -108E0..108F2;N # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH -108F4..108F5;N # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW -108FB..108FF;N # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED -10900..10915;N # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU -10916..1091B;N # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE -1091F;N # Po PHOENICIAN WORD SEPARATOR -10920..10939;N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C -1093F;N # Po LYDIAN TRIANGULAR MARK -10980..1099F;N # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 -109A0..109B7;N # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA -109BC..109BD;N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF -109BE..109BF;N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN -109C0..109CF;N # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY -109D2..109FF;N # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS -10A00;N # Lo KHAROSHTHI LETTER A -10A01..10A03;N # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R -10A05..10A06;N # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O -10A0C..10A0F;N # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA -10A10..10A13;N # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA -10A15..10A17;N # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA -10A19..10A35;N # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA -10A38..10A3A;N # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW -10A3F;N # Mn KHAROSHTHI VIRAMA -10A40..10A48;N # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF -10A50..10A58;N # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES -10A60..10A7C;N # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH -10A7D..10A7E;N # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY -10A7F;N # Po OLD SOUTH ARABIAN NUMERIC INDICATOR -10A80..10A9C;N # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH -10A9D..10A9F;N # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY -10AC0..10AC7;N # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW -10AC8;N # So MANICHAEAN SIGN UD -10AC9..10AE4;N # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW -10AE5..10AE6;N # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW -10AEB..10AEF;N # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED -10AF0..10AF6;N # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER -10B00..10B35;N # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE -10B39..10B3F;N # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION -10B40..10B55;N # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW -10B58..10B5F;N # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND -10B60..10B72;N # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW -10B78..10B7F;N # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND -10B80..10B91;N # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW -10B99..10B9C;N # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT -10BA9..10BAF;N # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED -10C00..10C48;N # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH -10C80..10CB2;N # Lu [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US -10CC0..10CF2;N # Ll [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US -10CFA..10CFF;N # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND -10D00..10D23;N # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA -10D24..10D27;N # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI -10D30..10D39;N # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE -10E60..10E7E;N # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS -10E80..10EA9;N # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET -10EAB..10EAC;N # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EAD;N # Pd YEZIDI HYPHENATION MARK -10EB0..10EB1;N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF;N # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA -10F00..10F1C;N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL -10F1D..10F26;N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF -10F27;N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH -10F30..10F45;N # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN -10F46..10F50;N # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW -10F51..10F54;N # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED -10F55..10F59;N # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT -10F70..10F81;N # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH -10F82..10F85;N # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW -10F86..10F89;N # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS -10FB0..10FC4;N # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW -10FC5..10FCB;N # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED -10FE0..10FF6;N # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH -11000;N # Mc BRAHMI SIGN CANDRABINDU -11001;N # Mn BRAHMI SIGN ANUSVARA -11002;N # Mc BRAHMI SIGN VISARGA -11003..11037;N # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA -11038..11046;N # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA -11047..1104D;N # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS -11052..11065;N # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND -11066..1106F;N # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE -11070;N # Mn BRAHMI SIGN OLD TAMIL VIRAMA -11071..11072;N # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O -11073..11074;N # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O -11075;N # Lo BRAHMI LETTER OLD TAMIL LLA -1107F;N # Mn BRAHMI NUMBER JOINER -11080..11081;N # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA -11082;N # Mc KAITHI SIGN VISARGA -11083..110AF;N # Lo [45] KAITHI LETTER A..KAITHI LETTER HA -110B0..110B2;N # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II -110B3..110B6;N # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI -110B7..110B8;N # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU -110B9..110BA;N # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA -110BB..110BC;N # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN -110BD;N # Cf KAITHI NUMBER SIGN -110BE..110C1;N # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA -110C2;N # Mn KAITHI VOWEL SIGN VOCALIC R -110CD;N # Cf KAITHI NUMBER SIGN ABOVE -110D0..110E8;N # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE -110F0..110F9;N # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE -11100..11102;N # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA -11103..11126;N # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA -11127..1112B;N # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU -1112C;N # Mc CHAKMA VOWEL SIGN E -1112D..11134;N # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA -11136..1113F;N # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE -11140..11143;N # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK -11144;N # Lo CHAKMA LETTER LHAA -11145..11146;N # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI -11147;N # Lo CHAKMA LETTER VAA -11150..11172;N # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA -11173;N # Mn MAHAJANI SIGN NUKTA -11174..11175;N # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK -11176;N # Lo MAHAJANI LIGATURE SHRI -11180..11181;N # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA -11182;N # Mc SHARADA SIGN VISARGA -11183..111B2;N # Lo [48] SHARADA LETTER A..SHARADA LETTER HA -111B3..111B5;N # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II -111B6..111BE;N # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O -111BF..111C0;N # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA -111C1..111C4;N # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM -111C5..111C8;N # Po [4] SHARADA DANDA..SHARADA SEPARATOR -111C9..111CC;N # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK -111CD;N # Po SHARADA SUTRA MARK -111CE;N # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E -111CF;N # Mn SHARADA SIGN INVERTED CANDRABINDU -111D0..111D9;N # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE -111DA;N # Lo SHARADA EKAM -111DB;N # Po SHARADA SIGN SIDDHAM -111DC;N # Lo SHARADA HEADSTROKE -111DD..111DF;N # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 -111E1..111F4;N # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND -11200..11211;N # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA -11213..1122B;N # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA -1122C..1122E;N # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II -1122F..11231;N # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI -11232..11233;N # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU -11234;N # Mn KHOJKI SIGN ANUSVARA -11235;N # Mc KHOJKI SIGN VIRAMA -11236..11237;N # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA -11238..1123D;N # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN -1123E;N # Mn KHOJKI SIGN SUKUN -1123F..11240;N # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I -11241;N # Mn KHOJKI VOWEL SIGN VOCALIC R -11280..11286;N # Lo [7] MULTANI LETTER A..MULTANI LETTER GA -11288;N # Lo MULTANI LETTER GHA -1128A..1128D;N # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA -1128F..1129D;N # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA -1129F..112A8;N # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA -112A9;N # Po MULTANI SECTION MARK -112B0..112DE;N # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA -112DF;N # Mn KHUDAWADI SIGN ANUSVARA -112E0..112E2;N # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II -112E3..112EA;N # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA -112F0..112F9;N # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE -11300..11301;N # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU -11302..11303;N # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA -11305..1130C;N # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L -1130F..11310;N # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI -11313..11328;N # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA -1132A..11330;N # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA -11332..11333;N # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA -11335..11339;N # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA -1133B..1133C;N # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA -1133D;N # Lo GRANTHA SIGN AVAGRAHA -1133E..1133F;N # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I -11340;N # Mn GRANTHA VOWEL SIGN II -11341..11344;N # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR -11347..11348;N # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI -1134B..1134D;N # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA -11350;N # Lo GRANTHA OM -11357;N # Mc GRANTHA AU LENGTH MARK -1135D..11361;N # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL -11362..11363;N # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL -11366..1136C;N # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX -11370..11374;N # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA -11400..11434;N # Lo [53] NEWA LETTER A..NEWA LETTER HA -11435..11437;N # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II -11438..1143F;N # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI -11440..11441;N # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU -11442..11444;N # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA -11445;N # Mc NEWA SIGN VISARGA -11446;N # Mn NEWA SIGN NUKTA -11447..1144A;N # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI -1144B..1144F;N # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN -11450..11459;N # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE -1145A..1145B;N # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK -1145D;N # Po NEWA INSERTION SIGN -1145E;N # Mn NEWA SANDHI MARK -1145F..11461;N # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA -11480..114AF;N # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA -114B0..114B2;N # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II -114B3..114B8;N # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL -114B9;N # Mc TIRHUTA VOWEL SIGN E -114BA;N # Mn TIRHUTA VOWEL SIGN SHORT E -114BB..114BE;N # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU -114BF..114C0;N # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA -114C1;N # Mc TIRHUTA SIGN VISARGA -114C2..114C3;N # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA -114C4..114C5;N # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG -114C6;N # Po TIRHUTA ABBREVIATION SIGN -114C7;N # Lo TIRHUTA OM -114D0..114D9;N # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE -11580..115AE;N # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA -115AF..115B1;N # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II -115B2..115B5;N # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR -115B8..115BB;N # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU -115BC..115BD;N # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA -115BE;N # Mc SIDDHAM SIGN VISARGA -115BF..115C0;N # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA -115C1..115D7;N # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES -115D8..115DB;N # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U -115DC..115DD;N # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU -11600..1162F;N # Lo [48] MODI LETTER A..MODI LETTER LLA -11630..11632;N # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II -11633..1163A;N # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI -1163B..1163C;N # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU -1163D;N # Mn MODI SIGN ANUSVARA -1163E;N # Mc MODI SIGN VISARGA -1163F..11640;N # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA -11641..11643;N # Po [3] MODI DANDA..MODI ABBREVIATION SIGN -11644;N # Lo MODI SIGN HUVA -11650..11659;N # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE -11660..1166C;N # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT -11680..116AA;N # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA -116AB;N # Mn TAKRI SIGN ANUSVARA -116AC;N # Mc TAKRI SIGN VISARGA -116AD;N # Mn TAKRI VOWEL SIGN AA -116AE..116AF;N # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II -116B0..116B5;N # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU -116B6;N # Mc TAKRI SIGN VIRAMA -116B7;N # Mn TAKRI SIGN NUKTA -116B8;N # Lo TAKRI LETTER ARCHAIC KHA -116B9;N # Po TAKRI ABBREVIATION SIGN -116C0..116C9;N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE -11700..1171A;N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F;N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA -11720..11721;N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA -11722..11725;N # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU -11726;N # Mc AHOM VOWEL SIGN E -11727..1172B;N # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER -11730..11739;N # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE -1173A..1173B;N # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY -1173C..1173E;N # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI -1173F;N # So AHOM SYMBOL VI -11740..11746;N # Lo [7] AHOM LETTER CA..AHOM LETTER LLA -11800..1182B;N # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA -1182C..1182E;N # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II -1182F..11837;N # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA -11838;N # Mc DOGRA SIGN VISARGA -11839..1183A;N # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA -1183B;N # Po DOGRA ABBREVIATION SIGN -118A0..118DF;N # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO -118E0..118E9;N # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE -118EA..118F2;N # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY -118FF;N # Lo WARANG CITI OM -11900..11906;N # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E -11909;N # Lo DIVES AKURU LETTER O -1190C..11913;N # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA -11915..11916;N # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA -11918..1192F;N # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA -11930..11935;N # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E -11937..11938;N # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O -1193B..1193C;N # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU -1193D;N # Mc DIVES AKURU SIGN HALANTA -1193E;N # Mn DIVES AKURU VIRAMA -1193F;N # Lo DIVES AKURU PREFIXED NASAL SIGN -11940;N # Mc DIVES AKURU MEDIAL YA -11941;N # Lo DIVES AKURU INITIAL RA -11942;N # Mc DIVES AKURU MEDIAL RA -11943;N # Mn DIVES AKURU SIGN NUKTA -11944..11946;N # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK -11950..11959;N # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE -119A0..119A7;N # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR -119AA..119D0;N # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA -119D1..119D3;N # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II -119D4..119D7;N # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR -119DA..119DB;N # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI -119DC..119DF;N # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA -119E0;N # Mn NANDINAGARI SIGN VIRAMA -119E1;N # Lo NANDINAGARI SIGN AVAGRAHA -119E2;N # Po NANDINAGARI SIGN SIDDHAM -119E3;N # Lo NANDINAGARI HEADSTROKE -119E4;N # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E -11A00;N # Lo ZANABAZAR SQUARE LETTER A -11A01..11A0A;N # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK -11A0B..11A32;N # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA -11A33..11A38;N # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA -11A39;N # Mc ZANABAZAR SQUARE SIGN VISARGA -11A3A;N # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA -11A3B..11A3E;N # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA -11A3F..11A46;N # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK -11A47;N # Mn ZANABAZAR SQUARE SUBJOINER -11A50;N # Lo SOYOMBO LETTER A -11A51..11A56;N # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE -11A57..11A58;N # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU -11A59..11A5B;N # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK -11A5C..11A89;N # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA -11A8A..11A96;N # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA -11A97;N # Mc SOYOMBO SIGN VISARGA -11A98..11A99;N # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER -11A9A..11A9C;N # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD -11A9D;N # Lo SOYOMBO MARK PLUTA -11A9E..11AA2;N # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 -11AB0..11ABF;N # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA -11AC0..11AF8;N # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL -11B00..11B09;N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU -11C00..11C08;N # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L -11C0A..11C2E;N # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA -11C2F;N # Mc BHAIKSUKI VOWEL SIGN AA -11C30..11C36;N # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L -11C38..11C3D;N # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA -11C3E;N # Mc BHAIKSUKI SIGN VISARGA -11C3F;N # Mn BHAIKSUKI SIGN VIRAMA -11C40;N # Lo BHAIKSUKI SIGN AVAGRAHA -11C41..11C45;N # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 -11C50..11C59;N # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE -11C5A..11C6C;N # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK -11C70..11C71;N # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD -11C72..11C8F;N # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A -11C92..11CA7;N # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA -11CA9;N # Mc MARCHEN SUBJOINED LETTER YA -11CAA..11CB0;N # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA -11CB1;N # Mc MARCHEN VOWEL SIGN I -11CB2..11CB3;N # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E -11CB4;N # Mc MARCHEN VOWEL SIGN O -11CB5..11CB6;N # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU -11D00..11D06;N # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E -11D08..11D09;N # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O -11D0B..11D30;N # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA -11D31..11D36;N # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R -11D3A;N # Mn MASARAM GONDI VOWEL SIGN E -11D3C..11D3D;N # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O -11D3F..11D45;N # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA -11D46;N # Lo MASARAM GONDI REPHA -11D47;N # Mn MASARAM GONDI RA-KARA -11D50..11D59;N # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE -11D60..11D65;N # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU -11D67..11D68;N # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI -11D6A..11D89;N # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA -11D8A..11D8E;N # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU -11D90..11D91;N # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI -11D93..11D94;N # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU -11D95;N # Mn GUNJALA GONDI SIGN ANUSVARA -11D96;N # Mc GUNJALA GONDI SIGN VISARGA -11D97;N # Mn GUNJALA GONDI VIRAMA -11D98;N # Lo GUNJALA GONDI OM -11DA0..11DA9;N # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE -11EE0..11EF2;N # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA -11EF3..11EF4;N # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U -11EF5..11EF6;N # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O -11EF7..11EF8;N # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION -11F00..11F01;N # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA -11F02;N # Lo KAWI SIGN REPHA -11F03;N # Mc KAWI SIGN VISARGA -11F04..11F10;N # Lo [13] KAWI LETTER A..KAWI LETTER O -11F12..11F33;N # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA -11F34..11F35;N # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA -11F36..11F3A;N # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R -11F3E..11F3F;N # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI -11F40;N # Mn KAWI VOWEL SIGN EU -11F41;N # Mc KAWI SIGN KILLER -11F42;N # Mn KAWI CONJOINER -11F43..11F4F;N # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL -11F50..11F59;N # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE -11FB0;N # Lo LISU LETTER YHA -11FC0..11FD4;N # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH -11FD5..11FDC;N # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI -11FDD..11FE0;N # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN -11FE1..11FF1;N # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA -11FFF;N # Po TAMIL PUNCTUATION END OF TEXT -12000..12399;N # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U -12400..1246E;N # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM -12470..12474;N # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON -12480..12543;N # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU -12F90..12FF0;N # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 -12FF1..12FF2;N # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 -13000..1342F;N # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D -13430..13440;N # Cf [17] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY -13441..13446;N # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN -13447..13455;N # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED -14400..14646;N # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 -16800..16A38;N # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ -16A40..16A5E;N # Lo [31] MRO LETTER TA..MRO LETTER TEK -16A60..16A69;N # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE -16A6E..16A6F;N # Po [2] MRO DANDA..MRO DOUBLE DANDA -16A70..16ABE;N # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA -16AC0..16AC9;N # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE -16AD0..16AED;N # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I -16AF0..16AF4;N # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE -16AF5;N # Po BASSA VAH FULL STOP -16B00..16B2F;N # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU -16B30..16B36;N # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM -16B37..16B3B;N # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM -16B3C..16B3F;N # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB -16B40..16B43;N # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM -16B44;N # Po PAHAWH HMONG SIGN XAUS -16B45;N # So PAHAWH HMONG SIGN CIM TSOV ROG -16B50..16B59;N # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE -16B5B..16B61;N # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS -16B63..16B77;N # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS -16B7D..16B8F;N # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ -16E40..16E7F;N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y -16E80..16E96;N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM -16E97..16E9A;N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH -16F00..16F4A;N # Lo [75] MIAO LETTER PA..MIAO LETTER RTE -16F4F;N # Mn MIAO SIGN CONSONANT MODIFIER BAR -16F50;N # Lo MIAO LETTER NASALIZATION -16F51..16F87;N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI -16F8F..16F92;N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW -16F93..16F9F;N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -16FE0..16FE1;W # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK -16FE2;W # Po OLD CHINESE HOOK MARK -16FE3;W # Lm OLD CHINESE ITERATION MARK -16FE4;W # Mn KHITAN SMALL SCRIPT FILLER -16FF0..16FF1;W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7;W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF;W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18B00..18CD5;W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08;W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 -1AFF0..1AFF3;W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 -1AFF5..1AFFB;W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 -1AFFD..1AFFE;W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 -1B000..1B0FF;W # Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 -1B100..1B122;W # Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU -1B132;W # Lo HIRAGANA LETTER SMALL KO -1B150..1B152;W # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO -1B155;W # Lo KATAKANA LETTER SMALL KO -1B164..1B167;W # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N -1B170..1B2FB;W # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB -1BC00..1BC6A;N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M -1BC70..1BC7C;N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK -1BC80..1BC88;N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL -1BC90..1BC99;N # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW -1BC9C;N # So DUPLOYAN SIGN O WITH CROSS -1BC9D..1BC9E;N # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK -1BC9F;N # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP -1BCA0..1BCA3;N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP -1CF00..1CF2D;N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT -1CF30..1CF46;N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG -1CF50..1CFC3;N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK -1D000..1D0F5;N # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO -1D100..1D126;N # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 -1D129..1D164;N # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE -1D165..1D166;N # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM -1D167..1D169;N # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 -1D16A..1D16C;N # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 -1D16D..1D172;N # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -1D173..1D17A;N # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE -1D17B..1D182;N # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE -1D183..1D184;N # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN -1D185..1D18B;N # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE -1D18C..1D1A9;N # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH -1D1AA..1D1AD;N # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO -1D1AE..1D1EA;N # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON -1D200..1D241;N # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 -1D242..1D244;N # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME -1D245;N # So GREEK MUSICAL LEIMMA -1D2C0..1D2D3;N # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN -1D2E0..1D2F3;N # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN -1D300..1D356;N # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING -1D360..1D378;N # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE -1D400..1D454;N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G -1D456..1D49C;N # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A -1D49E..1D49F;N # Lu [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D -1D4A2;N # Lu MATHEMATICAL SCRIPT CAPITAL G -1D4A5..1D4A6;N # Lu [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K -1D4A9..1D4AC;N # Lu [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q -1D4AE..1D4B9;N # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D -1D4BB;N # Ll MATHEMATICAL SCRIPT SMALL F -1D4BD..1D4C3;N # Ll [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N -1D4C5..1D505;N # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B -1D507..1D50A;N # Lu [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G -1D50D..1D514;N # Lu [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q -1D516..1D51C;N # Lu [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y -1D51E..1D539;N # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B -1D53B..1D53E;N # Lu [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G -1D540..1D544;N # Lu [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M -1D546;N # Lu MATHEMATICAL DOUBLE-STRUCK CAPITAL O -1D54A..1D550;N # Lu [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y -1D552..1D6A5;N # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J -1D6A8..1D6C0;N # Lu [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA -1D6C1;N # Sm MATHEMATICAL BOLD NABLA -1D6C2..1D6DA;N # Ll [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA -1D6DB;N # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL -1D6DC..1D6FA;N # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA -1D6FB;N # Sm MATHEMATICAL ITALIC NABLA -1D6FC..1D714;N # Ll [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA -1D715;N # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL -1D716..1D734;N # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA -1D735;N # Sm MATHEMATICAL BOLD ITALIC NABLA -1D736..1D74E;N # Ll [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA -1D74F;N # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL -1D750..1D76E;N # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA -1D76F;N # Sm MATHEMATICAL SANS-SERIF BOLD NABLA -1D770..1D788;N # Ll [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA -1D789;N # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL -1D78A..1D7A8;N # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA -1D7A9;N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA -1D7AA..1D7C2;N # Ll [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA -1D7C3;N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL -1D7C4..1D7CB;N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA -1D7CE..1D7FF;N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -1D800..1D9FF;N # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD -1DA00..1DA36;N # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN -1DA37..1DA3A;N # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE -1DA3B..1DA6C;N # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT -1DA6D..1DA74;N # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING -1DA75;N # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS -1DA76..1DA83;N # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH -1DA84;N # Mn SIGNWRITING LOCATION HEAD NECK -1DA85..1DA86;N # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS -1DA87..1DA8B;N # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS -1DA9B..1DA9F;N # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 -1DAA1..1DAAF;N # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 -1DF00..1DF09;N # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK -1DF0A;N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK -1DF0B..1DF1E;N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A;N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1E000..1E006;N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE -1E008..1E018;N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU -1E01B..1E021;N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI -1E023..1E024;N # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS -1E026..1E02A;N # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA -1E030..1E06D;N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -1E08F;N # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -1E100..1E12C;N # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W -1E130..1E136;N # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D -1E137..1E13D;N # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER -1E140..1E149;N # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE -1E14E;N # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ -1E14F;N # So NYIAKENG PUACHUE HMONG CIRCLED CA -1E290..1E2AD;N # Lo [30] TOTO LETTER PA..TOTO LETTER A -1E2AE;N # Mn TOTO SIGN RISING TONE -1E2C0..1E2EB;N # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH -1E2EC..1E2EF;N # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI -1E2F0..1E2F9;N # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE -1E2FF;N # Sc WANCHO NGUN SIGN -1E4D0..1E4EA;N # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL -1E4EB;N # Lm NAG MUNDARI SIGN OJOD -1E4EC..1E4EF;N # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH -1E4F0..1E4F9;N # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE -1E7E0..1E7E6;N # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO -1E7E8..1E7EB;N # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE -1E7ED..1E7EE;N # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE -1E7F0..1E7FE;N # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE -1E800..1E8C4;N # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON -1E8C7..1E8CF;N # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE -1E8D0..1E8D6;N # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -1E900..1E943;N # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -1E944..1E94A;N # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA -1E94B;N # Lm ADLAM NASALIZATION MARK -1E950..1E959;N # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE -1E95E..1E95F;N # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -1EC71..1ECAB;N # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE -1ECAC;N # So INDIC SIYAQ PLACEHOLDER -1ECAD..1ECAF;N # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS -1ECB0;N # Sc INDIC SIYAQ RUPEE MARK -1ECB1..1ECB4;N # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK -1ED01..1ED2D;N # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND -1ED2E;N # So OTTOMAN SIYAQ MARRATAN -1ED2F..1ED3D;N # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH -1EE00..1EE03;N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL -1EE05..1EE1F;N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF -1EE21..1EE22;N # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM -1EE24;N # Lo ARABIC MATHEMATICAL INITIAL HEH -1EE27;N # Lo ARABIC MATHEMATICAL INITIAL HAH -1EE29..1EE32;N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF -1EE34..1EE37;N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH -1EE39;N # Lo ARABIC MATHEMATICAL INITIAL DAD -1EE3B;N # Lo ARABIC MATHEMATICAL INITIAL GHAIN -1EE42;N # Lo ARABIC MATHEMATICAL TAILED JEEM -1EE47;N # Lo ARABIC MATHEMATICAL TAILED HAH -1EE49;N # Lo ARABIC MATHEMATICAL TAILED YEH -1EE4B;N # Lo ARABIC MATHEMATICAL TAILED LAM -1EE4D..1EE4F;N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN -1EE51..1EE52;N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF -1EE54;N # Lo ARABIC MATHEMATICAL TAILED SHEEN -1EE57;N # Lo ARABIC MATHEMATICAL TAILED KHAH -1EE59;N # Lo ARABIC MATHEMATICAL TAILED DAD -1EE5B;N # Lo ARABIC MATHEMATICAL TAILED GHAIN -1EE5D;N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON -1EE5F;N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF -1EE61..1EE62;N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM -1EE64;N # Lo ARABIC MATHEMATICAL STRETCHED HEH -1EE67..1EE6A;N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF -1EE6C..1EE72;N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF -1EE74..1EE77;N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH -1EE79..1EE7C;N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH -1EE7E;N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH -1EE80..1EE89;N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH -1EE8B..1EE9B;N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN -1EEA1..1EEA3;N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL -1EEA5..1EEA9;N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH -1EEAB..1EEBB;N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -1EEF0..1EEF1;N # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -1F000..1F003;N # So [4] MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND -1F004;W # So MAHJONG TILE RED DRAGON -1F005..1F02B;N # So [39] MAHJONG TILE GREEN DRAGON..MAHJONG TILE BACK -1F030..1F093;N # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 -1F0A0..1F0AE;N # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES -1F0B1..1F0BF;N # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER -1F0C1..1F0CE;N # So [14] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD KING OF DIAMONDS -1F0CF;W # So PLAYING CARD BLACK JOKER -1F0D1..1F0F5;N # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 -1F100..1F10A;A # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA -1F10B..1F10C;N # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO -1F10D..1F10F;N # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH -1F110..1F12D;A # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD -1F12E..1F12F;N # So [2] CIRCLED WZ..COPYLEFT SYMBOL -1F130..1F169;A # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z -1F16A..1F16F;N # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE -1F170..1F18D;A # So [30] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED SA -1F18E;W # So NEGATIVE SQUARED AB -1F18F..1F190;A # So [2] NEGATIVE SQUARED WC..SQUARE DJ -1F191..1F19A;W # So [10] SQUARED CL..SQUARED VS -1F19B..1F1AC;A # So [18] SQUARED THREE D..SQUARED VOD -1F1AD;N # So MASK WORK SYMBOL -1F1E6..1F1FF;N # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z -1F200..1F202;W # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA -1F210..1F23B;W # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D -1F240..1F248;W # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 -1F250..1F251;W # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT -1F260..1F265;W # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI -1F300..1F320;W # So [33] CYCLONE..SHOOTING STAR -1F321..1F32C;N # So [12] THERMOMETER..WIND BLOWING FACE -1F32D..1F335;W # So [9] HOT DOG..CACTUS -1F336;N # So HOT PEPPER -1F337..1F37C;W # So [70] TULIP..BABY BOTTLE -1F37D;N # So FORK AND KNIFE WITH PLATE -1F37E..1F393;W # So [22] BOTTLE WITH POPPING CORK..GRADUATION CAP -1F394..1F39F;N # So [12] HEART WITH TIP ON THE LEFT..ADMISSION TICKETS -1F3A0..1F3CA;W # So [43] CAROUSEL HORSE..SWIMMER -1F3CB..1F3CE;N # So [4] WEIGHT LIFTER..RACING CAR -1F3CF..1F3D3;W # So [5] CRICKET BAT AND BALL..TABLE TENNIS PADDLE AND BALL -1F3D4..1F3DF;N # So [12] SNOW CAPPED MOUNTAIN..STADIUM -1F3E0..1F3F0;W # So [17] HOUSE BUILDING..EUROPEAN CASTLE -1F3F1..1F3F3;N # So [3] WHITE PENNANT..WAVING WHITE FLAG -1F3F4;W # So WAVING BLACK FLAG -1F3F5..1F3F7;N # So [3] ROSETTE..LABEL -1F3F8..1F3FA;W # So [3] BADMINTON RACQUET AND SHUTTLECOCK..AMPHORA -1F3FB..1F3FF;W # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 -1F400..1F43E;W # So [63] RAT..PAW PRINTS -1F43F;N # So CHIPMUNK -1F440;W # So EYES -1F441;N # So EYE -1F442..1F4FC;W # So [187] EAR..VIDEOCASSETTE -1F4FD..1F4FE;N # So [2] FILM PROJECTOR..PORTABLE STEREO -1F4FF..1F53D;W # So [63] PRAYER BEADS..DOWN-POINTING SMALL RED TRIANGLE -1F53E..1F54A;N # So [13] LOWER RIGHT SHADOWED WHITE CIRCLE..DOVE OF PEACE -1F54B..1F54E;W # So [4] KAABA..MENORAH WITH NINE BRANCHES -1F54F;N # So BOWL OF HYGIEIA -1F550..1F567;W # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F568..1F579;N # So [18] RIGHT SPEAKER..JOYSTICK -1F57A;W # So MAN DANCING -1F57B..1F594;N # So [26] LEFT HAND TELEPHONE RECEIVER..REVERSED VICTORY HAND -1F595..1F596;W # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS -1F597..1F5A3;N # So [13] WHITE DOWN POINTING LEFT HAND INDEX..BLACK DOWN POINTING BACKHAND INDEX -1F5A4;W # So BLACK HEART -1F5A5..1F5FA;N # So [86] DESKTOP COMPUTER..WORLD MAP -1F5FB..1F5FF;W # So [5] MOUNT FUJI..MOYAI -1F600..1F64F;W # So [80] GRINNING FACE..PERSON WITH FOLDED HANDS -1F650..1F67F;N # So [48] NORTH WEST POINTING LEAF..REVERSE CHECKER BOARD -1F680..1F6C5;W # So [70] ROCKET..LEFT LUGGAGE -1F6C6..1F6CB;N # So [6] TRIANGLE WITH ROUNDED CORNERS..COUCH AND LAMP -1F6CC;W # So SLEEPING ACCOMMODATION -1F6CD..1F6CF;N # So [3] SHOPPING BAGS..BED -1F6D0..1F6D2;W # So [3] PLACE OF WORSHIP..SHOPPING TROLLEY -1F6D3..1F6D4;N # So [2] STUPA..PAGODA -1F6D5..1F6D7;W # So [3] HINDU TEMPLE..ELEVATOR -1F6DC..1F6DF;W # So [4] WIRELESS..RING BUOY -1F6E0..1F6EA;N # So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE -1F6EB..1F6EC;W # So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING -1F6F0..1F6F3;N # So [4] SATELLITE..PASSENGER SHIP -1F6F4..1F6FC;W # So [9] SCOOTER..ROLLER SKATE -1F700..1F776;N # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F77B..1F77F;N # So [5] HAUMEA..ORCUS -1F780..1F7D9;N # So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR -1F7E0..1F7EB;W # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE -1F7F0;W # So HEAVY EQUALS SIGN -1F800..1F80B;N # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD -1F810..1F847;N # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW -1F850..1F859;N # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW -1F860..1F887;N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW -1F890..1F8AD;N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1;N # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST -1F900..1F90B;N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT -1F90C..1F93A;W # So [47] PINCHED FINGERS..FENCER -1F93B;N # So MODERN PENTATHLON -1F93C..1F945;W # So [10] WRESTLERS..GOAL NET -1F946;N # So RIFLE -1F947..1F9FF;W # So [185] FIRST PLACE MEDAL..NAZAR AMULET -1FA00..1FA53;N # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP -1FA60..1FA6D;N # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER -1FA70..1FA7C;W # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88;W # So [9] YO-YO..FLUTE -1FA90..1FABD;W # So [46] RINGED PLANET..WING -1FABE;W # Cn -1FABF..1FAC5;W # So [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB;W # So [14] MOOSE..PEA POD -1FAE0..1FAE8;W # So [9] MELTING FACE..SHAKING FACE -1FAF0..1FAF8;W # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND -1FB00..1FB92;N # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK -1FB94..1FBCA;N # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -1FBF0..1FBF9;N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -20000..2A6DF;W # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A6E0..2A6FF;W # Cn [32] .. -2A700..2B738;W # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 -2B739..2B73F;W # Cn [7] .. -2B740..2B81D;W # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D -2B81E..2B81F;W # Cn [2] .. -2B820..2CEA1;W # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 -2CEA2..2CEAF;W # Cn [14] .. -2CEB0..2EBE0;W # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 -2EBE1..2F7FF;W # Cn [3103] .. -2F800..2FA1D;W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -2FA1E..2FA1F;W # Cn [2] .. -2FA20..2FFFD;W # Cn [1502] .. -30000..3134A;W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -3134B..3134F;W # Cn [5] .. -31350..323AF;W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -323B0..3FFFD;W # Cn [56398] .. -E0001;N # Cf LANGUAGE TAG -E0020..E007F;N # Cf [96] TAG SPACE..CANCEL TAG -E0100..E01EF;A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -F0000..FFFFD;A # Co [65534] .. -100000..10FFFD;A # Co [65534] .. +0000..001F ; N # Cc [32] .. +0020 ; Na # Zs SPACE +0021..0023 ; Na # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Na # Sc DOLLAR SIGN +0025..0027 ; Na # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Na # Ps LEFT PARENTHESIS +0029 ; Na # Pe RIGHT PARENTHESIS +002A ; Na # Po ASTERISK +002B ; Na # Sm PLUS SIGN +002C ; Na # Po COMMA +002D ; Na # Pd HYPHEN-MINUS +002E..002F ; Na # Po [2] FULL STOP..SOLIDUS +0030..0039 ; Na # Nd [10] DIGIT ZERO..DIGIT NINE +003A..003B ; Na # Po [2] COLON..SEMICOLON +003C..003E ; Na # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Na # Po [2] QUESTION MARK..COMMERCIAL AT +0041..005A ; Na # Lu [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +005B ; Na # Ps LEFT SQUARE BRACKET +005C ; Na # Po REVERSE SOLIDUS +005D ; Na # Pe RIGHT SQUARE BRACKET +005E ; Na # Sk CIRCUMFLEX ACCENT +005F ; Na # Pc LOW LINE +0060 ; Na # Sk GRAVE ACCENT +0061..007A ; Na # Ll [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +007B ; Na # Ps LEFT CURLY BRACKET +007C ; Na # Sm VERTICAL LINE +007D ; Na # Pe RIGHT CURLY BRACKET +007E ; Na # Sm TILDE +007F ; N # Cc +0080..009F ; N # Cc [32] .. +00A0 ; N # Zs NO-BREAK SPACE +00A1 ; A # Po INVERTED EXCLAMATION MARK +00A2..00A3 ; Na # Sc [2] CENT SIGN..POUND SIGN +00A4 ; A # Sc CURRENCY SIGN +00A5 ; Na # Sc YEN SIGN +00A6 ; Na # So BROKEN BAR +00A7 ; A # Po SECTION SIGN +00A8 ; A # Sk DIAERESIS +00A9 ; N # So COPYRIGHT SIGN +00AA ; A # Lo FEMININE ORDINAL INDICATOR +00AB ; N # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Na # Sm NOT SIGN +00AD ; A # Cf SOFT HYPHEN +00AE ; A # So REGISTERED SIGN +00AF ; Na # Sk MACRON +00B0 ; A # So DEGREE SIGN +00B1 ; A # Sm PLUS-MINUS SIGN +00B2..00B3 ; A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B4 ; A # Sk ACUTE ACCENT +00B5 ; N # Ll MICRO SIGN +00B6..00B7 ; A # Po [2] PILCROW SIGN..MIDDLE DOT +00B8 ; A # Sk CEDILLA +00B9 ; A # No SUPERSCRIPT ONE +00BA ; A # Lo MASCULINE ORDINAL INDICATOR +00BB ; N # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; A # Po INVERTED QUESTION MARK +00C0..00C5 ; N # Lu [6] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER A WITH RING ABOVE +00C6 ; A # Lu LATIN CAPITAL LETTER AE +00C7..00CF ; N # Lu [9] LATIN CAPITAL LETTER C WITH CEDILLA..LATIN CAPITAL LETTER I WITH DIAERESIS +00D0 ; A # Lu LATIN CAPITAL LETTER ETH +00D1..00D6 ; N # Lu [6] LATIN CAPITAL LETTER N WITH TILDE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D7 ; A # Sm MULTIPLICATION SIGN +00D8 ; A # Lu LATIN CAPITAL LETTER O WITH STROKE +00D9..00DD ; N # Lu [5] LATIN CAPITAL LETTER U WITH GRAVE..LATIN CAPITAL LETTER Y WITH ACUTE +00DE..00E1 ; A # L& [4] LATIN CAPITAL LETTER THORN..LATIN SMALL LETTER A WITH ACUTE +00E2..00E5 ; N # Ll [4] LATIN SMALL LETTER A WITH CIRCUMFLEX..LATIN SMALL LETTER A WITH RING ABOVE +00E6 ; A # Ll LATIN SMALL LETTER AE +00E7 ; N # Ll LATIN SMALL LETTER C WITH CEDILLA +00E8..00EA ; A # Ll [3] LATIN SMALL LETTER E WITH GRAVE..LATIN SMALL LETTER E WITH CIRCUMFLEX +00EB ; N # Ll LATIN SMALL LETTER E WITH DIAERESIS +00EC..00ED ; A # Ll [2] LATIN SMALL LETTER I WITH GRAVE..LATIN SMALL LETTER I WITH ACUTE +00EE..00EF ; N # Ll [2] LATIN SMALL LETTER I WITH CIRCUMFLEX..LATIN SMALL LETTER I WITH DIAERESIS +00F0 ; A # Ll LATIN SMALL LETTER ETH +00F1 ; N # Ll LATIN SMALL LETTER N WITH TILDE +00F2..00F3 ; A # Ll [2] LATIN SMALL LETTER O WITH GRAVE..LATIN SMALL LETTER O WITH ACUTE +00F4..00F6 ; N # Ll [3] LATIN SMALL LETTER O WITH CIRCUMFLEX..LATIN SMALL LETTER O WITH DIAERESIS +00F7 ; A # Sm DIVISION SIGN +00F8..00FA ; A # Ll [3] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER U WITH ACUTE +00FB ; N # Ll LATIN SMALL LETTER U WITH CIRCUMFLEX +00FC ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS +00FD ; N # Ll LATIN SMALL LETTER Y WITH ACUTE +00FE ; A # Ll LATIN SMALL LETTER THORN +00FF ; N # Ll LATIN SMALL LETTER Y WITH DIAERESIS +0100 ; N # Lu LATIN CAPITAL LETTER A WITH MACRON +0101 ; A # Ll LATIN SMALL LETTER A WITH MACRON +0102..0110 ; N # L& [15] LATIN CAPITAL LETTER A WITH BREVE..LATIN CAPITAL LETTER D WITH STROKE +0111 ; A # Ll LATIN SMALL LETTER D WITH STROKE +0112 ; N # Lu LATIN CAPITAL LETTER E WITH MACRON +0113 ; A # Ll LATIN SMALL LETTER E WITH MACRON +0114..011A ; N # L& [7] LATIN CAPITAL LETTER E WITH BREVE..LATIN CAPITAL LETTER E WITH CARON +011B ; A # Ll LATIN SMALL LETTER E WITH CARON +011C..0125 ; N # L& [10] LATIN CAPITAL LETTER G WITH CIRCUMFLEX..LATIN SMALL LETTER H WITH CIRCUMFLEX +0126..0127 ; A # L& [2] LATIN CAPITAL LETTER H WITH STROKE..LATIN SMALL LETTER H WITH STROKE +0128..012A ; N # L& [3] LATIN CAPITAL LETTER I WITH TILDE..LATIN CAPITAL LETTER I WITH MACRON +012B ; A # Ll LATIN SMALL LETTER I WITH MACRON +012C..0130 ; N # L& [5] LATIN CAPITAL LETTER I WITH BREVE..LATIN CAPITAL LETTER I WITH DOT ABOVE +0131..0133 ; A # L& [3] LATIN SMALL LETTER DOTLESS I..LATIN SMALL LIGATURE IJ +0134..0137 ; N # L& [4] LATIN CAPITAL LETTER J WITH CIRCUMFLEX..LATIN SMALL LETTER K WITH CEDILLA +0138 ; A # Ll LATIN SMALL LETTER KRA +0139..013E ; N # L& [6] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER L WITH CARON +013F..0142 ; A # L& [4] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH STROKE +0143 ; N # Lu LATIN CAPITAL LETTER N WITH ACUTE +0144 ; A # Ll LATIN SMALL LETTER N WITH ACUTE +0145..0147 ; N # L& [3] LATIN CAPITAL LETTER N WITH CEDILLA..LATIN CAPITAL LETTER N WITH CARON +0148..014B ; A # L& [4] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER ENG +014C ; N # Lu LATIN CAPITAL LETTER O WITH MACRON +014D ; A # Ll LATIN SMALL LETTER O WITH MACRON +014E..0151 ; N # L& [4] LATIN CAPITAL LETTER O WITH BREVE..LATIN SMALL LETTER O WITH DOUBLE ACUTE +0152..0153 ; A # L& [2] LATIN CAPITAL LIGATURE OE..LATIN SMALL LIGATURE OE +0154..0165 ; N # L& [18] LATIN CAPITAL LETTER R WITH ACUTE..LATIN SMALL LETTER T WITH CARON +0166..0167 ; A # L& [2] LATIN CAPITAL LETTER T WITH STROKE..LATIN SMALL LETTER T WITH STROKE +0168..016A ; N # L& [3] LATIN CAPITAL LETTER U WITH TILDE..LATIN CAPITAL LETTER U WITH MACRON +016B ; A # Ll LATIN SMALL LETTER U WITH MACRON +016C..017F ; N # L& [20] LATIN CAPITAL LETTER U WITH BREVE..LATIN SMALL LETTER LONG S +0180..01BA ; N # L& [59] LATIN SMALL LETTER B WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; N # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; N # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; N # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..01CD ; N # L& [10] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER A WITH CARON +01CE ; A # Ll LATIN SMALL LETTER A WITH CARON +01CF ; N # Lu LATIN CAPITAL LETTER I WITH CARON +01D0 ; A # Ll LATIN SMALL LETTER I WITH CARON +01D1 ; N # Lu LATIN CAPITAL LETTER O WITH CARON +01D2 ; A # Ll LATIN SMALL LETTER O WITH CARON +01D3 ; N # Lu LATIN CAPITAL LETTER U WITH CARON +01D4 ; A # Ll LATIN SMALL LETTER U WITH CARON +01D5 ; N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D6 ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D7 ; N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D8 ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01D9 ; N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DA ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND CARON +01DB ; N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DC ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE +01DD..024F ; N # L& [115] LATIN SMALL LETTER TURNED E..LATIN SMALL LETTER Y WITH STROKE +0250 ; N # Ll LATIN SMALL LETTER TURNED A +0251 ; A # Ll LATIN SMALL LETTER ALPHA +0252..0260 ; N # Ll [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK +0261 ; A # Ll LATIN SMALL LETTER SCRIPT G +0262..0293 ; N # Ll [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL +0294 ; N # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; N # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C3 ; N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD +02C4 ; A # Sk MODIFIER LETTER UP ARROWHEAD +02C5 ; N # Sk MODIFIER LETTER DOWN ARROWHEAD +02C6 ; N # Lm MODIFIER LETTER CIRCUMFLEX ACCENT +02C7 ; A # Lm CARON +02C8 ; N # Lm MODIFIER LETTER VERTICAL LINE +02C9..02CB ; A # Lm [3] MODIFIER LETTER MACRON..MODIFIER LETTER GRAVE ACCENT +02CC ; N # Lm MODIFIER LETTER LOW VERTICAL LINE +02CD ; A # Lm MODIFIER LETTER LOW MACRON +02CE..02CF ; N # Lm [2] MODIFIER LETTER LOW GRAVE ACCENT..MODIFIER LETTER LOW ACUTE ACCENT +02D0 ; A # Lm MODIFIER LETTER TRIANGULAR COLON +02D1 ; N # Lm MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02D7 ; N # Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN +02D8..02DB ; A # Sk [4] BREVE..OGONEK +02DC ; N # Sk SMALL TILDE +02DD ; A # Sk DOUBLE ACUTE ACCENT +02DE ; N # Sk MODIFIER LETTER RHOTIC HOOK +02DF ; A # Sk MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; N # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; N # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; N # Lm MODIFIER LETTER VOICING +02ED ; N # Sk MODIFIER LETTER UNASPIRATED +02EE ; N # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; N # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..036F ; A # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0370..0373 ; N # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; N # Lm GREEK NUMERAL SIGN +0375 ; N # Sk GREEK LOWER NUMERAL SIGN +0376..0377 ; N # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; N # Lm GREEK YPOGEGRAMMENI +037B..037D ; N # Ll [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037E ; N # Po GREEK QUESTION MARK +037F ; N # Lu GREEK CAPITAL LETTER YOT +0384..0385 ; N # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0386 ; N # Lu GREEK CAPITAL LETTER ALPHA WITH TONOS +0387 ; N # Po GREEK ANO TELEIA +0388..038A ; N # Lu [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; N # Lu GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..0390 ; N # L& [3] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391..03A1 ; A # Lu [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO +03A3..03A9 ; A # Lu [7] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER OMEGA +03AA..03B0 ; N # L& [7] GREEK CAPITAL LETTER IOTA WITH DIALYTIKA..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03B1..03C1 ; A # Ll [17] GREEK SMALL LETTER ALPHA..GREEK SMALL LETTER RHO +03C2 ; N # Ll GREEK SMALL LETTER FINAL SIGMA +03C3..03C9 ; A # Ll [7] GREEK SMALL LETTER SIGMA..GREEK SMALL LETTER OMEGA +03CA..03F5 ; N # L& [44] GREEK SMALL LETTER IOTA WITH DIALYTIKA..GREEK LUNATE EPSILON SYMBOL +03F6 ; N # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +03F7..03FF ; N # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +0400 ; N # Lu CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401 ; A # Lu CYRILLIC CAPITAL LETTER IO +0402..040F ; N # Lu [14] CYRILLIC CAPITAL LETTER DJE..CYRILLIC CAPITAL LETTER DZHE +0410..044F ; A # L& [64] CYRILLIC CAPITAL LETTER A..CYRILLIC SMALL LETTER YA +0450 ; N # Ll CYRILLIC SMALL LETTER IE WITH GRAVE +0451 ; A # Ll CYRILLIC SMALL LETTER IO +0452..0481 ; N # L& [48] CYRILLIC SMALL LETTER DJE..CYRILLIC SMALL LETTER KOPPA +0482 ; N # So CYRILLIC THOUSANDS SIGN +0483..0487 ; N # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; N # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +048A..04FF ; N # L& [118] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER HA WITH STROKE +0500..052F ; N # L& [48] CYRILLIC CAPITAL LETTER KOMI DE..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; N # Lu [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; N # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F ; N # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0560..0588 ; N # Ll [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0589 ; N # Po ARMENIAN FULL STOP +058A ; N # Pd ARMENIAN HYPHEN +058D..058E ; N # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN +058F ; N # Sc ARMENIAN DRAM SIGN +0591..05BD ; N # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BE ; N # Pd HEBREW PUNCTUATION MAQAF +05BF ; N # Mn HEBREW POINT RAFE +05C0 ; N # Po HEBREW PUNCTUATION PASEQ +05C1..05C2 ; N # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C3 ; N # Po HEBREW PUNCTUATION SOF PASUQ +05C4..05C5 ; N # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C6 ; N # Po HEBREW PUNCTUATION NUN HAFUKHA +05C7 ; N # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; N # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; N # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4 ; N # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +0600..0605 ; N # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +0606..0608 ; N # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +0609..060A ; N # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +060B ; N # Sc AFGHANI SIGN +060C..060D ; N # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR +060E..060F ; N # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +0610..061A ; N # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +061B ; N # Po ARABIC SEMICOLON +061C ; N # Cf ARABIC LETTER MARK +061D..061F ; N # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +0620..063F ; N # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; N # Lm ARABIC TATWEEL +0641..064A ; N # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +064B..065F ; N # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0660..0669 ; N # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066A..066D ; N # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR +066E..066F ; N # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0670 ; N # Mn ARABIC LETTER SUPERSCRIPT ALEF +0671..06D3 ; N # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4 ; N # Po ARABIC FULL STOP +06D5 ; N # Lo ARABIC LETTER AE +06D6..06DC ; N # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DD ; N # Cf ARABIC END OF AYAH +06DE ; N # So ARABIC START OF RUB EL HIZB +06DF..06E4 ; N # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; N # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; N # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06E9 ; N # So ARABIC PLACE OF SAJDAH +06EA..06ED ; N # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +06EE..06EF ; N # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; N # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; N # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FD..06FE ; N # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF ; N # Lo ARABIC LETTER HEH WITH INVERTED V +0700..070D ; N # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +070F ; N # Cf SYRIAC ABBREVIATION MARK +0710 ; N # Lo SYRIAC LETTER ALAPH +0711 ; N # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; N # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..074A ; N # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +074D..074F ; N # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE +0750..077F ; N # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE +0780..07A5 ; N # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU +07A6..07B0 ; N # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; N # Lo THAANA LETTER NAA +07C0..07C9 ; N # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; N # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07EB..07F3 ; N # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; N # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07F6 ; N # So NKO SYMBOL OO DENNEN +07F7..07F9 ; N # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +07FA ; N # Lm NKO LAJANYALAN +07FD ; N # Mn NKO DANTAYALAN +07FE..07FF ; N # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN +0800..0815 ; N # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0819 ; N # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; N # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; N # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; N # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; N # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; N # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; N # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0830..083E ; N # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +0840..0858 ; N # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0859..085B ; N # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +085E ; N # Po MANDAIC PUNCTUATION +0860..086A ; N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0888 ; N # Sk ARABIC RAISED ROUND DOT +0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +0898..089F ; N # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; N # Lm ARABIC SMALL FARSI YEH +08CA..08E1 ; N # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E2 ; N # Cf ARABIC DISPUTED END OF AYAH +08E3..08FF ; N # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA +0900..0902 ; N # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA +0903 ; N # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; N # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093A ; N # Mn DEVANAGARI VOWEL SIGN OE +093B ; N # Mc DEVANAGARI VOWEL SIGN OOE +093C ; N # Mn DEVANAGARI SIGN NUKTA +093D ; N # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; N # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; N # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; N # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D ; N # Mn DEVANAGARI SIGN VIRAMA +094E..094F ; N # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; N # Lo DEVANAGARI OM +0951..0957 ; N # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0958..0961 ; N # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; N # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0964..0965 ; N # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0966..096F ; N # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; N # Po DEVANAGARI ABBREVIATION SIGN +0971 ; N # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..097F ; N # Lo [14] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER BBA +0980 ; N # Lo BENGALI ANJI +0981 ; N # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; N # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; N # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; N # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; N # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; N # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; N # Lo BENGALI LETTER LA +09B6..09B9 ; N # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BC ; N # Mn BENGALI SIGN NUKTA +09BD ; N # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; N # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; N # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; N # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; N # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD ; N # Mn BENGALI SIGN VIRAMA +09CE ; N # Lo BENGALI LETTER KHANDA TA +09D7 ; N # Mc BENGALI AU LENGTH MARK +09DC..09DD ; N # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; N # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; N # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09E6..09EF ; N # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; N # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F2..09F3 ; N # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09F4..09F9 ; N # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA ; N # So BENGALI ISSHAR +09FB ; N # Sc BENGALI GANDA MARK +09FC ; N # Lo BENGALI LETTER VEDIC ANUSVARA +09FD ; N # Po BENGALI ABBREVIATION SIGN +09FE ; N # Mn BENGALI SANDHI MARK +0A01..0A02 ; N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; N # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; N # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; N # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; N # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; N # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; N # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; N # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3C ; N # Mn GURMUKHI SIGN NUKTA +0A3E..0A40 ; N # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; N # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; N # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; N # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; N # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; N # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; N # Lo GURMUKHI LETTER FA +0A66..0A6F ; N # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A70..0A71 ; N # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; N # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; N # Mn GURMUKHI SIGN YAKASH +0A76 ; N # Po GURMUKHI ABBREVIATION SIGN +0A81..0A82 ; N # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; N # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; N # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; N # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; N # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; N # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; N # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; N # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABC ; N # Mn GUJARATI SIGN NUKTA +0ABD ; N # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; N # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; N # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; N # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; N # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; N # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD ; N # Mn GUJARATI SIGN VIRAMA +0AD0 ; N # Lo GUJARATI OM +0AE0..0AE1 ; N # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; N # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AE6..0AEF ; N # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; N # Po GUJARATI ABBREVIATION SIGN +0AF1 ; N # Sc GUJARATI RUPEE SIGN +0AF9 ; N # Lo GUJARATI LETTER ZHA +0AFA..0AFF ; N # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; N # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; N # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; N # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; N # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; N # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; N # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; N # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; N # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3C ; N # Mn ORIYA SIGN NUKTA +0B3D ; N # Lo ORIYA SIGN AVAGRAHA +0B3E ; N # Mc ORIYA VOWEL SIGN AA +0B3F ; N # Mn ORIYA VOWEL SIGN I +0B40 ; N # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; N # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; N # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D ; N # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; N # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; N # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B66..0B6F ; N # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; N # So ORIYA ISSHAR +0B71 ; N # Lo ORIYA LETTER WA +0B72..0B77 ; N # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS +0B82 ; N # Mn TAMIL SIGN ANUSVARA +0B83 ; N # Lo TAMIL SIGN VISARGA +0B85..0B8A ; N # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; N # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; N # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; N # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; N # Lo TAMIL LETTER JA +0B9E..0B9F ; N # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; N # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; N # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; N # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; N # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; N # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; N # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; N # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; N # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD ; N # Mn TAMIL SIGN VIRAMA +0BD0 ; N # Lo TAMIL OM +0BD7 ; N # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; N # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; N # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0BF3..0BF8 ; N # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BF9 ; N # Sc TAMIL RUPEE SIGN +0BFA ; N # So TAMIL NUMBER SIGN +0C00 ; N # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; N # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; N # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C05..0C0C ; N # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; N # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; N # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; N # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3C ; N # Mn TELUGU SIGN NUKTA +0C3D ; N # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; N # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; N # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; N # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C77 ; N # Po TELUGU SIGN SIDDHAM +0C78..0C7E ; N # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0C7F ; N # So TELUGU SIGN TUUMU +0C80 ; N # Lo KANNADA SIGN SPACING CANDRABINDU +0C81 ; N # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; N # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C84 ; N # Po KANNADA SIGN SIDDHAM +0C85..0C8C ; N # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; N # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; N # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; N # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; N # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBC ; N # Mn KANNADA SIGN NUKTA +0CBD ; N # Lo KANNADA SIGN AVAGRAHA +0CBE ; N # Mc KANNADA VOWEL SIGN AA +0CBF ; N # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; N # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; N # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; N # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; N # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; N # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D00..0D01 ; N # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; N # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; N # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; N # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; N # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3B..0D3C ; N # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3D ; N # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; N # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; N # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; N # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; N # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D ; N # Mn MALAYALAM SIGN VIRAMA +0D4E ; N # Lo MALAYALAM LETTER DOT REPH +0D4F ; N # So MALAYALAM SIGN PARA +0D54..0D56 ; N # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; N # Mc MALAYALAM AU LENGTH MARK +0D58..0D5E ; N # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH +0D5F..0D61 ; N # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; N # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D66..0D6F ; N # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D78 ; N # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS +0D79 ; N # So MALAYALAM DATE MARK +0D7A..0D7F ; N # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D81 ; N # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; N # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; N # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; N # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; N # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; N # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; N # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCA ; N # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1 ; N # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; N # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; N # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; N # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF ; N # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; N # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4 ; N # Po SINHALA PUNCTUATION KUNDDALIYA +0E01..0E30 ; N # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; N # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; N # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; N # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E3F ; N # Sc THAI CURRENCY SYMBOL BAHT +0E40..0E45 ; N # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; N # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; N # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0E4F ; N # Po THAI CHARACTER FONGMAN +0E50..0E59 ; N # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; N # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0E81..0E82 ; N # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; N # Lo LAO LETTER KHO TAM +0E86..0E8A ; N # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; N # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; N # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; N # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB1 ; N # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; N # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EBC ; N # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EBD ; N # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; N # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; N # Lm LAO KO LA +0EC8..0ECE ; N # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0ED0..0ED9 ; N # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; N # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; N # Lo TIBETAN SYLLABLE OM +0F01..0F03 ; N # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12 ; N # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13 ; N # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; N # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; N # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F18..0F19 ; N # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F1A..0F1F ; N # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29 ; N # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33 ; N # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34 ; N # So TIBETAN MARK BSDUS RTAGS +0F35 ; N # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F36 ; N # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F37 ; N # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F38 ; N # So TIBETAN MARK CHE MGO +0F39 ; N # Mn TIBETAN MARK TSA -PHRU +0F3A ; N # Ps TIBETAN MARK GUG RTAGS GYON +0F3B ; N # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C ; N # Ps TIBETAN MARK ANG KHANG GYON +0F3D ; N # Pe TIBETAN MARK ANG KHANG GYAS +0F3E..0F3F ; N # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; N # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; N # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; N # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; N # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84 ; N # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F85 ; N # Po TIBETAN MARK PALUTA +0F86..0F87 ; N # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F88..0F8C ; N # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0F8D..0F97 ; N # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; N # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FBE..0FC5 ; N # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC6 ; N # Mn TIBETAN SYMBOL PADMA GDAN +0FC7..0FCC ; N # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; N # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4 ; N # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA +0FD5..0FD8 ; N # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +0FD9..0FDA ; N # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS +1000..102A ; N # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; N # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; N # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; N # Mc MYANMAR VOWEL SIGN E +1032..1037 ; N # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038 ; N # Mc MYANMAR SIGN VISARGA +1039..103A ; N # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C ; N # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; N # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; N # Lo MYANMAR LETTER GREAT SA +1040..1049 ; N # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F ; N # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055 ; N # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; N # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; N # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; N # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; N # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1061 ; N # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; N # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; N # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; N # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; N # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; N # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; N # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; N # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; N # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; N # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; N # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; N # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; N # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; N # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; N # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; N # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; N # Mn MYANMAR VOWEL SIGN AITON AI +109E..109F ; N # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +10A0..10C5 ; N # Lu [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; N # Lu GEORGIAN CAPITAL LETTER YN +10CD ; N # Lu GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; N # Ll [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FB ; N # Po GEORGIAN PARAGRAPH SEPARATOR +10FC ; N # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; N # Ll [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..115F ; W # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER +1160..11FF ; N # Lo [160] HANGUL JUNGSEONG FILLER..HANGUL JONGSEONG SSANGNIEUN +1200..1248 ; N # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA +124A..124D ; N # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; N # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; N # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; N # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; N # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; N # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; N # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; N # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; N # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; N # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; N # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; N # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; N # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; N # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; N # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +135D..135F ; N # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1360..1368 ; N # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C ; N # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F ; N # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +1390..1399 ; N # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +13A0..13F5 ; N # Lu [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; N # Ll [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1400 ; N # Pd CANADIAN SYLLABICS HYPHEN +1401..166C ; N # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D ; N # So CANADIAN SYLLABICS CHI SIGN +166E ; N # Po CANADIAN SYLLABICS FULL STOP +166F..167F ; N # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1680 ; N # Zs OGHAM SPACE MARK +1681..169A ; N # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +169B ; N # Ps OGHAM FEATHER MARK +169C ; N # Pe OGHAM REVERSED FEATHER MARK +16A0..16EA ; N # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EB..16ED ; N # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +16EE..16F0 ; N # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; N # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; N # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1712..1714 ; N # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; N # Mc TAGALOG SIGN PAMUDPOD +171F ; N # Lo TAGALOG LETTER ARCHAIC RA +1720..1731 ; N # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA +1732..1733 ; N # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; N # Mc HANUNOO SIGN PAMUDPOD +1735..1736 ; N # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1740..1751 ; N # Lo [18] BUHID LETTER A..BUHID LETTER HA +1752..1753 ; N # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1760..176C ; N # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; N # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; N # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +1780..17B3 ; N # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; N # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6 ; N # Mc KHMER VOWEL SIGN AA +17B7..17BD ; N # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; N # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; N # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; N # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3 ; N # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D4..17D6 ; N # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; N # Lm KHMER SIGN LEK TOO +17D8..17DA ; N # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +17DB ; N # Sc KHMER CURRENCY SYMBOL RIEL +17DC ; N # Lo KHMER SIGN AVAKRAHASANYA +17DD ; N # Mn KHMER SIGN ATTHACAN +17E0..17E9 ; N # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +17F0..17F9 ; N # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +1800..1805 ; N # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS +1806 ; N # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A ; N # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +180B..180D ; N # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180E ; N # Cf MONGOLIAN VOWEL SEPARATOR +180F ; N # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1810..1819 ; N # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; N # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; N # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; N # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; N # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; N # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; N # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; N # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; N # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; N # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; N # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1920..1922 ; N # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; N # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; N # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; N # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; N # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; N # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; N # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B ; N # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1940 ; N # So LIMBU SIGN LOO +1944..1945 ; N # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1946..194F ; N # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; N # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; N # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; N # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; N # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; N # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; N # No NEW TAI LUE THAM DIGIT ONE +19DE..19DF ; N # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV +19E0..19FF ; N # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC +1A00..1A16 ; N # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; N # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; N # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; N # Mn BUGINESE VOWEL SIGN AE +1A1E..1A1F ; N # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION +1A20..1A54 ; N # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; N # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; N # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; N # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; N # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; N # Mn TAI THAM SIGN SAKOT +1A61 ; N # Mc TAI THAM VOWEL SIGN A +1A62 ; N # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; N # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; N # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; N # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C ; N # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; N # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A80..1A89 ; N # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; N # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6 ; N # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7 ; N # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; N # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; N # Mc BALINESE SIGN BISAH +1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B34 ; N # Mn BALINESE SIGN REREKAN +1B35 ; N # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; N # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; N # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; N # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; N # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; N # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; N # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; N # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59 ; N # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60 ; N # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A ; N # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B6B..1B73 ; N # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B74..1B7C ; N # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING +1B7D..1B7E ; N # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B80..1B81 ; N # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; N # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; N # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; N # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; N # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; N # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; N # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; N # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; N # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BAE..1BAF ; N # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; N # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BBF ; N # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M +1BC0..1BE5 ; N # Lo [38] BATAK LETTER A..BATAK LETTER U +1BE6 ; N # Mn BATAK SIGN TOMPI +1BE7 ; N # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; N # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; N # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; N # Mn BATAK VOWEL SIGN KARO O +1BEE ; N # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; N # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; N # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1BFC..1BFF ; N # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT +1C00..1C23 ; N # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; N # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; N # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; N # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37 ; N # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C3B..1C3F ; N # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49 ; N # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; N # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; N # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; N # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F ; N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1C80..1C88 ; N # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; N # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; N # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CC0..1CC7 ; N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CD0..1CD2 ; N # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD3 ; N # Po VEDIC SIGN NIHSHVASA +1CD4..1CE0 ; N # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; N # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; N # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CE9..1CEC ; N # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CED ; N # Mn VEDIC SIGN TIRYAK +1CEE..1CF3 ; N # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; N # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; N # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; N # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; N # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1CFA ; N # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; N # Ll [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; N # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; N # Ll [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; N # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D7F ; N # Ll [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE +1D80..1D9A ; N # Ll [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DC0..1DFF ; N # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1E00..1EFF ; N # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP +1F00..1F15 ; N # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; N # Lu [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; N # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; N # Lu [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; N # Ll [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; N # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; N # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; N # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBD ; N # Sk GREEK KORONIS +1FBE ; N # Ll GREEK PROSGEGRAMMENI +1FBF..1FC1 ; N # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FC2..1FC4 ; N # Ll [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; N # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCD..1FCF ; N # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FD0..1FD3 ; N # Ll [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; N # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FDD..1FDF ; N # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FE0..1FEC ; N # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FED..1FEF ; N # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FF2..1FF4 ; N # Ll [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; N # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFD..1FFE ; N # Sk [2] GREEK OXIA..GREEK DASIA +2000..200A ; N # Zs [11] EN QUAD..HAIR SPACE +200B..200F ; N # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK +2010 ; A # Pd HYPHEN +2011..2012 ; N # Pd [2] NON-BREAKING HYPHEN..FIGURE DASH +2013..2015 ; A # Pd [3] EN DASH..HORIZONTAL BAR +2016 ; A # Po DOUBLE VERTICAL LINE +2017 ; N # Po DOUBLE LOW LINE +2018 ; A # Pi LEFT SINGLE QUOTATION MARK +2019 ; A # Pf RIGHT SINGLE QUOTATION MARK +201A ; N # Ps SINGLE LOW-9 QUOTATION MARK +201B ; N # Pi SINGLE HIGH-REVERSED-9 QUOTATION MARK +201C ; A # Pi LEFT DOUBLE QUOTATION MARK +201D ; A # Pf RIGHT DOUBLE QUOTATION MARK +201E ; N # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; N # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2022 ; A # Po [3] DAGGER..BULLET +2023 ; N # Po TRIANGULAR BULLET +2024..2027 ; A # Po [4] ONE DOT LEADER..HYPHENATION POINT +2028 ; N # Zl LINE SEPARATOR +2029 ; N # Zp PARAGRAPH SEPARATOR +202A..202E ; N # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +202F ; N # Zs NARROW NO-BREAK SPACE +2030 ; A # Po PER MILLE SIGN +2031 ; N # Po PER TEN THOUSAND SIGN +2032..2033 ; A # Po [2] PRIME..DOUBLE PRIME +2034 ; N # Po TRIPLE PRIME +2035 ; A # Po REVERSED PRIME +2036..2038 ; N # Po [3] REVERSED DOUBLE PRIME..CARET +2039 ; N # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; N # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B ; A # Po REFERENCE MARK +203C..203D ; N # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +203E ; A # Po OVERLINE +203F..2040 ; N # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043 ; N # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; N # Sm FRACTION SLASH +2045 ; N # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; N # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; N # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; N # Sm COMMERCIAL MINUS SIGN +2053 ; N # Po SWUNG DASH +2054 ; N # Pc INVERTED UNDERTIE +2055..205E ; N # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +205F ; N # Zs MEDIUM MATHEMATICAL SPACE +2060..2064 ; N # Cf [5] WORD JOINER..INVISIBLE PLUS +2066..206F ; N # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +2070 ; N # No SUPERSCRIPT ZERO +2071 ; N # Lm SUPERSCRIPT LATIN SMALL LETTER I +2074 ; A # No SUPERSCRIPT FOUR +2075..2079 ; N # No [5] SUPERSCRIPT FIVE..SUPERSCRIPT NINE +207A..207C ; N # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; N # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; N # Pe SUPERSCRIPT RIGHT PARENTHESIS +207F ; A # Lm SUPERSCRIPT LATIN SMALL LETTER N +2080 ; N # No SUBSCRIPT ZERO +2081..2084 ; A # No [4] SUBSCRIPT ONE..SUBSCRIPT FOUR +2085..2089 ; N # No [5] SUBSCRIPT FIVE..SUBSCRIPT NINE +208A..208C ; N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; N # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; N # Pe SUBSCRIPT RIGHT PARENTHESIS +2090..209C ; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN +20A9 ; H # Sc WON SIGN +20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN +20AC ; A # Sc EURO SIGN +20AD..20C0 ; N # Sc [20] KIP SIGN..SOM SIGN +20D0..20DC ; N # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; N # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; N # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; N # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; N # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2100..2101 ; N # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2102 ; N # Lu DOUBLE-STRUCK CAPITAL C +2103 ; A # So DEGREE CELSIUS +2104 ; N # So CENTRE LINE SYMBOL +2105 ; A # So CARE OF +2106 ; N # So CADA UNA +2107 ; N # Lu EULER CONSTANT +2108 ; N # So SCRUPLE +2109 ; A # So DEGREE FAHRENHEIT +210A..2112 ; N # L& [9] SCRIPT SMALL G..SCRIPT CAPITAL L +2113 ; A # Ll SCRIPT SMALL L +2114 ; N # So L B BAR SYMBOL +2115 ; N # Lu DOUBLE-STRUCK CAPITAL N +2116 ; A # So NUMERO SIGN +2117 ; N # So SOUND RECORDING COPYRIGHT +2118 ; N # Sm SCRIPT CAPITAL P +2119..211D ; N # Lu [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +211E..2120 ; N # So [3] PRESCRIPTION TAKE..SERVICE MARK +2121..2122 ; A # So [2] TELEPHONE SIGN..TRADE MARK SIGN +2123 ; N # So VERSICLE +2124 ; N # Lu DOUBLE-STRUCK CAPITAL Z +2125 ; N # So OUNCE SIGN +2126 ; A # Lu OHM SIGN +2127 ; N # So INVERTED OHM SIGN +2128 ; N # Lu BLACK-LETTER CAPITAL Z +2129 ; N # So TURNED GREEK SMALL LETTER IOTA +212A ; N # Lu KELVIN SIGN +212B ; A # Lu ANGSTROM SIGN +212C..212D ; N # Lu [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212E ; N # So ESTIMATED SYMBOL +212F..2134 ; N # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; N # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; N # Ll INFORMATION SOURCE +213A..213B ; N # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +213C..213F ; N # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2140..2144 ; N # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +2145..2149 ; N # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214A ; N # So PROPERTY LINE +214B ; N # Sm TURNED AMPERSAND +214C..214D ; N # So [2] PER SIGN..AKTIESELSKAB +214E ; N # Ll TURNED SMALL F +214F ; N # So SYMBOL FOR SAMARITAN SOURCE +2150..2152 ; N # No [3] VULGAR FRACTION ONE SEVENTH..VULGAR FRACTION ONE TENTH +2153..2154 ; A # No [2] VULGAR FRACTION ONE THIRD..VULGAR FRACTION TWO THIRDS +2155..215A ; N # No [6] VULGAR FRACTION ONE FIFTH..VULGAR FRACTION FIVE SIXTHS +215B..215E ; A # No [4] VULGAR FRACTION ONE EIGHTH..VULGAR FRACTION SEVEN EIGHTHS +215F ; N # No FRACTION NUMERATOR ONE +2160..216B ; A # Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE +216C..216F ; N # Nl [4] ROMAN NUMERAL FIFTY..ROMAN NUMERAL ONE THOUSAND +2170..2179 ; A # Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN +217A..2182 ; N # Nl [9] SMALL ROMAN NUMERAL ELEVEN..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; N # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; N # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2189 ; A # No VULGAR FRACTION ZERO THIRDS +218A..218B ; N # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE +2190..2194 ; A # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; A # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; N # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; N # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; N # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; N # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; N # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; N # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; N # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; N # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; N # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21B7 ; N # So [9] DOWNWARDS ZIGZAG ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21B8..21B9 ; A # So [2] NORTH WEST ARROW TO LONG BAR..LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR +21BA..21CD ; N # So [20] ANTICLOCKWISE OPEN CIRCLE ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; N # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; N # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; A # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; N # So DOWNWARDS DOUBLE ARROW +21D4 ; A # Sm LEFT RIGHT DOUBLE ARROW +21D5..21E6 ; N # So [18] UP DOWN DOUBLE ARROW..LEFTWARDS WHITE ARROW +21E7 ; A # So UPWARDS WHITE ARROW +21E8..21F3 ; N # So [12] RIGHTWARDS WHITE ARROW..UP DOWN WHITE ARROW +21F4..21FF ; N # Sm [12] RIGHT ARROW WITH SMALL CIRCLE..LEFT RIGHT OPEN-HEADED ARROW +2200 ; A # Sm FOR ALL +2201 ; N # Sm COMPLEMENT +2202..2203 ; A # Sm [2] PARTIAL DIFFERENTIAL..THERE EXISTS +2204..2206 ; N # Sm [3] THERE DOES NOT EXIST..INCREMENT +2207..2208 ; A # Sm [2] NABLA..ELEMENT OF +2209..220A ; N # Sm [2] NOT AN ELEMENT OF..SMALL ELEMENT OF +220B ; A # Sm CONTAINS AS MEMBER +220C..220E ; N # Sm [3] DOES NOT CONTAIN AS MEMBER..END OF PROOF +220F ; A # Sm N-ARY PRODUCT +2210 ; N # Sm N-ARY COPRODUCT +2211 ; A # Sm N-ARY SUMMATION +2212..2214 ; N # Sm [3] MINUS SIGN..DOT PLUS +2215 ; A # Sm DIVISION SLASH +2216..2219 ; N # Sm [4] SET MINUS..BULLET OPERATOR +221A ; A # Sm SQUARE ROOT +221B..221C ; N # Sm [2] CUBE ROOT..FOURTH ROOT +221D..2220 ; A # Sm [4] PROPORTIONAL TO..ANGLE +2221..2222 ; N # Sm [2] MEASURED ANGLE..SPHERICAL ANGLE +2223 ; A # Sm DIVIDES +2224 ; N # Sm DOES NOT DIVIDE +2225 ; A # Sm PARALLEL TO +2226 ; N # Sm NOT PARALLEL TO +2227..222C ; A # Sm [6] LOGICAL AND..DOUBLE INTEGRAL +222D ; N # Sm TRIPLE INTEGRAL +222E ; A # Sm CONTOUR INTEGRAL +222F..2233 ; N # Sm [5] SURFACE INTEGRAL..ANTICLOCKWISE CONTOUR INTEGRAL +2234..2237 ; A # Sm [4] THEREFORE..PROPORTION +2238..223B ; N # Sm [4] DOT MINUS..HOMOTHETIC +223C..223D ; A # Sm [2] TILDE OPERATOR..REVERSED TILDE +223E..2247 ; N # Sm [10] INVERTED LAZY S..NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +2248 ; A # Sm ALMOST EQUAL TO +2249..224B ; N # Sm [3] NOT ALMOST EQUAL TO..TRIPLE TILDE +224C ; A # Sm ALL EQUAL TO +224D..2251 ; N # Sm [5] EQUIVALENT TO..GEOMETRICALLY EQUAL TO +2252 ; A # Sm APPROXIMATELY EQUAL TO OR THE IMAGE OF +2253..225F ; N # Sm [13] IMAGE OF OR APPROXIMATELY EQUAL TO..QUESTIONED EQUAL TO +2260..2261 ; A # Sm [2] NOT EQUAL TO..IDENTICAL TO +2262..2263 ; N # Sm [2] NOT IDENTICAL TO..STRICTLY EQUIVALENT TO +2264..2267 ; A # Sm [4] LESS-THAN OR EQUAL TO..GREATER-THAN OVER EQUAL TO +2268..2269 ; N # Sm [2] LESS-THAN BUT NOT EQUAL TO..GREATER-THAN BUT NOT EQUAL TO +226A..226B ; A # Sm [2] MUCH LESS-THAN..MUCH GREATER-THAN +226C..226D ; N # Sm [2] BETWEEN..NOT EQUIVALENT TO +226E..226F ; A # Sm [2] NOT LESS-THAN..NOT GREATER-THAN +2270..2281 ; N # Sm [18] NEITHER LESS-THAN NOR EQUAL TO..DOES NOT SUCCEED +2282..2283 ; A # Sm [2] SUBSET OF..SUPERSET OF +2284..2285 ; N # Sm [2] NOT A SUBSET OF..NOT A SUPERSET OF +2286..2287 ; A # Sm [2] SUBSET OF OR EQUAL TO..SUPERSET OF OR EQUAL TO +2288..2294 ; N # Sm [13] NEITHER A SUBSET OF NOR EQUAL TO..SQUARE CUP +2295 ; A # Sm CIRCLED PLUS +2296..2298 ; N # Sm [3] CIRCLED MINUS..CIRCLED DIVISION SLASH +2299 ; A # Sm CIRCLED DOT OPERATOR +229A..22A4 ; N # Sm [11] CIRCLED RING OPERATOR..DOWN TACK +22A5 ; A # Sm UP TACK +22A6..22BE ; N # Sm [25] ASSERTION..RIGHT ANGLE WITH ARC +22BF ; A # Sm RIGHT TRIANGLE +22C0..22FF ; N # Sm [64] N-ARY LOGICAL AND..Z NOTATION BAG MEMBERSHIP +2300..2307 ; N # So [8] DIAMETER SIGN..WAVY LINE +2308 ; N # Ps LEFT CEILING +2309 ; N # Pe RIGHT CEILING +230A ; N # Ps LEFT FLOOR +230B ; N # Pe RIGHT FLOOR +230C..2311 ; N # So [6] BOTTOM RIGHT CROP..SQUARE LOZENGE +2312 ; A # So ARC +2313..2319 ; N # So [7] SEGMENT..TURNED NOT SIGN +231A..231B ; W # So [2] WATCH..HOURGLASS +231C..231F ; N # So [4] TOP LEFT CORNER..BOTTOM RIGHT CORNER +2320..2321 ; N # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; N # So [7] FROWN..KEYBOARD +2329 ; W # Ps LEFT-POINTING ANGLE BRACKET +232A ; W # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; N # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; N # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; N # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; N # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; N # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; N # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..23E8 ; N # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL +23E9..23EC ; W # So [4] BLACK RIGHT-POINTING DOUBLE TRIANGLE..BLACK DOWN-POINTING DOUBLE TRIANGLE +23ED..23EF ; N # So [3] BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR +23F0 ; W # So ALARM CLOCK +23F1..23F2 ; N # So [2] STOPWATCH..TIMER CLOCK +23F3 ; W # So HOURGLASS WITH FLOWING SAND +23F4..23FF ; N # So [12] BLACK MEDIUM LEFT-POINTING TRIANGLE..OBSERVER EYE SYMBOL +2400..2426 ; N # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO +2440..244A ; N # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..249B ; A # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP +249C..24E9 ; A # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +24EA ; N # No CIRCLED DIGIT ZERO +24EB..24FF ; A # No [21] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED DIGIT ZERO +2500..254B ; A # So [76] BOX DRAWINGS LIGHT HORIZONTAL..BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL +254C..254F ; N # So [4] BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL..BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL +2550..2573 ; A # So [36] BOX DRAWINGS DOUBLE HORIZONTAL..BOX DRAWINGS LIGHT DIAGONAL CROSS +2574..257F ; N # So [12] BOX DRAWINGS LIGHT LEFT..BOX DRAWINGS HEAVY UP AND LIGHT DOWN +2580..258F ; A # So [16] UPPER HALF BLOCK..LEFT ONE EIGHTH BLOCK +2590..2591 ; N # So [2] RIGHT HALF BLOCK..LIGHT SHADE +2592..2595 ; A # So [4] MEDIUM SHADE..RIGHT ONE EIGHTH BLOCK +2596..259F ; N # So [10] QUADRANT LOWER LEFT..QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT +25A0..25A1 ; A # So [2] BLACK SQUARE..WHITE SQUARE +25A2 ; N # So WHITE SQUARE WITH ROUNDED CORNERS +25A3..25A9 ; A # So [7] WHITE SQUARE CONTAINING BLACK SMALL SQUARE..SQUARE WITH DIAGONAL CROSSHATCH FILL +25AA..25B1 ; N # So [8] BLACK SMALL SQUARE..WHITE PARALLELOGRAM +25B2..25B3 ; A # So [2] BLACK UP-POINTING TRIANGLE..WHITE UP-POINTING TRIANGLE +25B4..25B5 ; N # So [2] BLACK UP-POINTING SMALL TRIANGLE..WHITE UP-POINTING SMALL TRIANGLE +25B6 ; A # So BLACK RIGHT-POINTING TRIANGLE +25B7 ; A # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25BB ; N # So [4] BLACK RIGHT-POINTING SMALL TRIANGLE..WHITE RIGHT-POINTING POINTER +25BC..25BD ; A # So [2] BLACK DOWN-POINTING TRIANGLE..WHITE DOWN-POINTING TRIANGLE +25BE..25BF ; N # So [2] BLACK DOWN-POINTING SMALL TRIANGLE..WHITE DOWN-POINTING SMALL TRIANGLE +25C0 ; A # So BLACK LEFT-POINTING TRIANGLE +25C1 ; A # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25C5 ; N # So [4] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE LEFT-POINTING POINTER +25C6..25C8 ; A # So [3] BLACK DIAMOND..WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND +25C9..25CA ; N # So [2] FISHEYE..LOZENGE +25CB ; A # So WHITE CIRCLE +25CC..25CD ; N # So [2] DOTTED CIRCLE..CIRCLE WITH VERTICAL FILL +25CE..25D1 ; A # So [4] BULLSEYE..CIRCLE WITH RIGHT HALF BLACK +25D2..25E1 ; N # So [16] CIRCLE WITH LOWER HALF BLACK..LOWER HALF CIRCLE +25E2..25E5 ; A # So [4] BLACK LOWER RIGHT TRIANGLE..BLACK UPPER RIGHT TRIANGLE +25E6..25EE ; N # So [9] WHITE BULLET..UP-POINTING TRIANGLE WITH RIGHT HALF BLACK +25EF ; A # So LARGE CIRCLE +25F0..25F7 ; N # So [8] WHITE SQUARE WITH UPPER LEFT QUADRANT..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FC ; N # Sm [5] UPPER LEFT TRIANGLE..BLACK MEDIUM SQUARE +25FD..25FE ; W # Sm [2] WHITE MEDIUM SMALL SQUARE..BLACK MEDIUM SMALL SQUARE +25FF ; N # Sm LOWER RIGHT TRIANGLE +2600..2604 ; N # So [5] BLACK SUN WITH RAYS..COMET +2605..2606 ; A # So [2] BLACK STAR..WHITE STAR +2607..2608 ; N # So [2] LIGHTNING..THUNDERSTORM +2609 ; A # So SUN +260A..260D ; N # So [4] ASCENDING NODE..OPPOSITION +260E..260F ; A # So [2] BLACK TELEPHONE..WHITE TELEPHONE +2610..2613 ; N # So [4] BALLOT BOX..SALTIRE +2614..2615 ; W # So [2] UMBRELLA WITH RAIN DROPS..HOT BEVERAGE +2616..261B ; N # So [6] WHITE SHOGI PIECE..BLACK RIGHT POINTING INDEX +261C ; A # So WHITE LEFT POINTING INDEX +261D ; N # So WHITE UP POINTING INDEX +261E ; A # So WHITE RIGHT POINTING INDEX +261F..263F ; N # So [33] WHITE DOWN POINTING INDEX..MERCURY +2640 ; A # So FEMALE SIGN +2641 ; N # So EARTH +2642 ; A # So MALE SIGN +2643..2647 ; N # So [5] JUPITER..PLUTO +2648..2653 ; W # So [12] ARIES..PISCES +2654..265F ; N # So [12] WHITE CHESS KING..BLACK CHESS PAWN +2660..2661 ; A # So [2] BLACK SPADE SUIT..WHITE HEART SUIT +2662 ; N # So WHITE DIAMOND SUIT +2663..2665 ; A # So [3] BLACK CLUB SUIT..BLACK HEART SUIT +2666 ; N # So BLACK DIAMOND SUIT +2667..266A ; A # So [4] WHITE CLUB SUIT..EIGHTH NOTE +266B ; N # So BEAMED EIGHTH NOTES +266C..266D ; A # So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN +266E ; N # So MUSIC NATURAL SIGN +266F ; A # Sm MUSIC SHARP SIGN +2670..267E ; N # So [15] WEST SYRIAC CROSS..PERMANENT PAPER SIGN +267F ; W # So WHEELCHAIR SYMBOL +2680..2692 ; N # So [19] DIE FACE-1..HAMMER AND PICK +2693 ; W # So ANCHOR +2694..269D ; N # So [10] CROSSED SWORDS..OUTLINED WHITE STAR +269E..269F ; A # So [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT +26A0 ; N # So WARNING SIGN +26A1 ; W # So HIGH VOLTAGE SIGN +26A2..26A9 ; N # So [8] DOUBLED FEMALE SIGN..HORIZONTAL MALE WITH STROKE SIGN +26AA..26AB ; W # So [2] MEDIUM WHITE CIRCLE..MEDIUM BLACK CIRCLE +26AC..26BC ; N # So [17] MEDIUM SMALL WHITE CIRCLE..SESQUIQUADRATE +26BD..26BE ; W # So [2] SOCCER BALL..BASEBALL +26BF ; A # So SQUARED KEY +26C0..26C3 ; N # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING +26C4..26C5 ; W # So [2] SNOWMAN WITHOUT SNOW..SUN BEHIND CLOUD +26C6..26CD ; A # So [8] RAIN..DISABLED CAR +26CE ; W # So OPHIUCHUS +26CF..26D3 ; A # So [5] PICK..CHAINS +26D4 ; W # So NO ENTRY +26D5..26E1 ; A # So [13] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..RESTRICTED LEFT ENTRY-2 +26E2 ; N # So ASTRONOMICAL SYMBOL FOR URANUS +26E3 ; A # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE +26E4..26E7 ; N # So [4] PENTAGRAM..INVERTED PENTAGRAM +26E8..26E9 ; A # So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE +26EA ; W # So CHURCH +26EB..26F1 ; A # So [7] CASTLE..UMBRELLA ON GROUND +26F2..26F3 ; W # So [2] FOUNTAIN..FLAG IN HOLE +26F4 ; A # So FERRY +26F5 ; W # So SAILBOAT +26F6..26F9 ; A # So [4] SQUARE FOUR CORNERS..PERSON WITH BALL +26FA ; W # So TENT +26FB..26FC ; A # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL +26FD ; W # So FUEL PUMP +26FE..26FF ; A # So [2] CUP ON BLACK SQUARE..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE +2700..2704 ; N # So [5] BLACK SAFETY SCISSORS..WHITE SCISSORS +2705 ; W # So WHITE HEAVY CHECK MARK +2706..2709 ; N # So [4] TELEPHONE LOCATION SIGN..ENVELOPE +270A..270B ; W # So [2] RAISED FIST..RAISED HAND +270C..2727 ; N # So [28] VICTORY HAND..WHITE FOUR POINTED STAR +2728 ; W # So SPARKLES +2729..273C ; N # So [20] STRESS OUTLINED WHITE STAR..OPEN CENTRE TEARDROP-SPOKED ASTERISK +273D ; A # So HEAVY TEARDROP-SPOKED ASTERISK +273E..274B ; N # So [14] SIX PETALLED BLACK AND WHITE FLORETTE..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +274C ; W # So CROSS MARK +274D ; N # So SHADOWED WHITE CIRCLE +274E ; W # So NEGATIVE SQUARED CROSS MARK +274F..2752 ; N # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE +2753..2755 ; W # So [3] BLACK QUESTION MARK ORNAMENT..WHITE EXCLAMATION MARK ORNAMENT +2756 ; N # So BLACK DIAMOND MINUS WHITE X +2757 ; W # So HEAVY EXCLAMATION MARK SYMBOL +2758..2767 ; N # So [16] LIGHT VERTICAL BAR..ROTATED FLORAL HEART BULLET +2768 ; N # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; N # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; N # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; N # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; N # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; N # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; N # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; N # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; N # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; N # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; N # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; N # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; N # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; N # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..277F ; A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN +2780..2793 ; N # No [20] DINGBAT CIRCLED SANS-SERIF DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794 ; N # So HEAVY WIDE-HEADED RIGHTWARDS ARROW +2795..2797 ; W # So [3] HEAVY PLUS SIGN..HEAVY DIVISION SIGN +2798..27AF ; N # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW +27B0 ; W # So CURLY LOOP +27B1..27BE ; N # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW +27BF ; W # So DOUBLE CURLY LOOP +27C0..27C4 ; N # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; N # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; N # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; N # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Na # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Na # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Na # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Na # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Na # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Na # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Na # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Na # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; N # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; N # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; N # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; N # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..297F ; N # Sm [128] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..DOWN FISH TAIL +2980..2982 ; N # Sm [3] TRIPLE VERTICAL BAR DELIMITER..Z NOTATION TYPE COLON +2983 ; N # Ps LEFT WHITE CURLY BRACKET +2984 ; N # Pe RIGHT WHITE CURLY BRACKET +2985 ; Na # Ps LEFT WHITE PARENTHESIS +2986 ; Na # Pe RIGHT WHITE PARENTHESIS +2987 ; N # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; N # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; N # Ps Z NOTATION LEFT BINDING BRACKET +298A ; N # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; N # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; N # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; N # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; N # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; N # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; N # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; N # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; N # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; N # Ps LEFT ARC LESS-THAN BRACKET +2994 ; N # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; N # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; N # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; N # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; N # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; N # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; N # Ps LEFT WIGGLY FENCE +29D9 ; N # Pe RIGHT WIGGLY FENCE +29DA ; N # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; N # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; N # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; N # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; N # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..29FF ; N # Sm [2] TINY..MINY +2A00..2AFF ; N # Sm [256] N-ARY CIRCLED DOT OPERATOR..N-ARY WHITE VERTICAL BAR +2B00..2B1A ; N # So [27] NORTH EAST WHITE ARROW..DOTTED SQUARE +2B1B..2B1C ; W # So [2] BLACK LARGE SQUARE..WHITE LARGE SQUARE +2B1D..2B2F ; N # So [19] BLACK VERY SMALL SQUARE..WHITE VERTICAL ELLIPSE +2B30..2B44 ; N # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; N # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; N # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B4F ; N # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW +2B50 ; W # So WHITE MEDIUM STAR +2B51..2B54 ; N # So [4] BLACK SMALL STAR..WHITE RIGHT-POINTING PENTAGON +2B55 ; W # So HEAVY LARGE CIRCLE +2B56..2B59 ; A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE +2B5A..2B73 ; N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95 ; N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF ; N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2C00..2C5F ; N # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C60..2C7B ; N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2C7F ; N # Lu [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL +2C80..2CE4 ; N # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI +2CE5..2CEA ; N # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CEB..2CEE ; N # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1 ; N # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; N # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2CF9..2CFC ; N # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; N # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; N # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2D00..2D25 ; N # Ll [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; N # Ll GEORGIAN SMALL LETTER YN +2D2D ; N # Ll GEORGIAN SMALL LETTER AEN +2D30..2D67 ; N # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70 ; N # Po TIFINAGH SEPARATOR MARK +2D7F ; N # Mn TIFINAGH CONSONANT JOINER +2D80..2D96 ; N # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; N # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; N # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; N # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; N # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; N # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; N # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; N # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; N # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DE0..2DFF ; N # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +2E00..2E01 ; N # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; N # Pi LEFT SUBSTITUTION BRACKET +2E03 ; N # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; N # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; N # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; N # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; N # Pi LEFT TRANSPOSITION BRACKET +2E0A ; N # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; N # Po RAISED SQUARE +2E0C ; N # Pi LEFT RAISED OMISSION BRACKET +2E0D ; N # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; N # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; N # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; N # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; N # Pd HYPHEN WITH DIAERESIS +2E1B ; N # Po TILDE WITH RING ABOVE +2E1C ; N # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; N # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; N # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; N # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; N # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; N # Ps TOP LEFT HALF BRACKET +2E23 ; N # Pe TOP RIGHT HALF BRACKET +2E24 ; N # Ps BOTTOM LEFT HALF BRACKET +2E25 ; N # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; N # Ps LEFT SIDEWAYS U BRACKET +2E27 ; N # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; N # Ps LEFT DOUBLE PARENTHESIS +2E29 ; N # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; N # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; N # Lm VERTICAL TILDE +2E30..2E39 ; N # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; N # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; N # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; N # Pd DOUBLE HYPHEN +2E41 ; N # Po REVERSED COMMA +2E42 ; N # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; N # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51 ; N # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; N # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; N # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; N # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; N # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; N # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; N # Ps TOP HALF LEFT PARENTHESIS +2E5A ; N # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; N # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; N # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; N # Pd OBLIQUE HYPHEN +2E80..2E99 ; W # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; W # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; W # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2FF0..2FFF ; W # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION +3000 ; F # Zs IDEOGRAPHIC SPACE +3001..3003 ; W # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; W # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3005 ; W # Lm IDEOGRAPHIC ITERATION MARK +3006 ; W # Lo IDEOGRAPHIC CLOSING MARK +3007 ; W # Nl IDEOGRAPHIC NUMBER ZERO +3008 ; W # Ps LEFT ANGLE BRACKET +3009 ; W # Pe RIGHT ANGLE BRACKET +300A ; W # Ps LEFT DOUBLE ANGLE BRACKET +300B ; W # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; W # Ps LEFT CORNER BRACKET +300D ; W # Pe RIGHT CORNER BRACKET +300E ; W # Ps LEFT WHITE CORNER BRACKET +300F ; W # Pe RIGHT WHITE CORNER BRACKET +3010 ; W # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; W # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; W # So [2] POSTAL MARK..GETA MARK +3014 ; W # Ps LEFT TORTOISE SHELL BRACKET +3015 ; W # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; W # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; W # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; W # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; W # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; W # Ps LEFT WHITE SQUARE BRACKET +301B ; W # Pe RIGHT WHITE SQUARE BRACKET +301C ; W # Pd WAVE DASH +301D ; W # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; W # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; W # So POSTAL MARK FACE +3021..3029 ; W # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302A..302D ; W # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; W # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3030 ; W # Pd WAVY DASH +3031..3035 ; W # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3036..3037 ; W # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +3038..303A ; W # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; W # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; W # Lo MASU MARK +303D ; W # Po PART ALTERNATION MARK +303E ; W # So IDEOGRAPHIC VARIATION INDICATOR +303F ; N # So IDEOGRAPHIC HALF FILL SPACE +3041..3096 ; W # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +3099..309A ; W # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; W # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; W # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; W # Lo HIRAGANA DIGRAPH YORI +30A0 ; W # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30A1..30FA ; W # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB ; W # Po KATAKANA MIDDLE DOT +30FC..30FE ; W # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; W # Lo KATAKANA DIGRAPH KOTO +3105..312F ; W # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; W # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3190..3191 ; W # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; W # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31A0..31BF ; W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31C0..31E3 ; W # So [36] CJK STROKE T..CJK STROKE Q +31EF ; W # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION +31F0..31FF ; W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3200..321E ; W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU +3220..3229 ; W # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; W # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; W # So PARTNERSHIP SIGN +3251..325F ; W # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +3260..327F ; W # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL +3280..3289 ; W # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; W # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32B1..32BF ; W # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32C0..32FF ; W # So [64] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA +3300..33FF ; W # So [256] SQUARE APAATO..SQUARE GAL +3400..4DBF ; W # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4DC0..4DFF ; N # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +4E00..9FFF ; W # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF +A000..A014 ; W # Lo [21] YI SYLLABLE IT..YI SYLLABLE E +A015 ; W # Lm YI SYLLABLE WU +A016..A48C ; W # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A490..A4C6 ; W # So [55] YI RADICAL QOT..YI RADICAL KE +A4D0..A4F7 ; N # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; N # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; N # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A500..A60B ; N # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; N # Lm VAI SYLLABLE LENGTHENER +A60D..A60F ; N # Po [3] VAI COMMA..VAI QUESTION MARK +A610..A61F ; N # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; N # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; N # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; N # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; N # Lo CYRILLIC LETTER MULTIOCULAR O +A66F ; N # Mn COMBINING CYRILLIC VZMET +A670..A672 ; N # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A673 ; N # Po SLAVONIC ASTERISK +A674..A67D ; N # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67E ; N # Po CYRILLIC KAVYKA +A67F ; N # Lm CYRILLIC PAYEROK +A680..A69B ; N # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; N # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; N # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6A0..A6E5 ; N # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; N # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; N # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A6F2..A6F7 ; N # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK +A700..A716 ; N # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; N # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; N # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A722..A76F ; N # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; N # Lm MODIFIER LETTER US +A771..A787 ; N # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; N # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; N # Ll LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; N # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; N # Ll LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A7FF ; N # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M +A800..A801 ; N # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I +A802 ; N # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; N # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806 ; N # Mn SYLOTI NAGRI SIGN HASANTA +A807..A80A ; N # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; N # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; N # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; N # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; N # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; N # Mc SYLOTI NAGRI VOWEL SIGN OO +A828..A82B ; N # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A82C ; N # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A830..A835 ; N # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; N # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; N # Sc NORTH INDIC RUPEE MARK +A839 ; N # So NORTH INDIC QUANTITY MARK +A840..A873 ; N # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A874..A877 ; N # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD +A880..A881 ; N # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; N # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; N # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4..A8C5 ; N # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8CE..A8CF ; N # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; N # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8E0..A8F1 ; N # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7 ; N # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; N # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; N # Lo DEVANAGARI HEADSTROKE +A8FC ; N # Po DEVANAGARI SIGN SIDDHAM +A8FD..A8FE ; N # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF ; N # Mn DEVANAGARI VOWEL SIGN AY +A900..A909 ; N # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; N # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D ; N # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A92E..A92F ; N # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A930..A946 ; N # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; N # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; N # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A95F ; N # Po REJANG SECTION MARK +A960..A97C ; W # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A980..A982 ; N # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; N # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; N # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3 ; N # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; N # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; N # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; N # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; N # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9C0 ; N # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9C1..A9CD ; N # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF ; N # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; N # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; N # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN +A9E0..A9E4 ; N # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5 ; N # Mn MYANMAR SIGN SHAN SAW +A9E6 ; N # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; N # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; N # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; N # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; N # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; N # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; N # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; N # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; N # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; N # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; N # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; N # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; N # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; N # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; N # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; N # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; N # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA +AA60..AA6F ; N # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; N # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; N # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; N # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A ; N # Lo MYANMAR LETTER AITON RA +AA7B ; N # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; N # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; N # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AA7F ; N # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA +AA80..AAAF ; N # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O +AAB0 ; N # Mn TAI VIET MAI KANG +AAB1 ; N # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; N # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; N # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; N # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; N # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF ; N # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0 ; N # Lo TAI VIET TONE MAI NUENG +AAC1 ; N # Mn TAI VIET TONE MAI THO +AAC2 ; N # Lo TAI VIET TONE MAI SONG +AADB..AADC ; N # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; N # Lm TAI VIET SYMBOL SAM +AADE..AADF ; N # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; N # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; N # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; N # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; N # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; N # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; N # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; N # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; N # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; N # Mn MEETEI MAYEK VIRAMA +AB01..AB06 ; N # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; N # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; N # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; N # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; N # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; N # Ll [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B ; N # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; N # Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; N # Lm MODIFIER LETTER SMALL TURNED W +AB6A..AB6B ; N # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB70..ABBF ; N # Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; N # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; N # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; N # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; N # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; N # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; N # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; N # Po MEETEI MAYEK CHEIKHEI +ABEC ; N # Mc MEETEI MAYEK LUM IYEK +ABED ; N # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9 ; N # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; W # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; N # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; N # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +D800..DB7F ; N # Cs [896] .. +DB80..DBFF ; N # Cs [128] .. +DC00..DFFF ; N # Cs [1024] .. +E000..F8FF ; A # Co [6400] .. +F900..FA6D ; W # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA6E..FA6F ; W # Cn [2] .. +FA70..FAD9 ; W # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FADA..FAFF ; W # Cn [38] .. +FB00..FB06 ; N # Ll [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; N # Ll [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; N # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; N # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; N # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB29 ; N # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FB2A..FB36 ; N # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; N # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; N # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; N # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F ; N # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED +FB50..FBB1 ; N # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBB2..FBC2 ; N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBD3..FD3D ; N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD3E ; N # Pe ORNATE LEFT PARENTHESIS +FD3F ; N # Ps ORNATE RIGHT PARENTHESIS +FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDF0..FDFB ; N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; N # Sc RIAL SIGN +FDFD..FDFF ; N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL +FE00..FE0F ; A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE10..FE16 ; W # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; W # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE20..FE2F ; N # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE30 ; W # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; W # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; W # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; W # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; W # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; W # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE50..FE52 ; W # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; W # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FE58 ; W # Pd SMALL EM DASH +FE59 ; W # Ps SMALL LEFT PARENTHESIS +FE5A ; W # Pe SMALL RIGHT PARENTHESIS +FE5B ; W # Ps SMALL LEFT CURLY BRACKET +FE5C ; W # Pe SMALL RIGHT CURLY BRACKET +FE5D ; W # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; W # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE5F..FE61 ; W # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK +FE62 ; W # Sm SMALL PLUS SIGN +FE63 ; W # Pd SMALL HYPHEN-MINUS +FE64..FE66 ; W # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; W # Po SMALL REVERSE SOLIDUS +FE69 ; W # Sc SMALL DOLLAR SIGN +FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT +FE70..FE74 ; N # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; N # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FEFF ; N # Cf ZERO WIDTH NO-BREAK SPACE +FF01..FF03 ; F # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN +FF04 ; F # Sc FULLWIDTH DOLLAR SIGN +FF05..FF07 ; F # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE +FF08 ; F # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; F # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; F # Po FULLWIDTH ASTERISK +FF0B ; F # Sm FULLWIDTH PLUS SIGN +FF0C ; F # Po FULLWIDTH COMMA +FF0D ; F # Pd FULLWIDTH HYPHEN-MINUS +FF0E..FF0F ; F # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF10..FF19 ; F # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF1A..FF1B ; F # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1C..FF1E ; F # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20 ; F # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF21..FF3A ; F # Lu [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF3B ; F # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; F # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; F # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E ; F # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; F # Pc FULLWIDTH LOW LINE +FF40 ; F # Sk FULLWIDTH GRAVE ACCENT +FF41..FF5A ; F # Ll [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF5B ; F # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; F # Sm FULLWIDTH VERTICAL LINE +FF5D ; F # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; F # Sm FULLWIDTH TILDE +FF5F ; F # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; F # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; H # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; H # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; H # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; H # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FF66..FF6F ; H # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; H # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; H # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; H # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; H # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; H # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; H # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; H # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; H # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +FFE0..FFE1 ; F # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE2 ; F # Sm FULLWIDTH NOT SIGN +FFE3 ; F # Sk FULLWIDTH MACRON +FFE4 ; F # So FULLWIDTH BROKEN BAR +FFE5..FFE6 ; F # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +FFE8 ; H # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; H # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFF9..FFFB ; N # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +FFFC ; N # So OBJECT REPLACEMENT CHARACTER +FFFD ; A # So REPLACEMENT CHARACTER +10000..1000B ; N # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; N # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; N # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; N # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; N # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; N # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; N # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10100..10102 ; N # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK +10107..10133 ; N # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; N # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +10140..10174 ; N # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; N # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; N # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A..1018B ; N # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +1018C..1018E ; N # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN +10190..1019C ; N # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101A0 ; N # So GREEK SYMBOL TAU RHO +101D0..101FC ; N # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +101FD ; N # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +10280..1029C ; N # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; N # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +102E0 ; N # Mn COPTIC EPACT THOUSANDS MARK +102E1..102FB ; N # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +10300..1031F ; N # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +10320..10323 ; N # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +1032D..1032F ; N # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE +10330..10340 ; N # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +10341 ; N # Nl GOTHIC LETTER NINETY +10342..10349 ; N # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; N # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; N # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A ; N # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10380..1039D ; N # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; N # Po UGARITIC WORD DIVIDER +103A0..103C3 ; N # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; N # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; N # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; N # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; N # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1047F ; N # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW +10480..1049D ; N # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO +104A0..104A9 ; N # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; N # Lu [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; N # Ll [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; N # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; N # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +1056F ; N # Po CAUCASIAN ALBANIAN CITATION MARK +10570..1057A ; N # Lu [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; N # Lu [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; N # Lu [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; N # Lu [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; N # Ll [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; N # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; N # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; N # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; N # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; N # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; N # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; N # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; N # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; N # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; N # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; N # Lo CYPRIOT SYLLABLE ZA +1083F ; N # Lo CYPRIOT SYLLABLE ZO +10840..10855 ; N # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW +10857 ; N # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; N # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND +10860..10876 ; N # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10877..10878 ; N # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10879..1087F ; N # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY +10880..1089E ; N # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108A7..108AF ; N # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED +108E0..108F2 ; N # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; N # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +108FB..108FF ; N # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED +10900..10915 ; N # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; N # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091F ; N # Po PHOENICIAN WORD SEPARATOR +10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093F ; N # Po LYDIAN TRIANGULAR MARK +10980..1099F ; N # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 +109A0..109B7 ; N # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA +109BC..109BD ; N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109BE..109BF ; N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109CF ; N # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D2..109FF ; N # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS +10A00 ; N # Lo KHAROSHTHI LETTER A +10A01..10A03 ; N # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; N # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; N # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; N # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; N # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; N # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A38..10A3A ; N # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; N # Mn KHAROSHTHI VIRAMA +10A40..10A48 ; N # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF +10A50..10A58 ; N # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES +10A60..10A7C ; N # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; N # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; N # Po OLD SOUTH ARABIAN NUMERIC INDICATOR +10A80..10A9C ; N # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10A9D..10A9F ; N # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY +10AC0..10AC7 ; N # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC8 ; N # So MANICHAEAN SIGN UD +10AC9..10AE4 ; N # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE5..10AE6 ; N # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10AEB..10AEF ; N # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10AF0..10AF6 ; N # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER +10B00..10B35 ; N # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B39..10B3F ; N # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B40..10B55 ; N # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B58..10B5F ; N # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND +10B60..10B72 ; N # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B78..10B7F ; N # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND +10B80..10B91 ; N # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10B99..10B9C ; N # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10BA9..10BAF ; N # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED +10C00..10C48 ; N # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; N # Lu [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; N # Ll [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10CFA..10CFF ; N # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D00..10D23 ; N # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; N # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D30..10D39 ; N # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10E60..10E7E ; N # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS +10E80..10EA9 ; N # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAB..10EAC ; N # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EAD ; N # Pd YEZIDI HYPHENATION MARK +10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EFD..10EFF ; N # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; N # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F46..10F50 ; N # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F51..10F54 ; N # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10F55..10F59 ; N # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F70..10F81 ; N # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F82..10F85 ; N # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +10F86..10F89 ; N # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +10FB0..10FC4 ; N # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FC5..10FCB ; N # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED +10FE0..10FF6 ; N # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; N # Mc BRAHMI SIGN CANDRABINDU +11001 ; N # Mn BRAHMI SIGN ANUSVARA +11002 ; N # Mc BRAHMI SIGN VISARGA +11003..11037 ; N # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11046 ; N # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11047..1104D ; N # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +11052..11065 ; N # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +11066..1106F ; N # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11070 ; N # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11071..11072 ; N # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074 ; N # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075 ; N # Lo BRAHMI LETTER OLD TAMIL LLA +1107F ; N # Mn BRAHMI NUMBER JOINER +11080..11081 ; N # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +11082 ; N # Mc KAITHI SIGN VISARGA +11083..110AF ; N # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; N # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; N # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; N # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; N # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110BB..110BC ; N # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BD ; N # Cf KAITHI NUMBER SIGN +110BE..110C1 ; N # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110C2 ; N # Mn KAITHI VOWEL SIGN VOCALIC R +110CD ; N # Cf KAITHI NUMBER SIGN ABOVE +110D0..110E8 ; N # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; N # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; N # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; N # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; N # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; N # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; N # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; N # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; N # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11144 ; N # Lo CHAKMA LETTER LHAA +11145..11146 ; N # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; N # Lo CHAKMA LETTER VAA +11150..11172 ; N # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11173 ; N # Mn MAHAJANI SIGN NUKTA +11174..11175 ; N # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +11176 ; N # Lo MAHAJANI LIGATURE SHRI +11180..11181 ; N # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; N # Mc SHARADA SIGN VISARGA +11183..111B2 ; N # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; N # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; N # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; N # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; N # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; N # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111C9..111CC ; N # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CD ; N # Po SHARADA SUTRA MARK +111CE ; N # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; N # Mn SHARADA SIGN INVERTED CANDRABINDU +111D0..111D9 ; N # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; N # Lo SHARADA EKAM +111DB ; N # Po SHARADA SIGN SIDDHAM +111DC ; N # Lo SHARADA HEADSTROKE +111DD..111DF ; N # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +111E1..111F4 ; N # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND +11200..11211 ; N # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; N # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; N # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; N # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; N # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; N # Mn KHOJKI SIGN ANUSVARA +11235 ; N # Mc KHOJKI SIGN VIRAMA +11236..11237 ; N # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +11238..1123D ; N # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +1123E ; N # Mn KHOJKI SIGN SUKUN +1123F..11240 ; N # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11241 ; N # Mn KHOJKI VOWEL SIGN VOCALIC R +11280..11286 ; N # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; N # Lo MULTANI LETTER GHA +1128A..1128D ; N # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; N # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; N # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112A9 ; N # Po MULTANI SECTION MARK +112B0..112DE ; N # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF ; N # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; N # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112EA ; N # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +112F0..112F9 ; N # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11300..11301 ; N # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; N # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; N # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; N # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; N # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; N # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; N # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; N # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133B..1133C ; N # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133D ; N # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; N # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; N # Mn GRANTHA VOWEL SIGN II +11341..11344 ; N # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; N # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; N # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; N # Lo GRANTHA OM +11357 ; N # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; N # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; N # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11366..1136C ; N # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; N # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11400..11434 ; N # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; N # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; N # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; N # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; N # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; N # Mc NEWA SIGN VISARGA +11446 ; N # Mn NEWA SIGN NUKTA +11447..1144A ; N # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144F ; N # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN +11450..11459 ; N # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145A..1145B ; N # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D ; N # Po NEWA INSERTION SIGN +1145E ; N # Mn NEWA SANDHI MARK +1145F..11461 ; N # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; N # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; N # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; N # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; N # Mc TIRHUTA VOWEL SIGN E +114BA ; N # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; N # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; N # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; N # Mc TIRHUTA SIGN VISARGA +114C2..114C3 ; N # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +114C4..114C5 ; N # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C6 ; N # Po TIRHUTA ABBREVIATION SIGN +114C7 ; N # Lo TIRHUTA OM +114D0..114D9 ; N # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; N # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; N # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; N # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; N # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; N # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; N # Mc SIDDHAM SIGN VISARGA +115BF..115C0 ; N # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115C1..115D7 ; N # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +115D8..115DB ; N # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD ; N # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11600..1162F ; N # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; N # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; N # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; N # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; N # Mn MODI SIGN ANUSVARA +1163E ; N # Mc MODI SIGN VISARGA +1163F..11640 ; N # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +11641..11643 ; N # Po [3] MODI DANDA..MODI ABBREVIATION SIGN +11644 ; N # Lo MODI SIGN HUVA +11650..11659 ; N # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11660..1166C ; N # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT +11680..116AA ; N # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; N # Mn TAKRI SIGN ANUSVARA +116AC ; N # Mc TAKRI SIGN VISARGA +116AD ; N # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; N # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; N # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; N # Mc TAKRI SIGN VIRAMA +116B7 ; N # Mn TAKRI SIGN NUKTA +116B8 ; N # Lo TAKRI LETTER ARCHAIC KHA +116B9 ; N # Po TAKRI ABBREVIATION SIGN +116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +11700..1171A ; N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D..1171F ; N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; N # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; N # Mc AHOM VOWEL SIGN E +11727..1172B ; N # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11730..11739 ; N # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +1173A..1173B ; N # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +1173C..1173E ; N # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1173F ; N # So AHOM SYMBOL VI +11740..11746 ; N # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; N # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; N # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; N # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; N # Mc DOGRA SIGN VISARGA +11839..1183A ; N # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1183B ; N # Po DOGRA ABBREVIATION SIGN +118A0..118DF ; N # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; N # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118EA..118F2 ; N # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +118FF ; N # Lo WARANG CITI OM +11900..11906 ; N # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E +11909 ; N # Lo DIVES AKURU LETTER O +1190C..11913 ; N # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; N # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; N # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; N # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; N # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; N # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; N # Mc DIVES AKURU SIGN HALANTA +1193E ; N # Mn DIVES AKURU VIRAMA +1193F ; N # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; N # Mc DIVES AKURU MEDIAL YA +11941 ; N # Lo DIVES AKURU INITIAL RA +11942 ; N # Mc DIVES AKURU MEDIAL RA +11943 ; N # Mn DIVES AKURU SIGN NUKTA +11944..11946 ; N # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +11950..11959 ; N # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; N # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; N # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; N # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; N # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; N # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; N # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E0 ; N # Mn NANDINAGARI SIGN VIRAMA +119E1 ; N # Lo NANDINAGARI SIGN AVAGRAHA +119E2 ; N # Po NANDINAGARI SIGN SIDDHAM +119E3 ; N # Lo NANDINAGARI HEADSTROKE +119E4 ; N # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; N # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A ; N # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32 ; N # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A33..11A38 ; N # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; N # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; N # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E ; N # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A3F..11A46 ; N # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A47 ; N # Mn ZANABAZAR SQUARE SUBJOINER +11A50 ; N # Lo SOYOMBO LETTER A +11A51..11A56 ; N # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; N # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; N # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89 ; N # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96 ; N # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; N # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; N # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11A9A..11A9C ; N # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9D ; N # Lo SOYOMBO MARK PLUTA +11A9E..11AA2 ; N # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11AB0..11ABF ; N # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA +11AC0..11AF8 ; N # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11B00..11B09 ; N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11C00..11C08 ; N # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; N # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; N # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; N # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; N # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; N # Mc BHAIKSUKI SIGN VISARGA +11C3F ; N # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; N # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45 ; N # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59 ; N # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C ; N # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11C70..11C71 ; N # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11C72..11C8F ; N # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7 ; N # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; N # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; N # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; N # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; N # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; N # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; N # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06 ; N # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; N # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; N # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36 ; N # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; N # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; N # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; N # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D46 ; N # Lo MASARAM GONDI REPHA +11D47 ; N # Mn MASARAM GONDI RA-KARA +11D50..11D59 ; N # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; N # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; N # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; N # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; N # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; N # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; N # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; N # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; N # Mc GUNJALA GONDI SIGN VISARGA +11D97 ; N # Mn GUNJALA GONDI VIRAMA +11D98 ; N # Lo GUNJALA GONDI OM +11DA0..11DA9 ; N # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; N # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4 ; N # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; N # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11EF7..11EF8 ; N # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F00..11F01 ; N # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F02 ; N # Lo KAWI SIGN REPHA +11F03 ; N # Mc KAWI SIGN VISARGA +11F04..11F10 ; N # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; N # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; N # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; N # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; N # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; N # Mn KAWI VOWEL SIGN EU +11F41 ; N # Mc KAWI SIGN KILLER +11F42 ; N # Mn KAWI CONJOINER +11F43..11F4F ; N # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL +11F50..11F59 ; N # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11FB0 ; N # Lo LISU LETTER YHA +11FC0..11FD4 ; N # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH +11FD5..11FDC ; N # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FDD..11FE0 ; N # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +11FE1..11FF1 ; N # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +11FFF ; N # Po TAMIL PUNCTUATION END OF TEXT +12000..12399 ; N # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; N # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12470..12474 ; N # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12480..12543 ; N # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; N # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +12FF1..12FF2 ; N # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 +13000..1342F ; N # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13430..1343F ; N # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +13440 ; N # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13441..13446 ; N # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13447..13455 ; N # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +14400..14646 ; N # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; N # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; N # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; N # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A6E..16A6F ; N # Po [2] MRO DANDA..MRO DOUBLE DANDA +16A70..16ABE ; N # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; N # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; N # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF0..16AF4 ; N # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16AF5 ; N # Po BASSA VAH FULL STOP +16B00..16B2F ; N # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B30..16B36 ; N # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B37..16B3B ; N # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B3C..16B3F ; N # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B40..16B43 ; N # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B44 ; N # Po PAHAWH HMONG SIGN XAUS +16B45 ; N # So PAHAWH HMONG SIGN CIM TSOV ROG +16B50..16B59 ; N # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B5B..16B61 ; N # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16B63..16B77 ; N # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; N # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16E80..16E96 ; N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16E97..16E9A ; N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16F00..16F4A ; N # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F ; N # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F50 ; N # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; W # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE2 ; W # Po OLD CHINESE HOOK MARK +16FE3 ; W # Lm OLD CHINESE ITERATION MARK +16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 +18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B0FF ; W # Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 +1B100..1B122 ; W # Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU +1B132 ; W # Lo HIRAGANA LETTER SMALL KO +1B150..1B152 ; W # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; W # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; W # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; W # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; N # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9C ; N # So DUPLOYAN SIGN O WITH CROSS +1BC9D..1BC9E ; N # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1BC9F ; N # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1BCA0..1BCA3 ; N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5 ; N # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; N # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; N # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE +1D165..1D166 ; N # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; N # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16A..1D16C ; N # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D..1D172 ; N # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D173..1D17A ; N # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1D17B..1D182 ; N # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D183..1D184 ; N # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D185..1D18B ; N # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D18C..1D1A9 ; N # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AA..1D1AD ; N # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D1AE..1D1EA ; N # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON +1D200..1D241 ; N # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D242..1D244 ; N # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D245 ; N # So GREEK MUSICAL LEIMMA +1D2C0..1D2D3 ; N # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN +1D2E0..1D2F3 ; N # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D300..1D356 ; N # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D360..1D378 ; N # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1D400..1D454 ; N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; N # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; N # Lu [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; N # Lu MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; N # Lu [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; N # Lu [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; N # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; N # Ll MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; N # Ll [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; N # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; N # Lu [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; N # Lu [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; N # Lu [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; N # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; N # Lu [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; N # Lu [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; N # Lu MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; N # Lu [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; N # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; N # Lu [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; N # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; N # Ll [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB ; N # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA ; N # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; N # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; N # Ll [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715 ; N # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734 ; N # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; N # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; N # Ll [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F ; N # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E ; N # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; N # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; N # Ll [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789 ; N # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8 ; N # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9 ; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; N # Ll [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3 ; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB ; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1D800..1D9FF ; N # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA00..1DA36 ; N # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA37..1DA3A ; N # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA3B..1DA6C ; N # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA6D..1DA74 ; N # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA75 ; N # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA76..1DA83 ; N # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA84 ; N # Mn SIGNWRITING LOCATION HEAD NECK +1DA85..1DA86 ; N # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1DA87..1DA8B ; N # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1DA9B..1DA9F ; N # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; N # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1DF00..1DF09 ; N # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; N # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; N # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; N # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E100..1E12C ; N # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E130..1E136 ; N # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D ; N # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; N # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; N # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; N # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; N # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2AE ; N # Mn TOTO SIGN RISING TONE +1E2C0..1E2EB ; N # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2EC..1E2EF ; N # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E2F0..1E2F9 ; N # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E2FF ; N # Sc WANCHO NGUN SIGN +1E4D0..1E4EA ; N # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; N # Lm NAG MUNDARI SIGN OJOD +1E4EC..1E4EF ; N # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E4F0..1E4F9 ; N # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E7E0..1E7E6 ; N # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; N # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; N # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; N # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; N # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8C7..1E8CF ; N # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1E8D0..1E8D6 ; N # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E900..1E943 ; N # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E944..1E94A ; N # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; N # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; N # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1E95E..1E95F ; N # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK +1EC71..1ECAB ; N # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE +1ECAC ; N # So INDIC SIYAQ PLACEHOLDER +1ECAD..1ECAF ; N # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB0 ; N # Sc INDIC SIYAQ RUPEE MARK +1ECB1..1ECB4 ; N # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ED01..1ED2D ; N # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2E ; N # So OTTOMAN SIYAQ MARRATAN +1ED2F..1ED3D ; N # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1EE00..1EE03 ; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; N # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; N # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; N # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; N # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; N # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; N # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; N # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; N # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; N # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; N # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; N # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; N # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; N # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; N # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; N # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F000..1F003 ; N # So [4] MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND +1F004 ; W # So MAHJONG TILE RED DRAGON +1F005..1F02B ; N # So [39] MAHJONG TILE GREEN DRAGON..MAHJONG TILE BACK +1F030..1F093 ; N # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; N # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF ; N # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CE ; N # So [14] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD KING OF DIAMONDS +1F0CF ; W # So PLAYING CARD BLACK JOKER +1F0D1..1F0F5 ; N # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F100..1F10A ; A # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA +1F10B..1F10C ; N # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F10F ; N # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH +1F110..1F12D ; A # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD +1F12E..1F12F ; N # So [2] CIRCLED WZ..COPYLEFT SYMBOL +1F130..1F169 ; A # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F16A..1F16F ; N # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE +1F170..1F18D ; A # So [30] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED SA +1F18E ; W # So NEGATIVE SQUARED AB +1F18F..1F190 ; A # So [2] NEGATIVE SQUARED WC..SQUARE DJ +1F191..1F19A ; W # So [10] SQUARED CL..SQUARED VS +1F19B..1F1AC ; A # So [18] SQUARED THREE D..SQUARED VOD +1F1AD ; N # So MASK WORK SYMBOL +1F1E6..1F1FF ; N # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z +1F200..1F202 ; W # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA +1F210..1F23B ; W # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248 ; W # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251 ; W # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +1F260..1F265 ; W # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F320 ; W # So [33] CYCLONE..SHOOTING STAR +1F321..1F32C ; N # So [12] THERMOMETER..WIND BLOWING FACE +1F32D..1F335 ; W # So [9] HOT DOG..CACTUS +1F336 ; N # So HOT PEPPER +1F337..1F37C ; W # So [70] TULIP..BABY BOTTLE +1F37D ; N # So FORK AND KNIFE WITH PLATE +1F37E..1F393 ; W # So [22] BOTTLE WITH POPPING CORK..GRADUATION CAP +1F394..1F39F ; N # So [12] HEART WITH TIP ON THE LEFT..ADMISSION TICKETS +1F3A0..1F3CA ; W # So [43] CAROUSEL HORSE..SWIMMER +1F3CB..1F3CE ; N # So [4] WEIGHT LIFTER..RACING CAR +1F3CF..1F3D3 ; W # So [5] CRICKET BAT AND BALL..TABLE TENNIS PADDLE AND BALL +1F3D4..1F3DF ; N # So [12] SNOW CAPPED MOUNTAIN..STADIUM +1F3E0..1F3F0 ; W # So [17] HOUSE BUILDING..EUROPEAN CASTLE +1F3F1..1F3F3 ; N # So [3] WHITE PENNANT..WAVING WHITE FLAG +1F3F4 ; W # So WAVING BLACK FLAG +1F3F5..1F3F7 ; N # So [3] ROSETTE..LABEL +1F3F8..1F3FA ; W # So [3] BADMINTON RACQUET AND SHUTTLECOCK..AMPHORA +1F3FB..1F3FF ; W # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F43E ; W # So [63] RAT..PAW PRINTS +1F43F ; N # So CHIPMUNK +1F440 ; W # So EYES +1F441 ; N # So EYE +1F442..1F4FC ; W # So [187] EAR..VIDEOCASSETTE +1F4FD..1F4FE ; N # So [2] FILM PROJECTOR..PORTABLE STEREO +1F4FF..1F53D ; W # So [63] PRAYER BEADS..DOWN-POINTING SMALL RED TRIANGLE +1F53E..1F54A ; N # So [13] LOWER RIGHT SHADOWED WHITE CIRCLE..DOVE OF PEACE +1F54B..1F54E ; W # So [4] KAABA..MENORAH WITH NINE BRANCHES +1F54F ; N # So BOWL OF HYGIEIA +1F550..1F567 ; W # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY +1F568..1F579 ; N # So [18] RIGHT SPEAKER..JOYSTICK +1F57A ; W # So MAN DANCING +1F57B..1F594 ; N # So [26] LEFT HAND TELEPHONE RECEIVER..REVERSED VICTORY HAND +1F595..1F596 ; W # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS +1F597..1F5A3 ; N # So [13] WHITE DOWN POINTING LEFT HAND INDEX..BLACK DOWN POINTING BACKHAND INDEX +1F5A4 ; W # So BLACK HEART +1F5A5..1F5FA ; N # So [86] DESKTOP COMPUTER..WORLD MAP +1F5FB..1F5FF ; W # So [5] MOUNT FUJI..MOYAI +1F600..1F64F ; W # So [80] GRINNING FACE..PERSON WITH FOLDED HANDS +1F650..1F67F ; N # So [48] NORTH WEST POINTING LEAF..REVERSE CHECKER BOARD +1F680..1F6C5 ; W # So [70] ROCKET..LEFT LUGGAGE +1F6C6..1F6CB ; N # So [6] TRIANGLE WITH ROUNDED CORNERS..COUCH AND LAMP +1F6CC ; W # So SLEEPING ACCOMMODATION +1F6CD..1F6CF ; N # So [3] SHOPPING BAGS..BED +1F6D0..1F6D2 ; W # So [3] PLACE OF WORSHIP..SHOPPING TROLLEY +1F6D3..1F6D4 ; N # So [2] STUPA..PAGODA +1F6D5..1F6D7 ; W # So [3] HINDU TEMPLE..ELEVATOR +1F6DC..1F6DF ; W # So [4] WIRELESS..RING BUOY +1F6E0..1F6EA ; N # So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE +1F6EB..1F6EC ; W # So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING +1F6F0..1F6F3 ; N # So [4] SATELLITE..PASSENGER SHIP +1F6F4..1F6FC ; W # So [9] SCOOTER..ROLLER SKATE +1F700..1F776 ; N # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE +1F77B..1F77F ; N # So [5] HAUMEA..ORCUS +1F780..1F7D9 ; N # So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR +1F7E0..1F7EB ; W # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0 ; W # So HEAVY EQUALS SIGN +1F800..1F80B ; N # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847 ; N # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859 ; N # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887 ; N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD ; N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8B1 ; N # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT +1F90C..1F93A ; W # So [47] PINCHED FINGERS..FENCER +1F93B ; N # So MODERN PENTATHLON +1F93C..1F945 ; W # So [10] WRESTLERS..GOAL NET +1F946 ; N # So RIFLE +1F947..1F9FF ; W # So [185] FIRST PLACE MEDAL..NAZAR AMULET +1FA00..1FA53 ; N # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D ; N # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA7C ; W # So [13] BALLET SHOES..CRUTCH +1FA80..1FA88 ; W # So [9] YO-YO..FLUTE +1FA90..1FABD ; W # So [46] RINGED PLANET..WING +1FABF..1FAC5 ; W # So [7] GOOSE..PERSON WITH CROWN +1FACE..1FADB ; W # So [14] MOOSE..PEA POD +1FAE0..1FAE8 ; W # So [9] MELTING FACE..SHAKING FACE +1FAF0..1FAF8 ; W # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND +1FB00..1FB92 ; N # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBCA ; N # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON +1FBF0..1FBF9 ; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +20000..2A6DF ; W # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A6E0..2A6FF ; W # Cn [32] .. +2A700..2B739 ; W # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B73A..2B73F ; W # Cn [6] .. +2B740..2B81D ; W # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B81E..2B81F ; W # Cn [2] .. +2B820..2CEA1 ; W # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEA2..2CEAF ; W # Cn [14] .. +2CEB0..2EBE0 ; W # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBE1..2EBEF ; W # Cn [15] .. +2EBF0..2EE5D ; W # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2EE5E..2F7FF ; W # Cn [2466] .. +2F800..2FA1D ; W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +2FA1E..2FA1F ; W # Cn [2] .. +2FA20..2FFFD ; W # Cn [1502] .. +30000..3134A ; W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +3134B..3134F ; W # Cn [5] .. +31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +323B0..3FFFD ; W # Cn [56398] .. +E0001 ; N # Cf LANGUAGE TAG +E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 +F0000..FFFFD ; A # Co [65534] .. +100000..10FFFD ; A # Co [65534] .. # EOF diff --git a/libc/str/getx86processormodel.c b/libc/str/getx86processormodel.c index 3ac7c77fe..9746cc4be 100644 --- a/libc/str/getx86processormodel.c +++ b/libc/str/getx86processormodel.c @@ -16,7 +16,6 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/bisect.internal.h" #include "libc/nexgen32e/x86info.h" static int CmpX86ProcModelKey(const struct X86ProcessorModel *a, @@ -32,7 +31,8 @@ static int CmpX86ProcModelKey(const struct X86ProcessorModel *a, * @see https://a4lg.com/tech/x86/database/x86-families-and-models.en.html */ const struct X86ProcessorModel *getx86processormodel(short key) { - return bisect(&(struct X86ProcessorModel){key}, kX86ProcessorModels, - kX86ProcessorModelCount, sizeof(struct X86ProcessorModel), - (void *)CmpX86ProcModelKey, NULL); + for (int i = 0; kX86ProcessorModels[i].key; ++i) + if (kX86ProcessorModels[i].key == key) + return &kX86ProcessorModels[i]; + return 0; } diff --git a/libc/str/has_char.h b/libc/str/has_char.h new file mode 100644 index 000000000..64c5a4763 --- /dev/null +++ b/libc/str/has_char.h @@ -0,0 +1,24 @@ +// -*- c++ -*- +#ifndef COSMOPOLITAN_LIBC_STR_HAS_CHAR_H_ +#define COSMOPOLITAN_LIBC_STR_HAS_CHAR_H_ +#ifdef __cplusplus + +template +static bool has_char(const T (*ranges)[2], size_t n, T c) { + unsigned l = 0; + unsigned r = n; + while (l < r) { + unsigned m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) + if (c < ranges[m][0]) { + r = m; + } else if (c > ranges[m][1]) { + l = m + 1; + } else { + return true; + } + } + return false; +} + +#endif /* __cplusplus */ +#endif /* COSMOPOLITAN_LIBC_STR_HAS_CHAR_H_ */ diff --git a/libc/str/iswcntrl.c b/libc/str/iswcntrl.c index b67dbf854..a8f63aba3 100644 --- a/libc/str/iswcntrl.c +++ b/libc/str/iswcntrl.c @@ -19,10 +19,15 @@ #include "libc/wctype.h" /** - * Returns nonzero if c is C0 or C1 control code. + * Returns nonzero if `c` is control code. + * + * This includes C0 or C1 control codes, in addition to the "LINE + * SEPARATOR" and "PARAGRAPH SEPARATOR" characters. */ int iswcntrl(wint_t c) { - return (0x00 <= c && c <= 0x1F) || (0x7F <= c && c <= 0x9F); + return (0x0000 <= c && c <= 0x001F) || // + (0x007F <= c && c <= 0x009F) || // + (0x2028 <= c && c <= 0x2029); } __weak_reference(iswcntrl, iswcntrl_l); diff --git a/libc/str/iswlower.c b/libc/str/iswlower.c deleted file mode 100644 index 546ee379f..000000000 --- a/libc/str/iswlower.c +++ /dev/null @@ -1,520 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/wctype.h" - -/** - * Returns nonzero if c is lowercase letter. - */ -int iswlower(wint_t c) { - if (c < 0200) { - return 'a' <= c && c <= 'z'; - } else { - if (towupper(c) != c) - return 1; - switch (c) { - case 0x00df: /* ß Watin */ - case 0x0138: /* ĸ Watin-A */ - case 0x0149: /* ʼn Watin-A */ - case 0x018d: /* ƍ Watin-B */ - case 0x019b: /* ƛ Watin-B */ - case 0x01aa: /* ƪ Watin-B */ - case 0x01ab: /* ƫ Watin-B */ - case 0x01ba: /* ƺ Watin-B */ - case 0x01be: /* ƾ Watin-B */ - case 0x01f0: /* ǰ Watin-B */ - case 0x0221: /* ȡ Watin-B */ - case 0x0234: /* ȴ Watin-B */ - case 0x0235: /* ȵ Watin-B */ - case 0x0236: /* ȶ Watin-B */ - case 0x0237: /* ȷ Watin-B */ - case 0x0238: /* ȸ Watin-B */ - case 0x0239: /* ȹ Watin-B */ - case 0x0255: /* ɕ IPA */ - case 0x0258: /* ɘ IPA */ - case 0x025a: /* ɚ IPA */ - case 0x025d: /* ɝ IPA */ - case 0x025e: /* ɞ IPA */ - case 0x025f: /* ɟ IPA */ - case 0x0262: /* ɢ IPA */ - case 0x0264: /* ɤ IPA */ - case 0x0267: /* ɧ IPA */ - case 0x026d: /* ɭ IPA */ - case 0x026e: /* ɮ IPA */ - case 0x0270: /* ɰ IPA */ - case 0x0273: /* ɳ IPA */ - case 0x0274: /* ɴ IPA */ - case 0x0276: /* ɶ IPA */ - case 0x0277: /* ɷ IPA */ - case 0x0278: /* ɸ IPA */ - case 0x0279: /* ɹ IPA */ - case 0x027a: /* ɺ IPA */ - case 0x027b: /* ɻ IPA */ - case 0x027c: /* ɼ IPA */ - case 0x027e: /* ɾ IPA */ - case 0x027f: /* ɿ IPA */ - case 0x0281: /* ʁ IPA */ - case 0x0284: /* ʄ IPA */ - case 0x0285: /* ʅ IPA */ - case 0x0286: /* ʆ IPA */ - case 0x028d: /* ʍ IPA */ - case 0x028e: /* ʎ IPA */ - case 0x028f: /* ʏ IPA */ - case 0x0290: /* ʐ IPA */ - case 0x0291: /* ʑ IPA */ - case 0x0293: /* ʓ IPA */ - case 0x0295: /* ʕ IPA */ - case 0x0296: /* ʖ IPA */ - case 0x0297: /* ʗ IPA */ - case 0x0298: /* ʘ IPA */ - case 0x0299: /* ʙ IPA */ - case 0x029a: /* ʚ IPA */ - case 0x029b: /* ʛ IPA */ - case 0x029c: /* ʜ IPA */ - case 0x029f: /* ʟ IPA */ - case 0x02a0: /* ʠ IPA */ - case 0x02a1: /* ʡ IPA */ - case 0x02a2: /* ʢ IPA */ - case 0x02a3: /* ʣ IPA */ - case 0x02a4: /* ʤ IPA */ - case 0x02a5: /* ʥ IPA */ - case 0x02a6: /* ʦ IPA */ - case 0x02a7: /* ʧ IPA */ - case 0x02a8: /* ʨ IPA */ - case 0x02a9: /* ʩ IPA */ - case 0x02aa: /* ʪ IPA */ - case 0x02ab: /* ʫ IPA */ - case 0x02ac: /* ʬ IPA */ - case 0x02ad: /* ʭ IPA */ - case 0x02ae: /* ʮ IPA */ - case 0x02af: /* ʯ IPA */ - case 0x0390: /* ΐ Greek */ - case 0x03b0: /* ΰ Greek */ - case 0x03fc: /* ϼ Greek */ - case 0x0560: /* ՠ Armenian */ - case 0x0587: /* և Armenian */ - case 0x0588: /* ֈ Armenian */ - case 0x1d00: /* ᴀ Phonetic Extensions */ - case 0x1d01: /* ᴁ Phonetic Extensions */ - case 0x1d02: /* ᴂ Phonetic Extensions */ - case 0x1d03: /* ᴃ Phonetic Extensions */ - case 0x1d04: /* ᴄ Phonetic Extensions */ - case 0x1d05: /* ᴅ Phonetic Extensions */ - case 0x1d06: /* ᴆ Phonetic Extensions */ - case 0x1d07: /* ᴇ Phonetic Extensions */ - case 0x1d08: /* ᴈ Phonetic Extensions */ - case 0x1d09: /* ᴉ Phonetic Extensions */ - case 0x1d0a: /* ᴊ Phonetic Extensions */ - case 0x1d0b: /* ᴋ Phonetic Extensions */ - case 0x1d0c: /* ᴌ Phonetic Extensions */ - case 0x1d0d: /* ᴍ Phonetic Extensions */ - case 0x1d0e: /* ᴎ Phonetic Extensions */ - case 0x1d0f: /* ᴏ Phonetic Extensions */ - case 0x1d10: /* ᴐ Phonetic Extensions */ - case 0x1d11: /* ᴑ Phonetic Extensions */ - case 0x1d12: /* ᴒ Phonetic Extensions */ - case 0x1d13: /* ᴓ Phonetic Extensions */ - case 0x1d14: /* ᴔ Phonetic Extensions */ - case 0x1d15: /* ᴕ Phonetic Extensions */ - case 0x1d16: /* ᴖ Phonetic Extensions */ - case 0x1d17: /* ᴗ Phonetic Extensions */ - case 0x1d18: /* ᴘ Phonetic Extensions */ - case 0x1d19: /* ᴙ Phonetic Extensions */ - case 0x1d1a: /* ᴚ Phonetic Extensions */ - case 0x1d1b: /* ᴛ Phonetic Extensions */ - case 0x1d1c: /* ᴜ Phonetic Extensions */ - case 0x1d1d: /* ᴝ Phonetic Extensions */ - case 0x1d1e: /* ᴞ Phonetic Extensions */ - case 0x1d1f: /* ᴟ Phonetic Extensions */ - case 0x1d20: /* ᴠ Phonetic Extensions */ - case 0x1d21: /* ᴡ Phonetic Extensions */ - case 0x1d22: /* ᴢ Phonetic Extensions */ - case 0x1d23: /* ᴣ Phonetic Extensions */ - case 0x1d24: /* ᴤ Phonetic Extensions */ - case 0x1d25: /* ᴥ Phonetic Extensions */ - case 0x1d26: /* ᴦ Phonetic Extensions */ - case 0x1d27: /* ᴧ Phonetic Extensions */ - case 0x1d28: /* ᴨ Phonetic Extensions */ - case 0x1d29: /* ᴩ Phonetic Extensions */ - case 0x1d2a: /* ᴪ Phonetic Extensions */ - case 0x1d2b: /* ᴫ Phonetic Extensions */ - case 0x1d6b: /* ᵫ Phonetic Extensions */ - case 0x1d6c: /* ᵬ Phonetic Extensions */ - case 0x1d6d: /* ᵭ Phonetic Extensions */ - case 0x1d6e: /* ᵮ Phonetic Extensions */ - case 0x1d6f: /* ᵯ Phonetic Extensions */ - case 0x1d70: /* ᵰ Phonetic Extensions */ - case 0x1d71: /* ᵱ Phonetic Extensions */ - case 0x1d72: /* ᵲ Phonetic Extensions */ - case 0x1d73: /* ᵳ Phonetic Extensions */ - case 0x1d74: /* ᵴ Phonetic Extensions */ - case 0x1d75: /* ᵵ Phonetic Extensions */ - case 0x1d76: /* ᵶ Phonetic Extensions */ - case 0x1d77: /* ᵷ Phonetic Extensions */ - case 0x1d7a: /* ᵺ Phonetic Extensions */ - case 0x1d7b: /* ᵻ Phonetic Extensions */ - case 0x1d7c: /* ᵼ Phonetic Extensions */ - case 0x1d7e: /* ᵾ Phonetic Extensions */ - case 0x1d7f: /* ᵿ Phonetic Extensions */ - case 0x1d80: /* . Phonetic Extensions Supplement */ - case 0x1d81: /* . Phonetic Extensions Supplement */ - case 0x1d82: /* . Phonetic Extensions Supplement */ - case 0x1d83: /* . Phonetic Extensions Supplement */ - case 0x1d84: /* . Phonetic Extensions Supplement */ - case 0x1d85: /* . Phonetic Extensions Supplement */ - case 0x1d86: /* . Phonetic Extensions Supplement */ - case 0x1d87: /* . Phonetic Extensions Supplement */ - case 0x1d88: /* . Phonetic Extensions Supplement */ - case 0x1d89: /* . Phonetic Extensions Supplement */ - case 0x1d8a: /* . Phonetic Extensions Supplement */ - case 0x1d8b: /* . Phonetic Extensions Supplement */ - case 0x1d8c: /* . Phonetic Extensions Supplement */ - case 0x1d8d: /* . Phonetic Extensions Supplement */ - case 0x1d8f: /* . Phonetic Extensions Supplement */ - case 0x1d90: /* . Phonetic Extensions Supplement */ - case 0x1d91: /* . Phonetic Extensions Supplement */ - case 0x1d92: /* . Phonetic Extensions Supplement */ - case 0x1d93: /* . Phonetic Extensions Supplement */ - case 0x1d94: /* . Phonetic Extensions Supplement */ - case 0x1d95: /* . Phonetic Extensions Supplement */ - case 0x1d96: /* . Phonetic Extensions Supplement */ - case 0x1d97: /* . Phonetic Extensions Supplement */ - case 0x1d98: /* . Phonetic Extensions Supplement */ - case 0x1d99: /* . Phonetic Extensions Supplement */ - case 0x1d9a: /* . Phonetic Extensions Supplement */ - case 0x1e96: /* ẖ Watin-C */ - case 0x1e97: /* ẗ Watin-C */ - case 0x1e98: /* ẘ Watin-C */ - case 0x1e99: /* ẙ Watin-C */ - case 0x1e9a: /* ẚ Watin-C */ - case 0x1e9c: /* ẜ Watin-C */ - case 0x1e9d: /* ẝ Watin-C */ - case 0x1e9f: /* ẟ Watin-C */ - case 0x1f50: /* ὐ Greek2 */ - case 0x1f52: /* ὒ Greek2 */ - case 0x1f54: /* ὔ Greek2 */ - case 0x1f56: /* ὖ Greek2 */ - case 0x1fb2: /* ᾲ Greek2 */ - case 0x1fb4: /* ᾴ Greek2 */ - case 0x1fb6: /* ᾶ Greek2 */ - case 0x1fb7: /* ᾷ Greek2 */ - case 0x1fc2: /* ῂ Greek2 */ - case 0x1fc4: /* ῄ Greek2 */ - case 0x1fc6: /* ῆ Greek2 */ - case 0x1fc7: /* ῇ Greek2 */ - case 0x1fd2: /* ῒ Greek2 */ - case 0x1fd3: /* ΐ Greek2 */ - case 0x1fd6: /* ῖ Greek2 */ - case 0x1fd7: /* ῗ Greek2 */ - case 0x1fe2: /* ῢ Greek2 */ - case 0x1fe3: /* ΰ Greek2 */ - case 0x1fe4: /* ῤ Greek2 */ - case 0x1fe6: /* ῦ Greek2 */ - case 0x1fe7: /* ῧ Greek2 */ - case 0x1ff2: /* ῲ Greek2 */ - case 0x1ff4: /* ῴ Greek2 */ - case 0x1ff6: /* ῶ Greek2 */ - case 0x1ff7: /* ῷ Greek2 */ - case 0x210a: /* ℊ Letterlike */ - case 0x210e: /* ℎ Letterlike */ - case 0x210f: /* ℏ Letterlike */ - case 0x2113: /* ℓ Letterlike */ - case 0x212f: /* ℯ Letterlike */ - case 0x2134: /* ℴ Letterlike */ - case 0x2139: /* ℹ Letterlike */ - case 0x213c: /* ℼ Letterlike */ - case 0x213d: /* ℽ Letterlike */ - case 0x2146: /* ⅆ Letterlike */ - case 0x2147: /* ⅇ Letterlike */ - case 0x2148: /* ⅈ Letterlike */ - case 0x2149: /* ⅉ Letterlike */ - case 0x2c71: /* . Watin-D */ - case 0x2c74: /* . Watin-D */ - case 0x2c77: /* . Watin-D */ - case 0x2c78: /* . Watin-D */ - case 0x2c79: /* . Watin-D */ - case 0x2c7a: /* . Watin-D */ - case 0x2c7b: /* . Watin-D */ - case 0x2ce4: /* . Coptic */ - case 0xa730: /* . Latin Extended-D */ - case 0xa731: /* . Latin Extended-D */ - case 0xa771: /* . Latin Extended-D */ - case 0xa772: /* . Latin Extended-D */ - case 0xa773: /* . Latin Extended-D */ - case 0xa774: /* . Latin Extended-D */ - case 0xa775: /* . Latin Extended-D */ - case 0xa776: /* . Latin Extended-D */ - case 0xa777: /* . Latin Extended-D */ - case 0xa778: /* . Latin Extended-D */ - case 0xa78e: /* . Latin Extended-D */ - case 0xa795: /* . Latin Extended-D */ - case 0xa7af: /* . Latin Extended-D */ - case 0xa7fa: /* . Latin Extended-D */ - case 0xab30: /* . Latin Extended-E */ - case 0xab31: /* . Latin Extended-E */ - case 0xab32: /* . Latin Extended-E */ - case 0xab33: /* . Latin Extended-E */ - case 0xab34: /* . Latin Extended-E */ - case 0xab35: /* . Latin Extended-E */ - case 0xab36: /* . Latin Extended-E */ - case 0xab37: /* . Latin Extended-E */ - case 0xab38: /* . Latin Extended-E */ - case 0xab39: /* . Latin Extended-E */ - case 0xab3a: /* . Latin Extended-E */ - case 0xab3b: /* . Latin Extended-E */ - case 0xab3c: /* . Latin Extended-E */ - case 0xab3d: /* . Latin Extended-E */ - case 0xab3e: /* . Latin Extended-E */ - case 0xab3f: /* . Latin Extended-E */ - case 0xab40: /* . Latin Extended-E */ - case 0xab41: /* . Latin Extended-E */ - case 0xab42: /* . Latin Extended-E */ - case 0xab43: /* . Latin Extended-E */ - case 0xab44: /* . Latin Extended-E */ - case 0xab45: /* . Latin Extended-E */ - case 0xab46: /* . Latin Extended-E */ - case 0xab47: /* . Latin Extended-E */ - case 0xab48: /* . Latin Extended-E */ - case 0xab49: /* . Latin Extended-E */ - case 0xab4a: /* . Latin Extended-E */ - case 0xab4b: /* . Latin Extended-E */ - case 0xab4c: /* . Latin Extended-E */ - case 0xab4d: /* . Latin Extended-E */ - case 0xab4e: /* . Latin Extended-E */ - case 0xab4f: /* . Latin Extended-E */ - case 0xab50: /* . Latin Extended-E */ - case 0xab51: /* . Latin Extended-E */ - case 0xab52: /* . Latin Extended-E */ - case 0xab54: /* . Latin Extended-E */ - case 0xab55: /* . Latin Extended-E */ - case 0xab56: /* . Latin Extended-E */ - case 0xab57: /* . Latin Extended-E */ - case 0xab58: /* . Latin Extended-E */ - case 0xab59: /* . Latin Extended-E */ - case 0xab5a: /* . Latin Extended-E */ - case 0xab60: /* . Latin Extended-E */ - case 0xab61: /* . Latin Extended-E */ - case 0xab62: /* . Latin Extended-E */ - case 0xab63: /* . Latin Extended-E */ - case 0xab64: /* . Latin Extended-E */ - case 0xab65: /* . Latin Extended-E */ - case 0xab66: /* . Latin Extended-E */ - case 0xab67: /* . Latin Extended-E */ - case 0xfb00: /* . Alphabetic Presentation Forms */ - case 0xfb01: /* . Alphabetic Presentation Forms */ - case 0xfb02: /* . Alphabetic Presentation Forms */ - case 0xfb03: /* . Alphabetic Presentation Forms */ - case 0xfb04: /* . Alphabetic Presentation Forms */ - case 0xfb05: /* . Alphabetic Presentation Forms */ - case 0xfb06: /* . Alphabetic Presentation Forms */ - case 0xfb13: /* . Alphabetic Presentation Forms */ - case 0xfb14: /* . Alphabetic Presentation Forms */ - case 0xfb15: /* . Alphabetic Presentation Forms */ - case 0xfb16: /* . Alphabetic Presentation Forms */ - case 0xfb17: /* . Alphabetic Presentation Forms */ - case 0x1d44e: /* 𝑎 Math */ - case 0x1d44f: /* 𝑏 Math */ - case 0x1d450: /* 𝑐 Math */ - case 0x1d451: /* 𝑑 Math */ - case 0x1d452: /* 𝑒 Math */ - case 0x1d453: /* 𝑓 Math */ - case 0x1d454: /* 𝑔 Math */ - case 0x1d45e: /* 𝑞 Math */ - case 0x1d45f: /* 𝑟 Math */ - case 0x1d460: /* 𝑠 Math */ - case 0x1d461: /* 𝑡 Math */ - case 0x1d462: /* 𝑢 Math */ - case 0x1d463: /* 𝑣 Math */ - case 0x1d464: /* 𝑤 Math */ - case 0x1d465: /* 𝑥 Math */ - case 0x1d466: /* 𝑦 Math */ - case 0x1d467: /* 𝑧 Math */ - case 0x1d4b6: /* 𝒶 Math */ - case 0x1d4b7: /* 𝒷 Math */ - case 0x1d4b8: /* 𝒸 Math */ - case 0x1d4b9: /* 𝒹 Math */ - case 0x1d4bb: /* 𝒻 Math */ - case 0x1d4bd: /* 𝒽 Math */ - case 0x1d4be: /* 𝒾 Math */ - case 0x1d4bf: /* 𝒿 Math */ - case 0x1d4c0: /* 𝓀 Math */ - case 0x1d4c1: /* 𝓁 Math */ - case 0x1d4c2: /* 𝓂 Math */ - case 0x1d4c3: /* 𝓃 Math */ - case 0x1d4c5: /* 𝓅 Math */ - case 0x1d4c6: /* 𝓆 Math */ - case 0x1d4c7: /* 𝓇 Math */ - case 0x1d51e: /* 𝔞 Math */ - case 0x1d51f: /* 𝔟 Math */ - case 0x1d520: /* 𝔠 Math */ - case 0x1d521: /* 𝔡 Math */ - case 0x1d522: /* 𝔢 Math */ - case 0x1d523: /* 𝔣 Math */ - case 0x1d524: /* 𝔤 Math */ - case 0x1d525: /* 𝔥 Math */ - case 0x1d526: /* 𝔦 Math */ - case 0x1d52f: /* 𝔯 Math */ - case 0x1d530: /* 𝔰 Math */ - case 0x1d531: /* 𝔱 Math */ - case 0x1d532: /* 𝔲 Math */ - case 0x1d533: /* 𝔳 Math */ - case 0x1d534: /* 𝔴 Math */ - case 0x1d535: /* 𝔵 Math */ - case 0x1d536: /* 𝔶 Math */ - case 0x1d537: /* 𝔷 Math */ - case 0x1d552: /* 𝕒 Math */ - case 0x1d553: /* 𝕓 Math */ - case 0x1d554: /* 𝕔 Math */ - case 0x1d555: /* 𝕕 Math */ - case 0x1d556: /* 𝕖 Math */ - case 0x1d557: /* 𝕗 Math */ - case 0x1d558: /* 𝕘 Math */ - case 0x1d559: /* 𝕙 Math */ - case 0x1d55a: /* 𝕚 Math */ - case 0x1d55b: /* 𝕛 Math */ - case 0x1d55c: /* 𝕜 Math */ - case 0x1d55d: /* 𝕝 Math */ - case 0x1d55e: /* 𝕞 Math */ - case 0x1d55f: /* 𝕟 Math */ - case 0x1d560: /* 𝕠 Math */ - case 0x1d561: /* 𝕡 Math */ - case 0x1d562: /* 𝕢 Math */ - case 0x1d563: /* 𝕣 Math */ - case 0x1d564: /* 𝕤 Math */ - case 0x1d565: /* 𝕥 Math */ - case 0x1d566: /* 𝕦 Math */ - case 0x1d567: /* 𝕧 Math */ - case 0x1d568: /* 𝕨 Math */ - case 0x1d569: /* 𝕩 Math */ - case 0x1d56a: /* 𝕪 Math */ - case 0x1d56b: /* 𝕫 Math */ - case 0x1d656: /* 𝙖 Math */ - case 0x1d657: /* 𝙗 Math */ - case 0x1d658: /* 𝙘 Math */ - case 0x1d659: /* 𝙙 Math */ - case 0x1d65a: /* 𝙚 Math */ - case 0x1d65b: /* 𝙛 Math */ - case 0x1d65c: /* 𝙜 Math */ - case 0x1d65d: /* 𝙝 Math */ - case 0x1d65e: /* 𝙞 Math */ - case 0x1d65f: /* 𝙟 Math */ - case 0x1d660: /* 𝙠 Math */ - case 0x1d661: /* 𝙡 Math */ - case 0x1d662: /* 𝙢 Math */ - case 0x1d663: /* 𝙣 Math */ - case 0x1d664: /* 𝙤 Math */ - case 0x1d665: /* 𝙥 Math */ - case 0x1d666: /* 𝙦 Math */ - case 0x1d667: /* 𝙧 Math */ - case 0x1d668: /* 𝙨 Math */ - case 0x1d669: /* 𝙩 Math */ - case 0x1d66a: /* 𝙪 Math */ - case 0x1d66b: /* 𝙫 Math */ - case 0x1d66c: /* 𝙬 Math */ - case 0x1d66d: /* 𝙭 Math */ - case 0x1d66e: /* 𝙮 Math */ - case 0x1d66f: /* 𝙯 Math */ - case 0x1d6da: /* 𝛚 Math */ - case 0x1d6dc: /* 𝛜 Math */ - case 0x1d6dd: /* 𝛝 Math */ - case 0x1d6de: /* 𝛞 Math */ - case 0x1d6df: /* 𝛟 Math */ - case 0x1d6e0: /* 𝛠 Math */ - case 0x1d6e1: /* 𝛡 Math */ - case 0x1d70d: /* 𝜍 Math */ - case 0x1d70e: /* 𝜎 Math */ - case 0x1d70f: /* 𝜏 Math */ - case 0x1d710: /* 𝜐 Math */ - case 0x1d711: /* 𝜑 Math */ - case 0x1d712: /* 𝜒 Math */ - case 0x1d713: /* 𝜓 Math */ - case 0x1d714: /* 𝜔 Math */ - case 0x1d716: /* 𝜖 Math */ - case 0x1d717: /* 𝜗 Math */ - case 0x1d718: /* 𝜘 Math */ - case 0x1d719: /* 𝜙 Math */ - case 0x1d71a: /* 𝜚 Math */ - case 0x1d71b: /* 𝜛 Math */ - case 0x1d747: /* 𝝇 Math */ - case 0x1d748: /* 𝝈 Math */ - case 0x1d749: /* 𝝉 Math */ - case 0x1d74a: /* 𝝊 Math */ - case 0x1d74b: /* 𝝋 Math */ - case 0x1d74c: /* 𝝌 Math */ - case 0x1d74d: /* 𝝍 Math */ - case 0x1d74e: /* 𝝎 Math */ - case 0x1d750: /* 𝝐 Math */ - case 0x1d751: /* 𝝑 Math */ - case 0x1d752: /* 𝝒 Math */ - case 0x1d753: /* 𝝓 Math */ - case 0x1d754: /* 𝝔 Math */ - case 0x1d755: /* 𝝕 Math */ - case 0x1d781: /* 𝞁 Math */ - case 0x1d782: /* 𝞂 Math */ - case 0x1d783: /* 𝞃 Math */ - case 0x1d784: /* 𝞄 Math */ - case 0x1d785: /* 𝞅 Math */ - case 0x1d786: /* 𝞆 Math */ - case 0x1d787: /* 𝞇 Math */ - case 0x1d788: /* 𝞈 Math */ - case 0x1d78a: /* 𝞊 Math */ - case 0x1d78b: /* 𝞋 Math */ - case 0x1d78c: /* 𝞌 Math */ - case 0x1d78d: /* 𝞍 Math */ - case 0x1d78e: /* 𝞎 Math */ - case 0x1d78f: /* 𝞏 Math */ - case 0x1d7aa: /* 𝞪 Math */ - case 0x1d7ab: /* 𝞫 Math */ - case 0x1d7ac: /* 𝞬 Math */ - case 0x1d7ad: /* 𝞭 Math */ - case 0x1d7ae: /* 𝞮 Math */ - case 0x1d7af: /* 𝞯 Math */ - case 0x1d7b0: /* 𝞰 Math */ - case 0x1d7b1: /* 𝞱 Math */ - case 0x1d7b2: /* 𝞲 Math */ - case 0x1d7b3: /* 𝞳 Math */ - case 0x1d7b4: /* 𝞴 Math */ - case 0x1d7b5: /* 𝞵 Math */ - case 0x1d7b6: /* 𝞶 Math */ - case 0x1d7b7: /* 𝞷 Math */ - case 0x1d7b8: /* 𝞸 Math */ - case 0x1d7b9: /* 𝞹 Math */ - case 0x1d7ba: /* 𝞺 Math */ - case 0x1d7bb: /* 𝞻 Math */ - case 0x1d7bc: /* 𝞼 Math */ - case 0x1d7bd: /* 𝞽 Math */ - case 0x1d7be: /* 𝞾 Math */ - case 0x1d7bf: /* 𝞿 Math */ - case 0x1d7c0: /* 𝟀 Math */ - case 0x1d7c1: /* 𝟁 Math */ - case 0x1d7c2: /* 𝟂 Math */ - case 0x1d7c4: /* 𝟄 Math */ - case 0x1d7c5: /* 𝟅 Math */ - case 0x1d7c6: /* 𝟆 Math */ - case 0x1d7c7: /* 𝟇 Math */ - case 0x1d7c8: /* 𝟈 Math */ - case 0x1d7c9: /* 𝟉 Math */ - case 0x1d7cb: /* 𝟋 Math */ - return 1; - default: - return 0; - } - } -} - -__weak_reference(iswlower, iswlower_l); diff --git a/libc/str/iswlower.cc b/libc/str/iswlower.cc new file mode 100644 index 000000000..a0b5778d6 --- /dev/null +++ b/libc/str/iswlower.cc @@ -0,0 +1,712 @@ +/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ +│ vi: set et ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/macros.h" +#include "libc/str/has_char.h" +#include "libc/wctype.h" + +static const unsigned short kLower[][2] = { + {0x61, 0x7a}, // + {0xaa, 0xaa}, // + {0xb5, 0xb5}, // + {0xba, 0xba}, // + {0xdf, 0xf6}, // + {0xf8, 0xff}, // + {0x101, 0x101}, // + {0x103, 0x103}, // + {0x105, 0x105}, // + {0x107, 0x107}, // + {0x109, 0x109}, // + {0x10b, 0x10b}, // + {0x10d, 0x10d}, // + {0x10f, 0x10f}, // + {0x111, 0x111}, // + {0x113, 0x113}, // + {0x115, 0x115}, // + {0x117, 0x117}, // + {0x119, 0x119}, // + {0x11b, 0x11b}, // + {0x11d, 0x11d}, // + {0x11f, 0x11f}, // + {0x121, 0x121}, // + {0x123, 0x123}, // + {0x125, 0x125}, // + {0x127, 0x127}, // + {0x129, 0x129}, // + {0x12b, 0x12b}, // + {0x12d, 0x12d}, // + {0x12f, 0x12f}, // + {0x131, 0x131}, // + {0x133, 0x133}, // + {0x135, 0x135}, // + {0x137, 0x138}, // + {0x13a, 0x13a}, // + {0x13c, 0x13c}, // + {0x13e, 0x13e}, // + {0x140, 0x140}, // + {0x142, 0x142}, // + {0x144, 0x144}, // + {0x146, 0x146}, // + {0x148, 0x149}, // + {0x14b, 0x14b}, // + {0x14d, 0x14d}, // + {0x14f, 0x14f}, // + {0x151, 0x151}, // + {0x153, 0x153}, // + {0x155, 0x155}, // + {0x157, 0x157}, // + {0x159, 0x159}, // + {0x15b, 0x15b}, // + {0x15d, 0x15d}, // + {0x15f, 0x15f}, // + {0x161, 0x161}, // + {0x163, 0x163}, // + {0x165, 0x165}, // + {0x167, 0x167}, // + {0x169, 0x169}, // + {0x16b, 0x16b}, // + {0x16d, 0x16d}, // + {0x16f, 0x16f}, // + {0x171, 0x171}, // + {0x173, 0x173}, // + {0x175, 0x175}, // + {0x177, 0x177}, // + {0x17a, 0x17a}, // + {0x17c, 0x17c}, // + {0x17e, 0x180}, // + {0x183, 0x183}, // + {0x185, 0x185}, // + {0x188, 0x188}, // + {0x18c, 0x18d}, // + {0x192, 0x192}, // + {0x195, 0x195}, // + {0x199, 0x19b}, // + {0x19e, 0x19e}, // + {0x1a1, 0x1a1}, // + {0x1a3, 0x1a3}, // + {0x1a5, 0x1a5}, // + {0x1a8, 0x1a8}, // + {0x1aa, 0x1ab}, // + {0x1ad, 0x1ad}, // + {0x1b0, 0x1b0}, // + {0x1b4, 0x1b4}, // + {0x1b6, 0x1b6}, // + {0x1b9, 0x1ba}, // + {0x1bd, 0x1bf}, // + {0x1c5, 0x1c6}, // + {0x1c8, 0x1c9}, // + {0x1cb, 0x1cc}, // + {0x1ce, 0x1ce}, // + {0x1d0, 0x1d0}, // + {0x1d2, 0x1d2}, // + {0x1d4, 0x1d4}, // + {0x1d6, 0x1d6}, // + {0x1d8, 0x1d8}, // + {0x1da, 0x1da}, // + {0x1dc, 0x1dd}, // + {0x1df, 0x1df}, // + {0x1e1, 0x1e1}, // + {0x1e3, 0x1e3}, // + {0x1e5, 0x1e5}, // + {0x1e7, 0x1e7}, // + {0x1e9, 0x1e9}, // + {0x1eb, 0x1eb}, // + {0x1ed, 0x1ed}, // + {0x1ef, 0x1f0}, // + {0x1f2, 0x1f3}, // + {0x1f5, 0x1f5}, // + {0x1f9, 0x1f9}, // + {0x1fb, 0x1fb}, // + {0x1fd, 0x1fd}, // + {0x1ff, 0x1ff}, // + {0x201, 0x201}, // + {0x203, 0x203}, // + {0x205, 0x205}, // + {0x207, 0x207}, // + {0x209, 0x209}, // + {0x20b, 0x20b}, // + {0x20d, 0x20d}, // + {0x20f, 0x20f}, // + {0x211, 0x211}, // + {0x213, 0x213}, // + {0x215, 0x215}, // + {0x217, 0x217}, // + {0x219, 0x219}, // + {0x21b, 0x21b}, // + {0x21d, 0x21d}, // + {0x21f, 0x21f}, // + {0x221, 0x221}, // + {0x223, 0x223}, // + {0x225, 0x225}, // + {0x227, 0x227}, // + {0x229, 0x229}, // + {0x22b, 0x22b}, // + {0x22d, 0x22d}, // + {0x22f, 0x22f}, // + {0x231, 0x231}, // + {0x233, 0x239}, // + {0x23c, 0x23c}, // + {0x23f, 0x240}, // + {0x242, 0x242}, // + {0x247, 0x247}, // + {0x249, 0x249}, // + {0x24b, 0x24b}, // + {0x24d, 0x24d}, // + {0x24f, 0x293}, // + {0x295, 0x2b8}, // + {0x2c0, 0x2c1}, // + {0x2e0, 0x2e4}, // + {0x345, 0x345}, // + {0x371, 0x371}, // + {0x373, 0x373}, // + {0x377, 0x377}, // + {0x37a, 0x37d}, // + {0x390, 0x390}, // + {0x3ac, 0x3ce}, // + {0x3d0, 0x3d1}, // + {0x3d5, 0x3d7}, // + {0x3d9, 0x3d9}, // + {0x3db, 0x3db}, // + {0x3dd, 0x3dd}, // + {0x3df, 0x3df}, // + {0x3e1, 0x3e1}, // + {0x3e3, 0x3e3}, // + {0x3e5, 0x3e5}, // + {0x3e7, 0x3e7}, // + {0x3e9, 0x3e9}, // + {0x3eb, 0x3eb}, // + {0x3ed, 0x3ed}, // + {0x3ef, 0x3f3}, // + {0x3f5, 0x3f5}, // + {0x3f8, 0x3f8}, // + {0x3fb, 0x3fc}, // + {0x430, 0x45f}, // + {0x461, 0x461}, // + {0x463, 0x463}, // + {0x465, 0x465}, // + {0x467, 0x467}, // + {0x469, 0x469}, // + {0x46b, 0x46b}, // + {0x46d, 0x46d}, // + {0x46f, 0x46f}, // + {0x471, 0x471}, // + {0x473, 0x473}, // + {0x475, 0x475}, // + {0x477, 0x477}, // + {0x479, 0x479}, // + {0x47b, 0x47b}, // + {0x47d, 0x47d}, // + {0x47f, 0x47f}, // + {0x481, 0x481}, // + {0x48b, 0x48b}, // + {0x48d, 0x48d}, // + {0x48f, 0x48f}, // + {0x491, 0x491}, // + {0x493, 0x493}, // + {0x495, 0x495}, // + {0x497, 0x497}, // + {0x499, 0x499}, // + {0x49b, 0x49b}, // + {0x49d, 0x49d}, // + {0x49f, 0x49f}, // + {0x4a1, 0x4a1}, // + {0x4a3, 0x4a3}, // + {0x4a5, 0x4a5}, // + {0x4a7, 0x4a7}, // + {0x4a9, 0x4a9}, // + {0x4ab, 0x4ab}, // + {0x4ad, 0x4ad}, // + {0x4af, 0x4af}, // + {0x4b1, 0x4b1}, // + {0x4b3, 0x4b3}, // + {0x4b5, 0x4b5}, // + {0x4b7, 0x4b7}, // + {0x4b9, 0x4b9}, // + {0x4bb, 0x4bb}, // + {0x4bd, 0x4bd}, // + {0x4bf, 0x4bf}, // + {0x4c2, 0x4c2}, // + {0x4c4, 0x4c4}, // + {0x4c6, 0x4c6}, // + {0x4c8, 0x4c8}, // + {0x4ca, 0x4ca}, // + {0x4cc, 0x4cc}, // + {0x4ce, 0x4cf}, // + {0x4d1, 0x4d1}, // + {0x4d3, 0x4d3}, // + {0x4d5, 0x4d5}, // + {0x4d7, 0x4d7}, // + {0x4d9, 0x4d9}, // + {0x4db, 0x4db}, // + {0x4dd, 0x4dd}, // + {0x4df, 0x4df}, // + {0x4e1, 0x4e1}, // + {0x4e3, 0x4e3}, // + {0x4e5, 0x4e5}, // + {0x4e7, 0x4e7}, // + {0x4e9, 0x4e9}, // + {0x4eb, 0x4eb}, // + {0x4ed, 0x4ed}, // + {0x4ef, 0x4ef}, // + {0x4f1, 0x4f1}, // + {0x4f3, 0x4f3}, // + {0x4f5, 0x4f5}, // + {0x4f7, 0x4f7}, // + {0x4f9, 0x4f9}, // + {0x4fb, 0x4fb}, // + {0x4fd, 0x4fd}, // + {0x4ff, 0x4ff}, // + {0x501, 0x501}, // + {0x503, 0x503}, // + {0x505, 0x505}, // + {0x507, 0x507}, // + {0x509, 0x509}, // + {0x50b, 0x50b}, // + {0x50d, 0x50d}, // + {0x50f, 0x50f}, // + {0x511, 0x511}, // + {0x513, 0x513}, // + {0x515, 0x515}, // + {0x517, 0x517}, // + {0x519, 0x519}, // + {0x51b, 0x51b}, // + {0x51d, 0x51d}, // + {0x51f, 0x51f}, // + {0x521, 0x521}, // + {0x523, 0x523}, // + {0x525, 0x525}, // + {0x527, 0x527}, // + {0x529, 0x529}, // + {0x52b, 0x52b}, // + {0x52d, 0x52d}, // + {0x52f, 0x52f}, // + {0x560, 0x588}, // + {0x10d0, 0x10fa}, // + {0x10fc, 0x10ff}, // + {0x13f8, 0x13fd}, // + {0x1c80, 0x1c88}, // + {0x1d00, 0x1dbf}, // + {0x1e01, 0x1e01}, // + {0x1e03, 0x1e03}, // + {0x1e05, 0x1e05}, // + {0x1e07, 0x1e07}, // + {0x1e09, 0x1e09}, // + {0x1e0b, 0x1e0b}, // + {0x1e0d, 0x1e0d}, // + {0x1e0f, 0x1e0f}, // + {0x1e11, 0x1e11}, // + {0x1e13, 0x1e13}, // + {0x1e15, 0x1e15}, // + {0x1e17, 0x1e17}, // + {0x1e19, 0x1e19}, // + {0x1e1b, 0x1e1b}, // + {0x1e1d, 0x1e1d}, // + {0x1e1f, 0x1e1f}, // + {0x1e21, 0x1e21}, // + {0x1e23, 0x1e23}, // + {0x1e25, 0x1e25}, // + {0x1e27, 0x1e27}, // + {0x1e29, 0x1e29}, // + {0x1e2b, 0x1e2b}, // + {0x1e2d, 0x1e2d}, // + {0x1e2f, 0x1e2f}, // + {0x1e31, 0x1e31}, // + {0x1e33, 0x1e33}, // + {0x1e35, 0x1e35}, // + {0x1e37, 0x1e37}, // + {0x1e39, 0x1e39}, // + {0x1e3b, 0x1e3b}, // + {0x1e3d, 0x1e3d}, // + {0x1e3f, 0x1e3f}, // + {0x1e41, 0x1e41}, // + {0x1e43, 0x1e43}, // + {0x1e45, 0x1e45}, // + {0x1e47, 0x1e47}, // + {0x1e49, 0x1e49}, // + {0x1e4b, 0x1e4b}, // + {0x1e4d, 0x1e4d}, // + {0x1e4f, 0x1e4f}, // + {0x1e51, 0x1e51}, // + {0x1e53, 0x1e53}, // + {0x1e55, 0x1e55}, // + {0x1e57, 0x1e57}, // + {0x1e59, 0x1e59}, // + {0x1e5b, 0x1e5b}, // + {0x1e5d, 0x1e5d}, // + {0x1e5f, 0x1e5f}, // + {0x1e61, 0x1e61}, // + {0x1e63, 0x1e63}, // + {0x1e65, 0x1e65}, // + {0x1e67, 0x1e67}, // + {0x1e69, 0x1e69}, // + {0x1e6b, 0x1e6b}, // + {0x1e6d, 0x1e6d}, // + {0x1e6f, 0x1e6f}, // + {0x1e71, 0x1e71}, // + {0x1e73, 0x1e73}, // + {0x1e75, 0x1e75}, // + {0x1e77, 0x1e77}, // + {0x1e79, 0x1e79}, // + {0x1e7b, 0x1e7b}, // + {0x1e7d, 0x1e7d}, // + {0x1e7f, 0x1e7f}, // + {0x1e81, 0x1e81}, // + {0x1e83, 0x1e83}, // + {0x1e85, 0x1e85}, // + {0x1e87, 0x1e87}, // + {0x1e89, 0x1e89}, // + {0x1e8b, 0x1e8b}, // + {0x1e8d, 0x1e8d}, // + {0x1e8f, 0x1e8f}, // + {0x1e91, 0x1e91}, // + {0x1e93, 0x1e93}, // + {0x1e95, 0x1e9d}, // + {0x1e9f, 0x1e9f}, // + {0x1ea1, 0x1ea1}, // + {0x1ea3, 0x1ea3}, // + {0x1ea5, 0x1ea5}, // + {0x1ea7, 0x1ea7}, // + {0x1ea9, 0x1ea9}, // + {0x1eab, 0x1eab}, // + {0x1ead, 0x1ead}, // + {0x1eaf, 0x1eaf}, // + {0x1eb1, 0x1eb1}, // + {0x1eb3, 0x1eb3}, // + {0x1eb5, 0x1eb5}, // + {0x1eb7, 0x1eb7}, // + {0x1eb9, 0x1eb9}, // + {0x1ebb, 0x1ebb}, // + {0x1ebd, 0x1ebd}, // + {0x1ebf, 0x1ebf}, // + {0x1ec1, 0x1ec1}, // + {0x1ec3, 0x1ec3}, // + {0x1ec5, 0x1ec5}, // + {0x1ec7, 0x1ec7}, // + {0x1ec9, 0x1ec9}, // + {0x1ecb, 0x1ecb}, // + {0x1ecd, 0x1ecd}, // + {0x1ecf, 0x1ecf}, // + {0x1ed1, 0x1ed1}, // + {0x1ed3, 0x1ed3}, // + {0x1ed5, 0x1ed5}, // + {0x1ed7, 0x1ed7}, // + {0x1ed9, 0x1ed9}, // + {0x1edb, 0x1edb}, // + {0x1edd, 0x1edd}, // + {0x1edf, 0x1edf}, // + {0x1ee1, 0x1ee1}, // + {0x1ee3, 0x1ee3}, // + {0x1ee5, 0x1ee5}, // + {0x1ee7, 0x1ee7}, // + {0x1ee9, 0x1ee9}, // + {0x1eeb, 0x1eeb}, // + {0x1eed, 0x1eed}, // + {0x1eef, 0x1eef}, // + {0x1ef1, 0x1ef1}, // + {0x1ef3, 0x1ef3}, // + {0x1ef5, 0x1ef5}, // + {0x1ef7, 0x1ef7}, // + {0x1ef9, 0x1ef9}, // + {0x1efb, 0x1efb}, // + {0x1efd, 0x1efd}, // + {0x1eff, 0x1f07}, // + {0x1f10, 0x1f15}, // + {0x1f20, 0x1f27}, // + {0x1f30, 0x1f37}, // + {0x1f40, 0x1f45}, // + {0x1f50, 0x1f57}, // + {0x1f60, 0x1f67}, // + {0x1f70, 0x1f7d}, // + {0x1f80, 0x1f87}, // + {0x1f90, 0x1f97}, // + {0x1fa0, 0x1fa7}, // + {0x1fb0, 0x1fb4}, // + {0x1fb6, 0x1fb7}, // + {0x1fbe, 0x1fbe}, // + {0x1fc2, 0x1fc4}, // + {0x1fc6, 0x1fc7}, // + {0x1fd0, 0x1fd3}, // + {0x1fd6, 0x1fd7}, // + {0x1fe0, 0x1fe7}, // + {0x1ff2, 0x1ff4}, // + {0x1ff6, 0x1ff7}, // + {0x2071, 0x2071}, // + {0x207f, 0x207f}, // + {0x2090, 0x209c}, // + {0x210a, 0x210a}, // + {0x210e, 0x210f}, // + {0x2113, 0x2113}, // + {0x212f, 0x212f}, // + {0x2134, 0x2134}, // + {0x2139, 0x2139}, // + {0x213c, 0x213d}, // + {0x2146, 0x2149}, // + {0x214e, 0x214e}, // + {0x2170, 0x217f}, // + {0x2184, 0x2184}, // + {0x24d0, 0x24e9}, // + {0x2c30, 0x2c5f}, // + {0x2c61, 0x2c61}, // + {0x2c65, 0x2c66}, // + {0x2c68, 0x2c68}, // + {0x2c6a, 0x2c6a}, // + {0x2c6c, 0x2c6c}, // + {0x2c71, 0x2c71}, // + {0x2c73, 0x2c74}, // + {0x2c76, 0x2c7d}, // + {0x2c81, 0x2c81}, // + {0x2c83, 0x2c83}, // + {0x2c85, 0x2c85}, // + {0x2c87, 0x2c87}, // + {0x2c89, 0x2c89}, // + {0x2c8b, 0x2c8b}, // + {0x2c8d, 0x2c8d}, // + {0x2c8f, 0x2c8f}, // + {0x2c91, 0x2c91}, // + {0x2c93, 0x2c93}, // + {0x2c95, 0x2c95}, // + {0x2c97, 0x2c97}, // + {0x2c99, 0x2c99}, // + {0x2c9b, 0x2c9b}, // + {0x2c9d, 0x2c9d}, // + {0x2c9f, 0x2c9f}, // + {0x2ca1, 0x2ca1}, // + {0x2ca3, 0x2ca3}, // + {0x2ca5, 0x2ca5}, // + {0x2ca7, 0x2ca7}, // + {0x2ca9, 0x2ca9}, // + {0x2cab, 0x2cab}, // + {0x2cad, 0x2cad}, // + {0x2caf, 0x2caf}, // + {0x2cb1, 0x2cb1}, // + {0x2cb3, 0x2cb3}, // + {0x2cb5, 0x2cb5}, // + {0x2cb7, 0x2cb7}, // + {0x2cb9, 0x2cb9}, // + {0x2cbb, 0x2cbb}, // + {0x2cbd, 0x2cbd}, // + {0x2cbf, 0x2cbf}, // + {0x2cc1, 0x2cc1}, // + {0x2cc3, 0x2cc3}, // + {0x2cc5, 0x2cc5}, // + {0x2cc7, 0x2cc7}, // + {0x2cc9, 0x2cc9}, // + {0x2ccb, 0x2ccb}, // + {0x2ccd, 0x2ccd}, // + {0x2ccf, 0x2ccf}, // + {0x2cd1, 0x2cd1}, // + {0x2cd3, 0x2cd3}, // + {0x2cd5, 0x2cd5}, // + {0x2cd7, 0x2cd7}, // + {0x2cd9, 0x2cd9}, // + {0x2cdb, 0x2cdb}, // + {0x2cdd, 0x2cdd}, // + {0x2cdf, 0x2cdf}, // + {0x2ce1, 0x2ce1}, // + {0x2ce3, 0x2ce4}, // + {0x2cec, 0x2cec}, // + {0x2cee, 0x2cee}, // + {0x2cf3, 0x2cf3}, // + {0x2d00, 0x2d25}, // + {0x2d27, 0x2d27}, // + {0x2d2d, 0x2d2d}, // + {0xa641, 0xa641}, // + {0xa643, 0xa643}, // + {0xa645, 0xa645}, // + {0xa647, 0xa647}, // + {0xa649, 0xa649}, // + {0xa64b, 0xa64b}, // + {0xa64d, 0xa64d}, // + {0xa64f, 0xa64f}, // + {0xa651, 0xa651}, // + {0xa653, 0xa653}, // + {0xa655, 0xa655}, // + {0xa657, 0xa657}, // + {0xa659, 0xa659}, // + {0xa65b, 0xa65b}, // + {0xa65d, 0xa65d}, // + {0xa65f, 0xa65f}, // + {0xa661, 0xa661}, // + {0xa663, 0xa663}, // + {0xa665, 0xa665}, // + {0xa667, 0xa667}, // + {0xa669, 0xa669}, // + {0xa66b, 0xa66b}, // + {0xa66d, 0xa66d}, // + {0xa681, 0xa681}, // + {0xa683, 0xa683}, // + {0xa685, 0xa685}, // + {0xa687, 0xa687}, // + {0xa689, 0xa689}, // + {0xa68b, 0xa68b}, // + {0xa68d, 0xa68d}, // + {0xa68f, 0xa68f}, // + {0xa691, 0xa691}, // + {0xa693, 0xa693}, // + {0xa695, 0xa695}, // + {0xa697, 0xa697}, // + {0xa699, 0xa699}, // + {0xa69b, 0xa69d}, // + {0xa723, 0xa723}, // + {0xa725, 0xa725}, // + {0xa727, 0xa727}, // + {0xa729, 0xa729}, // + {0xa72b, 0xa72b}, // + {0xa72d, 0xa72d}, // + {0xa72f, 0xa731}, // + {0xa733, 0xa733}, // + {0xa735, 0xa735}, // + {0xa737, 0xa737}, // + {0xa739, 0xa739}, // + {0xa73b, 0xa73b}, // + {0xa73d, 0xa73d}, // + {0xa73f, 0xa73f}, // + {0xa741, 0xa741}, // + {0xa743, 0xa743}, // + {0xa745, 0xa745}, // + {0xa747, 0xa747}, // + {0xa749, 0xa749}, // + {0xa74b, 0xa74b}, // + {0xa74d, 0xa74d}, // + {0xa74f, 0xa74f}, // + {0xa751, 0xa751}, // + {0xa753, 0xa753}, // + {0xa755, 0xa755}, // + {0xa757, 0xa757}, // + {0xa759, 0xa759}, // + {0xa75b, 0xa75b}, // + {0xa75d, 0xa75d}, // + {0xa75f, 0xa75f}, // + {0xa761, 0xa761}, // + {0xa763, 0xa763}, // + {0xa765, 0xa765}, // + {0xa767, 0xa767}, // + {0xa769, 0xa769}, // + {0xa76b, 0xa76b}, // + {0xa76d, 0xa76d}, // + {0xa76f, 0xa778}, // + {0xa77a, 0xa77a}, // + {0xa77c, 0xa77c}, // + {0xa77f, 0xa77f}, // + {0xa781, 0xa781}, // + {0xa783, 0xa783}, // + {0xa785, 0xa785}, // + {0xa787, 0xa787}, // + {0xa78c, 0xa78c}, // + {0xa78e, 0xa78e}, // + {0xa791, 0xa791}, // + {0xa793, 0xa795}, // + {0xa797, 0xa797}, // + {0xa799, 0xa799}, // + {0xa79b, 0xa79b}, // + {0xa79d, 0xa79d}, // + {0xa79f, 0xa79f}, // + {0xa7a1, 0xa7a1}, // + {0xa7a3, 0xa7a3}, // + {0xa7a5, 0xa7a5}, // + {0xa7a7, 0xa7a7}, // + {0xa7a9, 0xa7a9}, // + {0xa7af, 0xa7af}, // + {0xa7b5, 0xa7b5}, // + {0xa7b7, 0xa7b7}, // + {0xa7b9, 0xa7b9}, // + {0xa7bb, 0xa7bb}, // + {0xa7bd, 0xa7bd}, // + {0xa7bf, 0xa7bf}, // + {0xa7c1, 0xa7c1}, // + {0xa7c3, 0xa7c3}, // + {0xa7c8, 0xa7c8}, // + {0xa7ca, 0xa7ca}, // + {0xa7d1, 0xa7d1}, // + {0xa7d3, 0xa7d3}, // + {0xa7d5, 0xa7d5}, // + {0xa7d7, 0xa7d7}, // + {0xa7d9, 0xa7d9}, // + {0xa7f2, 0xa7f4}, // + {0xa7f6, 0xa7f6}, // + {0xa7f8, 0xa7fa}, // + {0xab30, 0xab5a}, // + {0xab5c, 0xab69}, // + {0xab70, 0xabbf}, // + {0xfb00, 0xfb06}, // + {0xfb13, 0xfb17}, // + {0xff41, 0xff5a}, // +}; + +static const unsigned kLowerAstral[][2] = { + {0x10428, 0x1044f}, // + {0x104d8, 0x104fb}, // + {0x10597, 0x105a1}, // + {0x105a3, 0x105b1}, // + {0x105b3, 0x105b9}, // + {0x105bb, 0x105bc}, // + {0x10780, 0x10780}, // + {0x10783, 0x10785}, // + {0x10787, 0x107b0}, // + {0x107b2, 0x107ba}, // + {0x10cc0, 0x10cf2}, // + {0x118c0, 0x118df}, // + {0x16e60, 0x16e7f}, // + {0x1d41a, 0x1d433}, // + {0x1d44e, 0x1d454}, // + {0x1d456, 0x1d467}, // + {0x1d482, 0x1d49b}, // + {0x1d4b6, 0x1d4b9}, // + {0x1d4bb, 0x1d4bb}, // + {0x1d4bd, 0x1d4c3}, // + {0x1d4c5, 0x1d4cf}, // + {0x1d4ea, 0x1d503}, // + {0x1d51e, 0x1d537}, // + {0x1d552, 0x1d56b}, // + {0x1d586, 0x1d59f}, // + {0x1d5ba, 0x1d5d3}, // + {0x1d5ee, 0x1d607}, // + {0x1d622, 0x1d63b}, // + {0x1d656, 0x1d66f}, // + {0x1d68a, 0x1d6a5}, // + {0x1d6c2, 0x1d6da}, // + {0x1d6dc, 0x1d6e1}, // + {0x1d6fc, 0x1d714}, // + {0x1d716, 0x1d71b}, // + {0x1d736, 0x1d74e}, // + {0x1d750, 0x1d755}, // + {0x1d770, 0x1d788}, // + {0x1d78a, 0x1d78f}, // + {0x1d7aa, 0x1d7c2}, // + {0x1d7c4, 0x1d7c9}, // + {0x1d7cb, 0x1d7cb}, // + {0x1df00, 0x1df09}, // + {0x1df0b, 0x1df1e}, // + {0x1df25, 0x1df2a}, // + {0x1e030, 0x1e06d}, // + {0x1e922, 0x1e943}, // +}; + +/** + * Returns nonzero if c is lowercase letter. + */ +int iswlower(wint_t c) { + if (!IsTiny() && c < 128) + return 'a' <= c && c <= 'z'; + if (c < 65536) + return has_char(kLower, ARRAYLEN(kLower), (unsigned short)c); + return has_char(kLowerAstral, ARRAYLEN(kLowerAstral), (unsigned)c); +} + +__weak_reference(iswlower, iswlower_l); diff --git a/libc/str/iswprint.c b/libc/str/iswprint.c index 030e45d46..9a4875c3e 100644 --- a/libc/str/iswprint.c +++ b/libc/str/iswprint.c @@ -22,8 +22,11 @@ * Returns nonzero if c is printable. */ int iswprint(wint_t c) { - return !((0x00 <= c && c <= 0x1F) || (0x7F <= c && c <= 0x9F) || - (0xFFF9 <= c && c <= 0xFFFB) || c == 0x2028 || c == 0x2029); + return (0 <= c && c <= 0x10FFFD) && // legal unicode + !(0x0000 <= c && c <= 0x001F) && // c0 control codes + !(0x007F <= c && c <= 0x009F) && // c1 control codes + !(0x2028 <= c && c <= 0x2029) && // line / paragraph separator + !(0xFFF9 <= c && c <= 0xFFFB); // interlinear annotation controls } __weak_reference(iswprint, iswprint_l); diff --git a/libc/str/iswpunct.c b/libc/str/iswpunct.c deleted file mode 100644 index d66779b76..000000000 --- a/libc/str/iswpunct.c +++ /dev/null @@ -1,543 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/wctype.h" - -/** - * Returns nonzero if c is punctuation mark. - */ -int iswpunct(wint_t c) { - if (c < 0xa0) { - switch (c) { - case '!': - case '"': - case '#': - case '$': - case '%': - case '&': - case '\'': - case '(': - case ')': - case '*': - case '+': - case ',': - case '-': - case '.': - case '/': - case ':': - case ';': - case '<': - case '=': - case '>': - case '?': - case '@': - case '[': - case '\\': - case ']': - case '^': - case '_': - case '`': - case '{': - case '|': - case '}': - case '~': - return 1; - default: - return 0; - } - } - switch (c) { - case u'¡': // INVERTED EXCLAMATION MARK (0x00a1 Po) - case u'§': // SECTION SIGN (0x00a7 Po) - case u'«': // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (0x00ab Pi) - case u'¶': // PILCROW SIGN (0x00b6 Po) - case u'·': // MIDDLE DOT (0x00b7 Po) - case u'»': // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (0x00bb Pf) - case u'¿': // INVERTED QUESTION MARK (0x00bf Po) - case u';': // GREEK QUESTION MARK (0x037e Po) - case u'·': // GREEK ANO TELEIA (0x0387 Po) - case u'՚': // ARMENIAN APOSTROPHE (0x055a Po) - case u'՛': // ARMENIAN EMPHASIS MARK (0x055b Po) - case u'՜': // ARMENIAN EXCLAMATION MARK (0x055c Po) - case u'՝': // ARMENIAN COMMA (0x055d Po) - case u'՞': // ARMENIAN QUESTION MARK (0x055e Po) - case u'՟': // ARMENIAN ABBREVIATION MARK (0x055f Po) - case u'։': // ARMENIAN FULL STOP (0x0589 Po) - case u'֊': // ARMENIAN HYPHEN (0x058a Pd) - case 0x05be: // HEBREW PUNCTUATION MAQAF (0x05be Pd) - case 0x05c0: // HEBREW PUNCTUATION PASEQ (0x05c0 Po) - case 0x05c3: // HEBREW PUNCTUATION SOF PASUQ (0x05c3 Po) - case 0x05c6: // HEBREW PUNCTUATION NUN HAFUKHA (0x05c6 Po) - case 0x05f3: // HEBREW PUNCTUATION GERESH (0x05f3 Po) - case 0x05f4: // HEBREW PUNCTUATION GERSHAYIM (0x05f4 Po) - case 0x0609: // ARABIC-INDIC PER MILLE SIGN (0x0609 Po) - case 0x060a: // ARABIC-INDIC PER TEN THOUSAND SIGN (0x060a Po) - case 0x060c: // ARABIC COMMA (0x060c Po) - case 0x060d: // ARABIC DATE SEPARATOR (0x060d Po) - case 0x061b: // ARABIC SEMICOLON (0x061b Po) - case u'؞': // ARABIC TRIPLE DOT PUNCTUATION MARK (0x061e Po) - case u'؟': // ARABIC QUESTION MARK (0x061f Po) - case u'٪': // ARABIC PERCENT SIGN (0x066a Po) - case u'٫': // ARABIC DECIMAL SEPARATOR (0x066b Po) - case u'٬': // ARABIC THOUSANDS SEPARATOR (0x066c Po) - case u'٭': // ARABIC FIVE POINTED STAR (0x066d Po) - case u'۔': // ARABIC FULL STOP (0x06d4 Po) - case u'߷': // NKO SYMBOL GBAKURUNEN (0x07f7 Po) - case u'߸': // NKO COMMA (0x07f8 Po) - case u'߹': // NKO EXCLAMATION MARK (0x07f9 Po) - case u'।': // DEVANAGARI DANDA (0x0964 Po) - case u'॥': // DEVANAGARI DOUBLE DANDA (0x0965 Po) - case u'॰': // DEVANAGARI ABBREVIATION SIGN (0x0970 Po) - case 0x09fd: // BENGALI ABBREVIATION SIGN (0x09fd Po) - case 0x0a76: // GURMUKHI ABBREVIATION SIGN (0x0a76 Po) - case 0x0af0: // GUJARATI ABBREVIATION SIGN (0x0af0 Po) - case 0x0c77: // TELUGU SIGN SIDDHAM (0x0c77 Po) - case 0x0c84: // KANNADA SIGN SIDDHAM (0x0c84 Po) - case u'෴': // SINHALA PUNCTUATION KUNDDALIYA (0x0df4 Po) - case u'๏': // THAI CHARACTER FONGMAN (0x0e4f Po) - case u'๚': // THAI CHARACTER ANGKHANKHU (0x0e5a Po) - case u'๛': // THAI CHARACTER KHOMUT (0x0e5b Po) - case u'༄': // TIBETAN MARK INITIAL YIG MGO MDUN MA (0x0f04 Po) - case u'༅': // TIBETAN MARK CLOSING YIG MGO SGAB MA (0x0f05 Po) - case u'༆': // TIBETAN MARK CARET YIG MGO PHUR SHAD MA (0x0f06 Po) - case u'༇': // TIBETAN MARK YIG MGO TSHEG SHAD MA (0x0f07 Po) - case u'༈': // TIBETAN MARK SBRUL SHAD (0x0f08 Po) - case u'༉': // TIBETAN MARK BSKUR YIG MGO (0x0f09 Po) - case u'༊': // TIBETAN MARK BKA- SHOG YIG MGO (0x0f0a Po) - case u'་': // TIBETAN MARK INTERSYLLABIC TSHEG (0x0f0b Po) - case u'༌': // TIBETAN MARK DELIMITER TSHEG BSTAR (0x0f0c Po) - case u'།': // TIBETAN MARK SHAD (0x0f0d Po) - case u'༎': // TIBETAN MARK NYIS SHAD (0x0f0e Po) - case u'༏': // TIBETAN MARK TSHEG SHAD (0x0f0f Po) - case u'༐': // TIBETAN MARK NYIS TSHEG SHAD (0x0f10 Po) - case u'༑': // TIBETAN MARK RIN CHEN SPUNGS SHAD (0x0f11 Po) - case u'༒': // TIBETAN MARK RGYA GRAM SHAD (0x0f12 Po) - case u'༔': // TIBETAN MARK GTER TSHEG (0x0f14 Po) - case u'༺': // TIBETAN MARK GUG RTAGS GYON (0x0f3a Ps) - case u'༻': // TIBETAN MARK GUG RTAGS GYAS (0x0f3b Pe) - case u'༼': // TIBETAN MARK ANG KHANG GYON (0x0f3c Ps) - case u'༽': // TIBETAN MARK ANG KHANG GYAS (0x0f3d Pe) - case u'྅': // TIBETAN MARK PALUTA (0x0f85 Po) - case u'࿐': // TIBETAN MARK BSKA- SHOG GI MGO RGYAN (0x0fd0 Po) - case u'࿑': // TIBETAN MARK MNYAM YIG GI MGO RGYAN (0x0fd1 Po) - case u'࿒': // TIBETAN MARK NYIS TSHEG (0x0fd2 Po) - case u'࿓': // TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA (0x0fd3 Po) - case u'࿔': // TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA (0x0fd4 Po) - case u'࿙': // TIBETAN MARK LEADING MCHAN RTAGS (0x0fd9 Po) - case u'࿚': // TIBETAN MARK TRAILING MCHAN RTAGS (0x0fda Po) - case u'၊': // MYANMAR SIGN LITTLE SECTION (0x104a Po) - case u'။': // MYANMAR SIGN SECTION (0x104b Po) - case u'၌': // MYANMAR SYMBOL LOCATIVE (0x104c Po) - case u'၍': // MYANMAR SYMBOL COMPLETED (0x104d Po) - case u'၎': // MYANMAR SYMBOL AFOREMENTIONED (0x104e Po) - case u'၏': // MYANMAR SYMBOL GENITIVE (0x104f Po) - case u'჻': // GEORGIAN PARAGRAPH SEPARATOR (0x10fb Po) - case u'፠': // ETHIOPIC SECTION MARK (0x1360 Po) - case u'፡': // ETHIOPIC WORDSPACE (0x1361 Po) - case u'።': // ETHIOPIC FULL STOP (0x1362 Po) - case u'፣': // ETHIOPIC COMMA (0x1363 Po) - case u'፤': // ETHIOPIC SEMICOLON (0x1364 Po) - case u'፥': // ETHIOPIC COLON (0x1365 Po) - case u'፦': // ETHIOPIC PREFACE COLON (0x1366 Po) - case u'፧': // ETHIOPIC QUESTION MARK (0x1367 Po) - case u'፨': // ETHIOPIC PARAGRAPH SEPARATOR (0x1368 Po) - case u'᐀': // CANADIAN SYLLABICS HYPHEN (0x1400 Pd) - case u'᙮': // CANADIAN SYLLABICS FULL STOP (0x166e Po) - case u'᚛': // OGHAM FEATHER MARK (0x169b Ps) - case u'᚜': // OGHAM REVERSED FEATHER MARK (0x169c Pe) - case u'᛫': // RUNIC SINGLE PUNCTUATION (0x16eb Po) - case u'᛬': // RUNIC MULTIPLE PUNCTUATION (0x16ec Po) - case u'᛭': // RUNIC CROSS PUNCTUATION (0x16ed Po) - case u'᜵': // PHILIPPINE SINGLE PUNCTUATION (0x1735 Po) - case u'᜶': // PHILIPPINE DOUBLE PUNCTUATION (0x1736 Po) - case u'។': // KHMER SIGN KHAN (0x17d4 Po) - case u'៕': // KHMER SIGN BARIYOOSAN (0x17d5 Po) - case u'៖': // KHMER SIGN CAMNUC PII KUUH (0x17d6 Po) - case u'៘': // KHMER SIGN BEYYAL (0x17d8 Po) - case u'៙': // KHMER SIGN PHNAEK MUAN (0x17d9 Po) - case u'៚': // KHMER SIGN KOOMUUT (0x17da Po) - case u'᠀': // MONGOLIAN BIRGA (0x1800 Po) - case u'᠁': // MONGOLIAN ELLIPSIS (0x1801 Po) - case u'᠂': // MONGOLIAN COMMA (0x1802 Po) - case u'᠃': // MONGOLIAN FULL STOP (0x1803 Po) - case u'᠄': // MONGOLIAN COLON (0x1804 Po) - case u'᠅': // MONGOLIAN FOUR DOTS (0x1805 Po) - case u'᠆': // MONGOLIAN TODO SOFT HYPHEN (0x1806 Pd) - case u'᠇': // MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER (0x1807 Po) - case u'᠈': // MONGOLIAN MANCHU COMMA (0x1808 Po) - case u'᠉': // MONGOLIAN MANCHU FULL STOP (0x1809 Po) - case u'᠊': // MONGOLIAN NIRUGU (0x180a Po) - case u'᥄': // LIMBU EXCLAMATION MARK (0x1944 Po) - case u'᥅': // LIMBU QUESTION MARK (0x1945 Po) - case u'᨞': // BUGINESE PALLAWA (0x1a1e Po) - case u'᨟': // BUGINESE END OF SECTION (0x1a1f Po) - case u'᱾': // OL CHIKI PUNCTUATION MUCAAD (0x1c7e Po) - case u'᱿': // OL CHIKI PUNCTUATION DOUBLE MUCAAD (0x1c7f Po) - case u'‐': // HYPHEN (0x2010 Pd) - case u'‑': // NON-BREAKING HYPHEN (0x2011 Pd) - case u'‒': // FIGURE DASH (0x2012 Pd) - case u'–': // EN DASH (0x2013 Pd) - case u'—': // EM DASH (0x2014 Pd) - case u'―': // HORIZONTAL BAR (0x2015 Pd) - case u'‖': // DOUBLE VERTICAL LINE (0x2016 Po) - case u'‗': // DOUBLE LOW LINE (0x2017 Po) - case u'‘': // LEFT SINGLE QUOTATION MARK (0x2018 Pi) - case u'’': // RIGHT SINGLE QUOTATION MARK (0x2019 Pf) - case u'‚': // SINGLE LOW-9 QUOTATION MARK (0x201a Ps) - case u'‛': // SINGLE HIGH-REVERSED-9 QUOTATION MARK (0x201b Pi) - case u'“': // LEFT DOUBLE QUOTATION MARK (0x201c Pi) - case u'”': // RIGHT DOUBLE QUOTATION MARK (0x201d Pf) - case u'„': // DOUBLE LOW-9 QUOTATION MARK (0x201e Ps) - case u'‟': // DOUBLE HIGH-REVERSED-9 QUOTATION MARK (0x201f Pi) - case u'†': // DAGGER (0x2020 Po) - case u'‡': // DOUBLE DAGGER (0x2021 Po) - case u'•': // BULLET (0x2022 Po) - case u'‣': // TRIANGULAR BULLET (0x2023 Po) - case u'․': // ONE DOT LEADER (0x2024 Po) - case u'‥': // TWO DOT LEADER (0x2025 Po) - case u'…': // HORIZONTAL ELLIPSIS (0x2026 Po) - case u'‧': // HYPHENATION POINT (0x2027 Po) - case u'‰': // PER MILLE SIGN (0x2030 Po) - case u'‱': // PER TEN THOUSAND SIGN (0x2031 Po) - case u'′': // PRIME (0x2032 Po) - case u'″': // DOUBLE PRIME (0x2033 Po) - case u'‴': // TRIPLE PRIME (0x2034 Po) - case u'‵': // REVERSED PRIME (0x2035 Po) - case u'‶': // REVERSED DOUBLE PRIME (0x2036 Po) - case u'‷': // REVERSED TRIPLE PRIME (0x2037 Po) - case u'‸': // CARET (0x2038 Po) - case u'‹': // SINGLE LEFT-POINTING ANGLE QUOTATION MARK (0x2039 Pi) - case u'›': // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (0x203a Pf) - case u'※': // REFERENCE MARK (0x203b Po) - case u'‼': // DOUBLE EXCLAMATION MARK (0x203c Po) - case u'‽': // INTERROBANG (0x203d Po) - case u'‾': // OVERLINE (0x203e Po) - case u'‿': // UNDERTIE (0x203f Pc) - case u'⁀': // CHARACTER TIE (0x2040 Pc) - case u'⁁': // CARET INSERTION POINT (0x2041 Po) - case u'⁂': // ASTERISM (0x2042 Po) - case u'⁃': // HYPHEN BULLET (0x2043 Po) - case u'⁅': // LEFT SQUARE BRACKET WITH QUILL (0x2045 Ps) - case u'⁆': // RIGHT SQUARE BRACKET WITH QUILL (0x2046 Pe) - case u'⁇': // DOUBLE QUESTION MARK (0x2047 Po) - case u'⁈': // QUESTION EXCLAMATION MARK (0x2048 Po) - case u'⁉': // EXCLAMATION QUESTION MARK (0x2049 Po) - case u'⁊': // TIRONIAN SIGN ET (0x204a Po) - case u'⁋': // REVERSED PILCROW SIGN (0x204b Po) - case u'⁌': // BLACK LEFTWARDS BULLET (0x204c Po) - case u'⁍': // BLACK RIGHTWARDS BULLET (0x204d Po) - case u'⁎': // LOW ASTERISK (0x204e Po) - case u'⁏': // REVERSED SEMICOLON (0x204f Po) - case u'⁐': // CLOSE UP (0x2050 Po) - case u'⁑': // TWO ASTERISKS ALIGNED VERTICALLY (0x2051 Po) - case u'⁓': // SWUNG DASH (0x2053 Po) - case u'⁔': // INVERTED UNDERTIE (0x2054 Pc) - case u'⁕': // FLOWER PUNCTUATION MARK (0x2055 Po) - case u'⁖': // THREE DOT PUNCTUATION (0x2056 Po) - case u'⁗': // QUADRUPLE PRIME (0x2057 Po) - case u'⁘': // FOUR DOT PUNCTUATION (0x2058 Po) - case u'⁙': // FIVE DOT PUNCTUATION (0x2059 Po) - case u'⁚': // TWO DOT PUNCTUATION (0x205a Po) - case u'⁛': // FOUR DOT MARK (0x205b Po) - case u'⁜': // DOTTED CROSS (0x205c Po) - case u'⁝': // TRICOLON (0x205d Po) - case u'⁞': // VERTICAL FOUR DOTS (0x205e Po) - case u'⁽': // SUPERSCRIPT LEFT PARENTHESIS (0x207d Ps) - case u'⁾': // SUPERSCRIPT RIGHT PARENTHESIS (0x207e Pe) - case u'₍': // SUBSCRIPT LEFT PARENTHESIS (0x208d Ps) - case u'₎': // SUBSCRIPT RIGHT PARENTHESIS (0x208e Pe) - case u'⌈': // LEFT CEILING (0x2308 Ps) - case u'⌉': // RIGHT CEILING (0x2309 Pe) - case u'⌊': // LEFT FLOOR (0x230a Ps) - case u'⌋': // RIGHT FLOOR (0x230b Pe) - case u'〈': // LEFT-POINTING ANGLE BRACKET (0x2329 Ps) - case u'〉': // RIGHT-POINTING ANGLE BRACKET (0x232a Pe) - case u'❨': // MEDIUM LEFT PARENTHESIS ORNAMENT (0x2768 Ps) - case u'❩': // MEDIUM RIGHT PARENTHESIS ORNAMENT (0x2769 Pe) - case u'❪': // MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT (0x276a Ps) - case u'❫': // MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT (0x276b Pe) - case u'❬': // MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT (0x276c Ps) - case u'❭': // MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT (0x276d Pe) - case u'❮': // HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT (0x276e Ps) - case u'❯': // HEAVY RIGHT-POINTING ANGLE QUOT MARK ORNAMENT (0x276f Pe) - case u'❰': // HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT (0x2770 Ps) - case u'❱': // HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT (0x2771 Pe) - case u'❲': // LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT (0x2772 Ps) - case u'❳': // LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT (0x2773 Pe) - case u'❴': // MEDIUM LEFT CURLY BRACKET ORNAMENT (0x2774 Ps) - case u'❵': // MEDIUM RIGHT CURLY BRACKET ORNAMENT (0x2775 Pe) - case u'⟅': // LEFT S-SHAPED BAG DELIMITER (0x27c5 Ps) - case u'⟆': // RIGHT S-SHAPED BAG DELIMITER (0x27c6 Pe) - case u'⟦': // MATHEMATICAL LEFT WHITE SQUARE BRACKET (0x27e6 Ps) - case u'⟧': // MATHEMATICAL RIGHT WHITE SQUARE BRACKET (0x27e7 Pe) - case u'⟨': // MATHEMATICAL LEFT ANGLE BRACKET (0x27e8 Ps) - case u'⟩': // MATHEMATICAL RIGHT ANGLE BRACKET (0x27e9 Pe) - case u'⟪': // MATHEMATICAL LEFT DOUBLE ANGLE BRACKET (0x27ea Ps) - case u'⟫': // MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET (0x27eb Pe) - case u'⟬': // MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET (0x27ec Ps) - case u'⟭': // MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET (0x27ed Pe) - case u'⟮': // MATHEMATICAL LEFT FLATTENED PARENTHESIS (0x27ee Ps) - case u'⟯': // MATHEMATICAL RIGHT FLATTENED PARENTHESIS (0x27ef Pe) - case u'⦃': // LEFT WHITE CURLY BRACKET (0x2983 Ps) - case u'⦄': // RIGHT WHITE CURLY BRACKET (0x2984 Pe) - case u'⦅': // LEFT WHITE PARENTHESIS (0x2985 Ps) - case u'⦆': // RIGHT WHITE PARENTHESIS (0x2986 Pe) - case u'⦇': // Z NOTATION LEFT IMAGE BRACKET (0x2987 Ps) - case u'⦈': // Z NOTATION RIGHT IMAGE BRACKET (0x2988 Pe) - case u'⦉': // Z NOTATION LEFT BINDING BRACKET (0x2989 Ps) - case u'⦊': // Z NOTATION RIGHT BINDING BRACKET (0x298a Pe) - case u'⦋': // LEFT SQUARE BRACKET WITH UNDERBAR (0x298b Ps) - case u'⦌': // RIGHT SQUARE BRACKET WITH UNDERBAR (0x298c Pe) - case u'⦍': // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER (0x298d Ps) - case u'⦎': // RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER (0x298e Pe) - case u'⦏': // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER (0x298f Ps) - case u'⦐': // RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER (0x2990 Pe) - case u'⦑': // LEFT ANGLE BRACKET WITH DOT (0x2991 Ps) - case u'⦒': // RIGHT ANGLE BRACKET WITH DOT (0x2992 Pe) - case u'⦓': // LEFT ARC LESS-THAN BRACKET (0x2993 Ps) - case u'⦔': // RIGHT ARC GREATER-THAN BRACKET (0x2994 Pe) - case u'⦗': // LEFT BLACK TORTOISE SHELL BRACKET (0x2997 Ps) - case u'⦘': // RIGHT BLACK TORTOISE SHELL BRACKET (0x2998 Pe) - case u'⧘': // LEFT WIGGLY FENCE (0x29d8 Ps) - case u'⧙': // RIGHT WIGGLY FENCE (0x29d9 Pe) - case u'⧚': // LEFT DOUBLE WIGGLY FENCE (0x29da Ps) - case u'⧛': // RIGHT DOUBLE WIGGLY FENCE (0x29db Pe) - case u'⧼': // LEFT-POINTING CURVED ANGLE BRACKET (0x29fc Ps) - case u'⧽': // RIGHT-POINTING CURVED ANGLE BRACKET (0x29fd Pe) - case u'⵰': // TIFINAGH SEPARATOR MARK (0x2d70 Po) - case u'⸎': // EDITORIAL CORONIS (0x2e0e Po) - case u'⸏': // PARAGRAPHOS (0x2e0f Po) - case u'⸐': // FORKED PARAGRAPHOS (0x2e10 Po) - case u'⸑': // REVERSED FORKED PARAGRAPHOS (0x2e11 Po) - case u'⸒': // HYPODIASTOLE (0x2e12 Po) - case u'⸓': // DOTTED OBELOS (0x2e13 Po) - case u'⸔': // DOWNWARDS ANCORA (0x2e14 Po) - case u'⸕': // UPWARDS ANCORA (0x2e15 Po) - case u'⸖': // DOTTED RIGHT-POINTING ANGLE (0x2e16 Po) - case u'⸗': // DOUBLE OBLIQUE HYPHEN (0x2e17 Pd) - case u'⸙': // PALM BRANCH (0x2e19 Po) - case u'⸚': // HYPHEN WITH DIAERESIS (0x2e1a Pd) - case u'⸛': // TILDE WITH RING ABOVE (0x2e1b Po) - case u'⸞': // TILDE WITH DOT ABOVE (0x2e1e Po) - case u'⸟': // TILDE WITH DOT BELOW (0x2e1f Po) - case u'⸪': // TWO DOTS OVER ONE DOT PUNCTUATION (0x2e2a Po) - case u'⸫': // ONE DOT OVER TWO DOTS PUNCTUATION (0x2e2b Po) - case u'⸬': // SQUARED FOUR DOT PUNCTUATION (0x2e2c Po) - case u'⸭': // FIVE DOT MARK (0x2e2d Po) - case u'⸮': // REVERSED QUESTION MARK (0x2e2e Po) - case u'⸰': // RING POINT (0x2e30 Po) - case u'⸱': // WORD SEPARATOR MIDDLE DOT (0x2e31 Po) - case u'⸲': // TURNED COMMA (0x2e32 Po) - case u'⸳': // RAISED DOT (0x2e33 Po) - case u'⸴': // RAISED COMMA (0x2e34 Po) - case u'⸵': // TURNED SEMICOLON (0x2e35 Po) - case u'⸶': // DAGGER WITH LEFT GUARD (0x2e36 Po) - case u'⸷': // DAGGER WITH RIGHT GUARD (0x2e37 Po) - case u'⸸': // TURNED DAGGER (0x2e38 Po) - case u'⸹': // TOP HALF SECTION SIGN (0x2e39 Po) - case u'⸺': // TWO-EM DASH (0x2e3a Pd) - case u'⸻': // THREE-EM DASH (0x2e3b Pd) - case u'⸼': // STENOGRAPHIC FULL STOP (0x2e3c Po) - case u'⸽': // VERTICAL SIX DOTS (0x2e3d Po) - case u'⸾': // WIGGLY VERTICAL LINE (0x2e3e Po) - case u'⸿': // CAPITULUM (0x2e3f Po) - case u'⹀': // DOUBLE HYPHEN (0x2e40 Pd) - case u'⹁': // REVERSED COMMA (0x2e41 Po) - case u'⹂': // DOUBLE LOW-REVERSED-9 QUOTATION MARK (0x2e42 Ps) - case u'⹃': // DASH WITH LEFT UPTURN (0x2e43 Po) - case u'⹄': // DOUBLE SUSPENSION MARK (0x2e44 Po) - case u'⹅': // INVERTED LOW KAVYKA (0x2e45 Po) - case u'⹆': // INVERTED LOW KAVYKA WITH KAVYKA ABOVE (0x2e46 Po) - case u'⹇': // LOW KAVYKA (0x2e47 Po) - case u'⹈': // LOW KAVYKA WITH DOT (0x2e48 Po) - case u'⹉': // DOUBLE STACKED COMMA (0x2e49 Po) - case u'⹊': // DOTTED SOLIDUS (0x2e4a Po) - case u'⹋': // TRIPLE DAGGER (0x2e4b Po) - case u'⹌': // MEDIEVAL COMMA (0x2e4c Po) - case u'⹍': // PARAGRAPHUS MARK (0x2e4d Po) - case u'⹎': // PUNCTUS ELEVATUS MARK (0x2e4e Po) - case u'⹏': // CORNISH VERSE DIVIDER (0x2e4f Po) - case u'、': // IDEOGRAPHIC COMMA (0x3001 Po) - case u'。': // IDEOGRAPHIC FULL STOP (0x3002 Po) - case u'〃': // DITTO MARK (0x3003 Po) - case u'〈': // LEFT ANGLE BRACKET (0x3008 Ps) - case u'〉': // RIGHT ANGLE BRACKET (0x3009 Pe) - case u'《': // LEFT DOUBLE ANGLE BRACKET (0x300a Ps) - case u'》': // RIGHT DOUBLE ANGLE BRACKET (0x300b Pe) - case u'「': // LEFT CORNER BRACKET (0x300c Ps) - case u'」': // RIGHT CORNER BRACKET (0x300d Pe) - case u'『': // LEFT WHITE CORNER BRACKET (0x300e Ps) - case u'』': // RIGHT WHITE CORNER BRACKET (0x300f Pe) - case u'【': // LEFT BLACK LENTICULAR BRACKET (0x3010 Ps) - case u'】': // RIGHT BLACK LENTICULAR BRACKET (0x3011 Pe) - case u'〔': // LEFT TORTOISE SHELL BRACKET (0x3014 Ps) - case u'〕': // RIGHT TORTOISE SHELL BRACKET (0x3015 Pe) - case u'〖': // LEFT WHITE LENTICULAR BRACKET (0x3016 Ps) - case u'〗': // RIGHT WHITE LENTICULAR BRACKET (0x3017 Pe) - case u'〘': // LEFT WHITE TORTOISE SHELL BRACKET (0x3018 Ps) - case u'〙': // RIGHT WHITE TORTOISE SHELL BRACKET (0x3019 Pe) - case u'〚': // LEFT WHITE SQUARE BRACKET (0x301a Ps) - case u'〛': // RIGHT WHITE SQUARE BRACKET (0x301b Pe) - case u'〜': // WAVE DASH (0x301c Pd) - case u'〝': // REVERSED DOUBLE PRIME QUOTATION MARK (0x301d Ps) - case u'〞': // DOUBLE PRIME QUOTATION MARK (0x301e Pe) - case u'〟': // LOW DOUBLE PRIME QUOTATION MARK (0x301f Pe) - case u'〰': // WAVY DASH (0x3030 Pd) - case u'〽': // PART ALTERNATION MARK (0x303d Po) - case u'゠': // KATAKANA-HIRAGANA DOUBLE HYPHEN (0x30a0 Pd) - case u'・': // KATAKANA MIDDLE DOT (0x30fb Po) - case u'꓾': // LISU PUNCTUATION COMMA (0xa4fe Po) - case u'꓿': // LISU PUNCTUATION FULL STOP (0xa4ff Po) - case u'꘍': // VAI COMMA (0xa60d Po) - case u'꘎': // VAI FULL STOP (0xa60e Po) - case u'꘏': // VAI QUESTION MARK (0xa60f Po) - case u'꙾': // CYRILLIC KAVYKA (0xa67e Po) - case u'꡴': // PHAGS-PA SINGLE HEAD MARK (0xa874 Po) - case u'꡵': // PHAGS-PA DOUBLE HEAD MARK (0xa875 Po) - case u'꡶': // PHAGS-PA MARK SHAD (0xa876 Po) - case u'꡷': // PHAGS-PA MARK DOUBLE SHAD (0xa877 Po) - case u'꣎': // SAURASHTRA DANDA (0xa8ce Po) - case u'꣏': // SAURASHTRA DOUBLE DANDA (0xa8cf Po) - case u'꣸': // DEVANAGARI SIGN PUSHPIKA (0xa8f8 Po) - case u'꣹': // DEVANAGARI GAP FILLER (0xa8f9 Po) - case u'꣺': // DEVANAGARI CARET (0xa8fa Po) - case u'꣼': // DEVANAGARI SIGN SIDDHAM (0xa8fc Po) - case u'꧁': // JAVANESE LEFT RERENGGAN (0xa9c1 Po) - case u'꧂': // JAVANESE RIGHT RERENGGAN (0xa9c2 Po) - case u'꧃': // JAVANESE PADA ANDAP (0xa9c3 Po) - case u'꧄': // JAVANESE PADA MADYA (0xa9c4 Po) - case u'꧅': // JAVANESE PADA LUHUR (0xa9c5 Po) - case u'꧆': // JAVANESE PADA WINDU (0xa9c6 Po) - case u'꧇': // JAVANESE PADA PANGKAT (0xa9c7 Po) - case u'꧈': // JAVANESE PADA LINGSA (0xa9c8 Po) - case u'꧉': // JAVANESE PADA LUNGSI (0xa9c9 Po) - case u'꧊': // JAVANESE PADA ADEG (0xa9ca Po) - case u'꧋': // JAVANESE PADA ADEG ADEG (0xa9cb Po) - case u'꧌': // JAVANESE PADA PISELEH (0xa9cc Po) - case u'꧍': // JAVANESE TURNED PADA PISELEH (0xa9cd Po) - case u'꧞': // JAVANESE PADA TIRTA TUMETES (0xa9de Po) - case u'꧟': // JAVANESE PADA ISEN-ISEN (0xa9df Po) - case u'꩜': // CHAM PUNCTUATION SPIRAL (0xaa5c Po) - case u'꩝': // CHAM PUNCTUATION DANDA (0xaa5d Po) - case u'꩞': // CHAM PUNCTUATION DOUBLE DANDA (0xaa5e Po) - case u'꩟': // CHAM PUNCTUATION TRIPLE DANDA (0xaa5f Po) - case u'꫞': // TAI VIET SYMBOL HO HOI (0xaade Po) - case u'꫟': // TAI VIET SYMBOL KOI KOI (0xaadf Po) - case u'꫰': // MEETEI MAYEK CHEIKHAN (0xaaf0 Po) - case u'꫱': // MEETEI MAYEK AHANG KHUDAM (0xaaf1 Po) - case u'꯫': // MEETEI MAYEK CHEIKHEI (0xabeb Po) - case u'︐': // PRESENTATION FORM FOR VERTICAL COMMA (0xfe10 Po) - case u'︑': // PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA (0xfe11 Po) - case u'︒': // PRESENTATION FORM FOR VERTICAL IDEO FULL STOP (0xfe12 Po) - case u'︓': // PRESENTATION FORM FOR VERTICAL COLON (0xfe13 Po) - case u'︔': // PRESENTATION FORM FOR VERTICAL SEMICOLON (0xfe14 Po) - case u'︕': // PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (0xfe15 Po) - case u'︖': // PRESENTATION FORM FOR VERTICAL QUESTION MARK (0xfe16 Po) - case u'︗': // PRESENTATION ... LEFT WHITE LENTICULAR BRACKET (0xfe17 Ps) - case u'︘': // PRESENTATION ... RIGHT WHITE LENTICULAR BRAKCET (0xfe18 Pe) - case u'︙': // PRESENTATION ... VERTICAL HORIZONTAL ELLIPSIS (0xfe19 Po) - case u'︰': // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER (0xfe30 Po) - case u'︱': // PRESENTATION FORM FOR VERTICAL EM DASH (0xfe31 Pd) - case u'︲': // PRESENTATION FORM FOR VERTICAL EN DASH (0xfe32 Pd) - case u'︳': // PRESENTATION FORM FOR VERTICAL LOW LINE (0xfe33 Pc) - case u'︴': // PRESENTATION FORM FOR VERTICAL WAVY LOW LINE (0xfe34 Pc) - case u'︵': // PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS (0xfe35 Ps) - case u'︶': // PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS (0xfe36 Pe) - case u'︷': // PRESENTATION ... VERTICAL LEFT CURLY BRACKET (0xfe37 Ps) - case u'︸': // PRESENTATION ... VERTICAL RIGHT CURLY BRACKET (0xfe38 Pe) - case u'︹': // PRESENTATION ... LEFT TORTOISE SHELL BRACKET (0xfe39 Ps) - case u'︺': // PRESENTATION ... RIGHT TORTOISE SHELL BRACKET (0xfe3a Pe) - case u'︻': // PRESENTATION ... LEFT BLACK LENTICULAR BRACKET (0xfe3b Ps) - case u'︼': // PRESENTATION ... RIGHT BLACK LENTICULAR BRACKET (0xfe3c Pe) - case u'︽': // PRESENTATION ... LEFT DOUBLE ANGLE BRACKET (0xfe3d Ps) - case u'︾': // PRESENTATION ... RIGHT DOUBLE ANGLE BRACKET (0xfe3e Pe) - case u'︿': // PRESENTATION ... LEFT ANGLE BRACKET (0xfe3f Ps) - case u'﹀': // PRESENTATION ... RIGHT ANGLE BRACKET (0xfe40 Pe) - case u'﹁': // PRESENTATION ... LEFT CORNER BRACKET (0xfe41 Ps) - case u'﹂': // PRESENTATION ... RIGHT CORNER BRACKET (0xfe42 Pe) - case u'﹃': // PRESENTATION ... LEFT WHITE CORNER BRACKET (0xfe43 Ps) - case u'﹄': // PRESENTATION ... RIGHT WHITE CORNER BRACKET Pe) - case u'﹅': // SESAME DOT (0xfe45 Po) - case u'﹆': // WHITE SESAME DOT (0xfe46 Po) - case u'﹇': // PRESENTATION ... VERTICAL LEFT SQUARE BRACKET (0xfe47 Ps) - case u'﹈': // PRESENTATION ... VERTICAL RIGHT SQUARE BRACKET (0xfe48 Pe) - case u'﹉': // DASHED OVERLINE (0xfe49 Po) - case u'﹊': // CENTRELINE OVERLINE (0xfe4a Po) - case u'﹋': // WAVY OVERLINE (0xfe4b Po) - case u'﹌': // DOUBLE WAVY OVERLINE (0xfe4c Po) - case u'﹍': // DASHED LOW LINE (0xfe4d Pc) - case u'﹎': // CENTRELINE LOW LINE (0xfe4e Pc) - case u'﹏': // WAVY LOW LINE (0xfe4f Pc) - case u'﹐': // SMALL COMMA (0xfe50 Po) - case u'﹑': // SMALL IDEOGRAPHIC COMMA (0xfe51 Po) - case u'﹒': // SMALL FULL STOP (0xfe52 Po) - case u'﹔': // SMALL SEMICOLON (0xfe54 Po) - case u'﹕': // SMALL COLON (0xfe55 Po) - case u'﹖': // SMALL QUESTION MARK (0xfe56 Po) - case u'﹗': // SMALL EXCLAMATION MARK (0xfe57 Po) - case u'﹘': // SMALL EM DASH (0xfe58 Pd) - case u'﹙': // SMALL LEFT PARENTHESIS (0xfe59 Ps) - case u'﹚': // SMALL RIGHT PARENTHESIS (0xfe5a Pe) - case u'﹛': // SMALL LEFT CURLY BRACKET (0xfe5b Ps) - case u'﹜': // SMALL RIGHT CURLY BRACKET (0xfe5c Pe) - case u'﹝': // SMALL LEFT TORTOISE SHELL BRACKET (0xfe5d Ps) - case u'﹞': // SMALL RIGHT TORTOISE SHELL BRACKET (0xfe5e Pe) - case u'﹟': // SMALL NUMBER SIGN (0xfe5f Po) - case u'﹠': // SMALL AMPERSAND (0xfe60 Po) - case u'﹡': // SMALL ASTERISK (0xfe61 Po) - case u'﹣': // SMALL HYPHEN-MINUS (0xfe63 Pd) - case u'﹨': // SMALL REVERSE SOLIDUS (0xfe68 Po) - case u'﹪': // SMALL PERCENT SIGN (0xfe6a Po) - case u'﹫': // SMALL COMMERCIAL AT (0xfe6b Po) - case u'!': // FULLWIDTH EXCLAMATION MARK (0xff01 Po) - case u'"': // FULLWIDTH QUOTATION MARK (0xff02 Po) - case u'#': // FULLWIDTH NUMBER SIGN (0xff03 Po) - case u'%': // FULLWIDTH PERCENT SIGN (0xff05 Po) - case u'&': // FULLWIDTH AMPERSAND (0xff06 Po) - case u''': // FULLWIDTH APOSTROPHE (0xff07 Po) - case u'(': // FULLWIDTH LEFT PARENTHESIS (0xff08 Ps) - case u')': // FULLWIDTH RIGHT PARENTHESIS (0xff09 Pe) - case u'*': // FULLWIDTH ASTERISK (0xff0a Po) - case u',': // FULLWIDTH COMMA (0xff0c Po) - case u'-': // FULLWIDTH HYPHEN-MINUS (0xff0d Pd) - case u'.': // FULLWIDTH FULL STOP (0xff0e Po) - case u'/': // FULLWIDTH SOLIDUS (0xff0f Po) - case u':': // FULLWIDTH COLON (0xff1a Po) - case u';': // FULLWIDTH SEMICOLON (0xff1b Po) - case u'?': // FULLWIDTH QUESTION MARK (0xff1f Po) - case u'@': // FULLWIDTH COMMERCIAL AT (0xff20 Po) - case u'[': // FULLWIDTH LEFT SQUARE BRACKET (0xff3b Ps) - case u'\': // FULLWIDTH REVERSE SOLIDUS (0xff3c Po) - case u']': // FULLWIDTH RIGHT SQUARE BRACKET (0xff3d Pe) - case u'_': // FULLWIDTH LOW LINE (0xff3f Pc) - case u'{': // FULLWIDTH LEFT CURLY BRACKET (0xff5b Ps) - case u'}': // FULLWIDTH RIGHT CURLY BRACKET (0xff5d Pe) - case u'⦅': // FULLWIDTH LEFT WHITE PARENTHESIS (0xff5f Ps) - case u'⦆': // FULLWIDTH RIGHT WHITE PARENTHESIS (0xff60 Pe) - case u'。': // HALFWIDTH IDEOGRAPHIC FULL STOP (0xff61 Po) - case u'「': // HALFWIDTH LEFT CORNER BRACKET (0xff62 Ps) - case u'」': // HALFWIDTH RIGHT CORNER BRACKET (0xff63 Pe) - case u'、': // HALFWIDTH IDEOGRAPHIC COMMA (0xff64 Po) - case u'・': // HALFWIDTH KATAKANA MIDDLE DOT (0xff65 Po) - return 1; - default: - return 0; - } -} - -__weak_reference(iswpunct, iswpunct_l); diff --git a/libc/str/iswseparator.c b/libc/str/iswseparator.cc similarity index 94% rename from libc/str/iswseparator.c rename to libc/str/iswseparator.cc index 224fec28a..6ed2c7788 100644 --- a/libc/str/iswseparator.c +++ b/libc/str/iswseparator.cc @@ -1,7 +1,7 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ +│ vi: set et ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,9 +16,11 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" +#include "libc/str/has_char.h" #include "libc/wctype.h" -static const unsigned short kCodes[][2] = { +static const unsigned short kSeparators[][2] = { {0x00aa, 0x00aa}, /* 1x English */ {0x00b2, 0x00b3}, /* 2x English Arabic */ {0x00b5, 0x00b5}, /* 1x Greek */ @@ -172,7 +174,7 @@ static const unsigned short kCodes[][2] = { {0xffda, 0xffdc}, /* 3x Dubs */ }; -static const unsigned kAstralCodes[][2] = { +static const unsigned kAstralSeparators[][2] = { {0x10107, 0x10133}, /* 45x Aegean */ {0x10140, 0x10178}, /* 57x Ancient Greek Numbers */ {0x1018a, 0x1018b}, /* 2x Ancient Greek Numbers */ @@ -390,34 +392,11 @@ static const unsigned kAstralCodes[][2] = { * other things like blocks and emoji (So). */ int iswseparator(wint_t c) { - int m, l, r, n; - if (c < 0200) { - return !(('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || + if (c < 128) + return !(('0' <= c && c <= '9') || // + ('A' <= c && c <= 'Z') || // ('a' <= c && c <= 'z')); - } - if (c <= 0xffff) { - l = 0; - r = n = sizeof(kCodes) / sizeof(kCodes[0]); - while (l < r) { - m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - if (kCodes[m][1] < c) { - l = m + 1; - } else { - r = m; - } - } - return !(l < n && kCodes[l][0] <= c && c <= kCodes[l][1]); - } else { - l = 0; - r = n = sizeof(kAstralCodes) / sizeof(kAstralCodes[0]); - while (l < r) { - m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - if (kAstralCodes[m][1] < c) { - l = m + 1; - } else { - r = m; - } - } - return !(l < n && kAstralCodes[l][0] <= c && c <= kAstralCodes[l][1]); - } + if (c < 65536) + return has_char(kSeparators, ARRAYLEN(kSeparators), (unsigned short)c); + return has_char(kAstralSeparators, ARRAYLEN(kAstralSeparators), (unsigned)c); } diff --git a/libc/str/iswspace.c b/libc/str/iswspace.c index 44d62af9d..097e6ce51 100644 --- a/libc/str/iswspace.c +++ b/libc/str/iswspace.c @@ -41,7 +41,6 @@ int iswspace(wint_t c) { case 0x2004: // THREE-PER-EM SPACE (Zs) case 0x2005: // FOUR-PER-EM SPACE (Zs) case 0x2006: // SIX-PER-EM SPACE (Zs) - case 0x2007: // FIGURE SPACE (Zs) case 0x2008: // PUNCTUATION SPACE (Zs) case 0x2009: // THIN SPACE (Zs) case 0x200a: // HAIR SPACE (Zs) diff --git a/libc/str/iswupper.c b/libc/str/iswupper.c deleted file mode 100644 index aad3dd6e7..000000000 --- a/libc/str/iswupper.c +++ /dev/null @@ -1,164 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/wctype.h" - -/** - * Returns nonzero if c is uppercase letter. - */ -int iswupper(wint_t c) { - if (c < 0200) { - return 'A' <= c && c <= 'Z'; - } else { - if (towlower(c) != c) - return 1; - switch (c) { - case 0x03d2: /* ϒ Greek */ - case 0x03d3: /* ϓ Greek */ - case 0x03d4: /* ϔ Greek */ - case 0x2102: /* ℂ Letterlike */ - case 0x2107: /* ℇ Letterlike */ - case 0x210b: /* ℋ Letterlike */ - case 0x210c: /* ℌ Letterlike */ - case 0x210d: /* ℍ Letterlike */ - case 0x2110: /* ℐ Letterlike */ - case 0x2111: /* ℑ Letterlike */ - case 0x2112: /* ℒ Letterlike */ - case 0x2115: /* ℕ Letterlike */ - case 0x2119: /* ℙ Letterlike */ - case 0x211a: /* ℚ Letterlike */ - case 0x211b: /* ℛ Letterlike */ - case 0x211c: /* ℜ Letterlike */ - case 0x211d: /* ℝ Letterlike */ - case 0x2124: /* ℤ Letterlike */ - case 0x2128: /* ℨ Letterlike */ - case 0x212c: /* ℬ Letterlike */ - case 0x212d: /* ℭ Letterlike */ - case 0x2130: /* ℰ Letterlike */ - case 0x2131: /* ℱ Letterlike */ - case 0x2133: /* ℳ Letterlike */ - case 0x213e: /* ℾ Letterlike */ - case 0x213f: /* ℿ Letterlike */ - case 0x2145: /* ⅅ Letterlike */ - case 0x1d434: /* 𝐴 Math */ - case 0x1d435: /* 𝐵 Math */ - case 0x1d436: /* 𝐶 Math */ - case 0x1d437: /* 𝐷 Math */ - case 0x1d438: /* 𝐸 Math */ - case 0x1d439: /* 𝐹 Math */ - case 0x1d43a: /* 𝐺 Math */ - case 0x1d43b: /* 𝐻 Math */ - case 0x1d49c: /* 𝒜 Math */ - case 0x1d49e: /* 𝒞 Math */ - case 0x1d49f: /* 𝒟 Math */ - case 0x1d4a2: /* 𝒢 Math */ - case 0x1d4a5: /* 𝒥 Math */ - case 0x1d4a6: /* 𝒦 Math */ - case 0x1d4a9: /* 𝒩 Math */ - case 0x1d4aa: /* 𝒪 Math */ - case 0x1d4ab: /* 𝒫 Math */ - case 0x1d4ac: /* 𝒬 Math */ - case 0x1d504: /* 𝔄 Math */ - case 0x1d505: /* 𝔅 Math */ - case 0x1d507: /* 𝔇 Math */ - case 0x1d508: /* 𝔈 Math */ - case 0x1d509: /* 𝔉 Math */ - case 0x1d50a: /* 𝔊 Math */ - case 0x1d516: /* 𝔖 Math */ - case 0x1d517: /* 𝔗 Math */ - case 0x1d518: /* 𝔘 Math */ - case 0x1d519: /* 𝔙 Math */ - case 0x1d51a: /* 𝔚 Math */ - case 0x1d51b: /* 𝔛 Math */ - case 0x1d51c: /* 𝔜 Math */ - case 0x1d538: /* 𝔸 Math */ - case 0x1d539: /* 𝔹 Math */ - case 0x1d53b: /* 𝔻 Math */ - case 0x1d53c: /* 𝔼 Math */ - case 0x1d53d: /* 𝔽 Math */ - case 0x1d53e: /* 𝔾 Math */ - case 0x1d540: /* 𝕀 Math */ - case 0x1d541: /* 𝕁 Math */ - case 0x1d542: /* 𝕂 Math */ - case 0x1d543: /* 𝕃 Math */ - case 0x1d544: /* 𝕄 Math */ - case 0x1d546: /* 𝕆 Math */ - case 0x1d54a: /* 𝕊 Math */ - case 0x1d54b: /* 𝕋 Math */ - case 0x1d54c: /* 𝕌 Math */ - case 0x1d54d: /* 𝕍 Math */ - case 0x1d54e: /* 𝕎 Math */ - case 0x1d54f: /* 𝕏 Math */ - case 0x1d550: /* 𝕐 Math */ - case 0x1d6e3: /* 𝛣 Math */ - case 0x1d6e4: /* 𝛤 Math */ - case 0x1d6e5: /* 𝛥 Math */ - case 0x1d6e6: /* 𝛦 Math */ - case 0x1d6e7: /* 𝛧 Math */ - case 0x1d6e8: /* 𝛨 Math */ - case 0x1d6e9: /* 𝛩 Math */ - case 0x1d6ea: /* 𝛪 Math */ - case 0x1d6eb: /* 𝛫 Math */ - case 0x1d6ec: /* 𝛬 Math */ - case 0x1d6ed: /* 𝛭 Math */ - case 0x1d6ee: /* 𝛮 Math */ - case 0x1d6ef: /* 𝛯 Math */ - case 0x1d6f0: /* 𝛰 Math */ - case 0x1d6f1: /* 𝛱 Math */ - case 0x1d6f2: /* 𝛲 Math */ - case 0x1d6f3: /* 𝛳 Math */ - case 0x1d6f4: /* 𝛴 Math */ - case 0x1d6f5: /* 𝛵 Math */ - case 0x1d6f6: /* 𝛶 Math */ - case 0x1d6f7: /* 𝛷 Math */ - case 0x1d6f8: /* 𝛸 Math */ - case 0x1d6f9: /* 𝛹 Math */ - case 0x1d6fa: /* 𝛺 Math */ - case 0x1d72d: /* 𝜭 Math */ - case 0x1d72e: /* 𝜮 Math */ - case 0x1d72f: /* 𝜯 Math */ - case 0x1d730: /* 𝜰 Math */ - case 0x1d731: /* 𝜱 Math */ - case 0x1d732: /* 𝜲 Math */ - case 0x1d733: /* 𝜳 Math */ - case 0x1d734: /* 𝜴 Math */ - case 0x1d767: /* 𝝧 Math */ - case 0x1d768: /* 𝝨 Math */ - case 0x1d769: /* 𝝩 Math */ - case 0x1d76a: /* 𝝪 Math */ - case 0x1d76b: /* 𝝫 Math */ - case 0x1d76c: /* 𝝬 Math */ - case 0x1d76d: /* 𝝭 Math */ - case 0x1d76e: /* 𝝮 Math */ - case 0x1d7a1: /* 𝞡 Math */ - case 0x1d7a2: /* 𝞢 Math */ - case 0x1d7a3: /* 𝞣 Math */ - case 0x1d7a4: /* 𝞤 Math */ - case 0x1d7a5: /* 𝞥 Math */ - case 0x1d7a6: /* 𝞦 Math */ - case 0x1d7a7: /* 𝞧 Math */ - case 0x1d7a8: /* 𝞨 Math */ - case 0x1d7ca: /* 𝟊 Math */ - return 1; - default: - return 0; - } - } -} - -__weak_reference(iswupper, iswupper_l); diff --git a/libc/str/iswupper.cc b/libc/str/iswupper.cc new file mode 100644 index 000000000..4db11a3f4 --- /dev/null +++ b/libc/str/iswupper.cc @@ -0,0 +1,695 @@ +/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ +│ vi: set et ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/macros.h" +#include "libc/str/has_char.h" +#include "libc/wctype.h" + +static const unsigned short kUpper[][2] = { + {0x41, 0x5a}, // + {0xc0, 0xd6}, // + {0xd8, 0xde}, // + {0x100, 0x100}, // + {0x102, 0x102}, // + {0x104, 0x104}, // + {0x106, 0x106}, // + {0x108, 0x108}, // + {0x10a, 0x10a}, // + {0x10c, 0x10c}, // + {0x10e, 0x10e}, // + {0x110, 0x110}, // + {0x112, 0x112}, // + {0x114, 0x114}, // + {0x116, 0x116}, // + {0x118, 0x118}, // + {0x11a, 0x11a}, // + {0x11c, 0x11c}, // + {0x11e, 0x11e}, // + {0x120, 0x120}, // + {0x122, 0x122}, // + {0x124, 0x124}, // + {0x126, 0x126}, // + {0x128, 0x128}, // + {0x12a, 0x12a}, // + {0x12c, 0x12c}, // + {0x12e, 0x12e}, // + {0x130, 0x130}, // + {0x132, 0x132}, // + {0x134, 0x134}, // + {0x136, 0x136}, // + {0x139, 0x139}, // + {0x13b, 0x13b}, // + {0x13d, 0x13d}, // + {0x13f, 0x13f}, // + {0x141, 0x141}, // + {0x143, 0x143}, // + {0x145, 0x145}, // + {0x147, 0x147}, // + {0x14a, 0x14a}, // + {0x14c, 0x14c}, // + {0x14e, 0x14e}, // + {0x150, 0x150}, // + {0x152, 0x152}, // + {0x154, 0x154}, // + {0x156, 0x156}, // + {0x158, 0x158}, // + {0x15a, 0x15a}, // + {0x15c, 0x15c}, // + {0x15e, 0x15e}, // + {0x160, 0x160}, // + {0x162, 0x162}, // + {0x164, 0x164}, // + {0x166, 0x166}, // + {0x168, 0x168}, // + {0x16a, 0x16a}, // + {0x16c, 0x16c}, // + {0x16e, 0x16e}, // + {0x170, 0x170}, // + {0x172, 0x172}, // + {0x174, 0x174}, // + {0x176, 0x176}, // + {0x178, 0x179}, // + {0x17b, 0x17b}, // + {0x17d, 0x17d}, // + {0x181, 0x182}, // + {0x184, 0x184}, // + {0x186, 0x187}, // + {0x189, 0x18b}, // + {0x18e, 0x191}, // + {0x193, 0x194}, // + {0x196, 0x198}, // + {0x19c, 0x19d}, // + {0x19f, 0x1a0}, // + {0x1a2, 0x1a2}, // + {0x1a4, 0x1a4}, // + {0x1a6, 0x1a7}, // + {0x1a9, 0x1a9}, // + {0x1ac, 0x1ac}, // + {0x1ae, 0x1af}, // + {0x1b1, 0x1b3}, // + {0x1b5, 0x1b5}, // + {0x1b7, 0x1b8}, // + {0x1bc, 0x1bc}, // + {0x1c4, 0x1c5}, // + {0x1c7, 0x1c8}, // + {0x1ca, 0x1cb}, // + {0x1cd, 0x1cd}, // + {0x1cf, 0x1cf}, // + {0x1d1, 0x1d1}, // + {0x1d3, 0x1d3}, // + {0x1d5, 0x1d5}, // + {0x1d7, 0x1d7}, // + {0x1d9, 0x1d9}, // + {0x1db, 0x1db}, // + {0x1de, 0x1de}, // + {0x1e0, 0x1e0}, // + {0x1e2, 0x1e2}, // + {0x1e4, 0x1e4}, // + {0x1e6, 0x1e6}, // + {0x1e8, 0x1e8}, // + {0x1ea, 0x1ea}, // + {0x1ec, 0x1ec}, // + {0x1ee, 0x1ee}, // + {0x1f1, 0x1f2}, // + {0x1f4, 0x1f4}, // + {0x1f6, 0x1f8}, // + {0x1fa, 0x1fa}, // + {0x1fc, 0x1fc}, // + {0x1fe, 0x1fe}, // + {0x200, 0x200}, // + {0x202, 0x202}, // + {0x204, 0x204}, // + {0x206, 0x206}, // + {0x208, 0x208}, // + {0x20a, 0x20a}, // + {0x20c, 0x20c}, // + {0x20e, 0x20e}, // + {0x210, 0x210}, // + {0x212, 0x212}, // + {0x214, 0x214}, // + {0x216, 0x216}, // + {0x218, 0x218}, // + {0x21a, 0x21a}, // + {0x21c, 0x21c}, // + {0x21e, 0x21e}, // + {0x220, 0x220}, // + {0x222, 0x222}, // + {0x224, 0x224}, // + {0x226, 0x226}, // + {0x228, 0x228}, // + {0x22a, 0x22a}, // + {0x22c, 0x22c}, // + {0x22e, 0x22e}, // + {0x230, 0x230}, // + {0x232, 0x232}, // + {0x23a, 0x23b}, // + {0x23d, 0x23e}, // + {0x241, 0x241}, // + {0x243, 0x246}, // + {0x248, 0x248}, // + {0x24a, 0x24a}, // + {0x24c, 0x24c}, // + {0x24e, 0x24e}, // + {0x370, 0x370}, // + {0x372, 0x372}, // + {0x376, 0x376}, // + {0x37f, 0x37f}, // + {0x386, 0x386}, // + {0x388, 0x38a}, // + {0x38c, 0x38c}, // + {0x38e, 0x38f}, // + {0x391, 0x3a1}, // + {0x3a3, 0x3ab}, // + {0x3cf, 0x3cf}, // + {0x3d2, 0x3d4}, // + {0x3d8, 0x3d8}, // + {0x3da, 0x3da}, // + {0x3dc, 0x3dc}, // + {0x3de, 0x3de}, // + {0x3e0, 0x3e0}, // + {0x3e2, 0x3e2}, // + {0x3e4, 0x3e4}, // + {0x3e6, 0x3e6}, // + {0x3e8, 0x3e8}, // + {0x3ea, 0x3ea}, // + {0x3ec, 0x3ec}, // + {0x3ee, 0x3ee}, // + {0x3f4, 0x3f4}, // + {0x3f7, 0x3f7}, // + {0x3f9, 0x3fa}, // + {0x3fd, 0x42f}, // + {0x460, 0x460}, // + {0x462, 0x462}, // + {0x464, 0x464}, // + {0x466, 0x466}, // + {0x468, 0x468}, // + {0x46a, 0x46a}, // + {0x46c, 0x46c}, // + {0x46e, 0x46e}, // + {0x470, 0x470}, // + {0x472, 0x472}, // + {0x474, 0x474}, // + {0x476, 0x476}, // + {0x478, 0x478}, // + {0x47a, 0x47a}, // + {0x47c, 0x47c}, // + {0x47e, 0x47e}, // + {0x480, 0x480}, // + {0x48a, 0x48a}, // + {0x48c, 0x48c}, // + {0x48e, 0x48e}, // + {0x490, 0x490}, // + {0x492, 0x492}, // + {0x494, 0x494}, // + {0x496, 0x496}, // + {0x498, 0x498}, // + {0x49a, 0x49a}, // + {0x49c, 0x49c}, // + {0x49e, 0x49e}, // + {0x4a0, 0x4a0}, // + {0x4a2, 0x4a2}, // + {0x4a4, 0x4a4}, // + {0x4a6, 0x4a6}, // + {0x4a8, 0x4a8}, // + {0x4aa, 0x4aa}, // + {0x4ac, 0x4ac}, // + {0x4ae, 0x4ae}, // + {0x4b0, 0x4b0}, // + {0x4b2, 0x4b2}, // + {0x4b4, 0x4b4}, // + {0x4b6, 0x4b6}, // + {0x4b8, 0x4b8}, // + {0x4ba, 0x4ba}, // + {0x4bc, 0x4bc}, // + {0x4be, 0x4be}, // + {0x4c0, 0x4c1}, // + {0x4c3, 0x4c3}, // + {0x4c5, 0x4c5}, // + {0x4c7, 0x4c7}, // + {0x4c9, 0x4c9}, // + {0x4cb, 0x4cb}, // + {0x4cd, 0x4cd}, // + {0x4d0, 0x4d0}, // + {0x4d2, 0x4d2}, // + {0x4d4, 0x4d4}, // + {0x4d6, 0x4d6}, // + {0x4d8, 0x4d8}, // + {0x4da, 0x4da}, // + {0x4dc, 0x4dc}, // + {0x4de, 0x4de}, // + {0x4e0, 0x4e0}, // + {0x4e2, 0x4e2}, // + {0x4e4, 0x4e4}, // + {0x4e6, 0x4e6}, // + {0x4e8, 0x4e8}, // + {0x4ea, 0x4ea}, // + {0x4ec, 0x4ec}, // + {0x4ee, 0x4ee}, // + {0x4f0, 0x4f0}, // + {0x4f2, 0x4f2}, // + {0x4f4, 0x4f4}, // + {0x4f6, 0x4f6}, // + {0x4f8, 0x4f8}, // + {0x4fa, 0x4fa}, // + {0x4fc, 0x4fc}, // + {0x4fe, 0x4fe}, // + {0x500, 0x500}, // + {0x502, 0x502}, // + {0x504, 0x504}, // + {0x506, 0x506}, // + {0x508, 0x508}, // + {0x50a, 0x50a}, // + {0x50c, 0x50c}, // + {0x50e, 0x50e}, // + {0x510, 0x510}, // + {0x512, 0x512}, // + {0x514, 0x514}, // + {0x516, 0x516}, // + {0x518, 0x518}, // + {0x51a, 0x51a}, // + {0x51c, 0x51c}, // + {0x51e, 0x51e}, // + {0x520, 0x520}, // + {0x522, 0x522}, // + {0x524, 0x524}, // + {0x526, 0x526}, // + {0x528, 0x528}, // + {0x52a, 0x52a}, // + {0x52c, 0x52c}, // + {0x52e, 0x52e}, // + {0x531, 0x556}, // + {0x10a0, 0x10c5}, // + {0x10c7, 0x10c7}, // + {0x10cd, 0x10cd}, // + {0x13a0, 0x13f5}, // + {0x1c90, 0x1cba}, // + {0x1cbd, 0x1cbf}, // + {0x1e00, 0x1e00}, // + {0x1e02, 0x1e02}, // + {0x1e04, 0x1e04}, // + {0x1e06, 0x1e06}, // + {0x1e08, 0x1e08}, // + {0x1e0a, 0x1e0a}, // + {0x1e0c, 0x1e0c}, // + {0x1e0e, 0x1e0e}, // + {0x1e10, 0x1e10}, // + {0x1e12, 0x1e12}, // + {0x1e14, 0x1e14}, // + {0x1e16, 0x1e16}, // + {0x1e18, 0x1e18}, // + {0x1e1a, 0x1e1a}, // + {0x1e1c, 0x1e1c}, // + {0x1e1e, 0x1e1e}, // + {0x1e20, 0x1e20}, // + {0x1e22, 0x1e22}, // + {0x1e24, 0x1e24}, // + {0x1e26, 0x1e26}, // + {0x1e28, 0x1e28}, // + {0x1e2a, 0x1e2a}, // + {0x1e2c, 0x1e2c}, // + {0x1e2e, 0x1e2e}, // + {0x1e30, 0x1e30}, // + {0x1e32, 0x1e32}, // + {0x1e34, 0x1e34}, // + {0x1e36, 0x1e36}, // + {0x1e38, 0x1e38}, // + {0x1e3a, 0x1e3a}, // + {0x1e3c, 0x1e3c}, // + {0x1e3e, 0x1e3e}, // + {0x1e40, 0x1e40}, // + {0x1e42, 0x1e42}, // + {0x1e44, 0x1e44}, // + {0x1e46, 0x1e46}, // + {0x1e48, 0x1e48}, // + {0x1e4a, 0x1e4a}, // + {0x1e4c, 0x1e4c}, // + {0x1e4e, 0x1e4e}, // + {0x1e50, 0x1e50}, // + {0x1e52, 0x1e52}, // + {0x1e54, 0x1e54}, // + {0x1e56, 0x1e56}, // + {0x1e58, 0x1e58}, // + {0x1e5a, 0x1e5a}, // + {0x1e5c, 0x1e5c}, // + {0x1e5e, 0x1e5e}, // + {0x1e60, 0x1e60}, // + {0x1e62, 0x1e62}, // + {0x1e64, 0x1e64}, // + {0x1e66, 0x1e66}, // + {0x1e68, 0x1e68}, // + {0x1e6a, 0x1e6a}, // + {0x1e6c, 0x1e6c}, // + {0x1e6e, 0x1e6e}, // + {0x1e70, 0x1e70}, // + {0x1e72, 0x1e72}, // + {0x1e74, 0x1e74}, // + {0x1e76, 0x1e76}, // + {0x1e78, 0x1e78}, // + {0x1e7a, 0x1e7a}, // + {0x1e7c, 0x1e7c}, // + {0x1e7e, 0x1e7e}, // + {0x1e80, 0x1e80}, // + {0x1e82, 0x1e82}, // + {0x1e84, 0x1e84}, // + {0x1e86, 0x1e86}, // + {0x1e88, 0x1e88}, // + {0x1e8a, 0x1e8a}, // + {0x1e8c, 0x1e8c}, // + {0x1e8e, 0x1e8e}, // + {0x1e90, 0x1e90}, // + {0x1e92, 0x1e92}, // + {0x1e94, 0x1e94}, // + {0x1e9e, 0x1e9e}, // + {0x1ea0, 0x1ea0}, // + {0x1ea2, 0x1ea2}, // + {0x1ea4, 0x1ea4}, // + {0x1ea6, 0x1ea6}, // + {0x1ea8, 0x1ea8}, // + {0x1eaa, 0x1eaa}, // + {0x1eac, 0x1eac}, // + {0x1eae, 0x1eae}, // + {0x1eb0, 0x1eb0}, // + {0x1eb2, 0x1eb2}, // + {0x1eb4, 0x1eb4}, // + {0x1eb6, 0x1eb6}, // + {0x1eb8, 0x1eb8}, // + {0x1eba, 0x1eba}, // + {0x1ebc, 0x1ebc}, // + {0x1ebe, 0x1ebe}, // + {0x1ec0, 0x1ec0}, // + {0x1ec2, 0x1ec2}, // + {0x1ec4, 0x1ec4}, // + {0x1ec6, 0x1ec6}, // + {0x1ec8, 0x1ec8}, // + {0x1eca, 0x1eca}, // + {0x1ecc, 0x1ecc}, // + {0x1ece, 0x1ece}, // + {0x1ed0, 0x1ed0}, // + {0x1ed2, 0x1ed2}, // + {0x1ed4, 0x1ed4}, // + {0x1ed6, 0x1ed6}, // + {0x1ed8, 0x1ed8}, // + {0x1eda, 0x1eda}, // + {0x1edc, 0x1edc}, // + {0x1ede, 0x1ede}, // + {0x1ee0, 0x1ee0}, // + {0x1ee2, 0x1ee2}, // + {0x1ee4, 0x1ee4}, // + {0x1ee6, 0x1ee6}, // + {0x1ee8, 0x1ee8}, // + {0x1eea, 0x1eea}, // + {0x1eec, 0x1eec}, // + {0x1eee, 0x1eee}, // + {0x1ef0, 0x1ef0}, // + {0x1ef2, 0x1ef2}, // + {0x1ef4, 0x1ef4}, // + {0x1ef6, 0x1ef6}, // + {0x1ef8, 0x1ef8}, // + {0x1efa, 0x1efa}, // + {0x1efc, 0x1efc}, // + {0x1efe, 0x1efe}, // + {0x1f08, 0x1f0f}, // + {0x1f18, 0x1f1d}, // + {0x1f28, 0x1f2f}, // + {0x1f38, 0x1f3f}, // + {0x1f48, 0x1f4d}, // + {0x1f59, 0x1f59}, // + {0x1f5b, 0x1f5b}, // + {0x1f5d, 0x1f5d}, // + {0x1f5f, 0x1f5f}, // + {0x1f68, 0x1f6f}, // + {0x1f88, 0x1f8f}, // + {0x1f98, 0x1f9f}, // + {0x1fa8, 0x1faf}, // + {0x1fb8, 0x1fbc}, // + {0x1fc8, 0x1fcc}, // + {0x1fd8, 0x1fdb}, // + {0x1fe8, 0x1fec}, // + {0x1ff8, 0x1ffc}, // + {0x2102, 0x2102}, // + {0x2107, 0x2107}, // + {0x210b, 0x210d}, // + {0x2110, 0x2112}, // + {0x2115, 0x2115}, // + {0x2119, 0x211d}, // + {0x2124, 0x2124}, // + {0x2126, 0x2126}, // + {0x2128, 0x2128}, // + {0x212a, 0x212d}, // + {0x2130, 0x2133}, // + {0x213e, 0x213f}, // + {0x2145, 0x2145}, // + {0x2160, 0x216f}, // + {0x2183, 0x2183}, // + {0x24b6, 0x24cf}, // + {0x2c00, 0x2c2f}, // + {0x2c60, 0x2c60}, // + {0x2c62, 0x2c64}, // + {0x2c67, 0x2c67}, // + {0x2c69, 0x2c69}, // + {0x2c6b, 0x2c6b}, // + {0x2c6d, 0x2c70}, // + {0x2c72, 0x2c72}, // + {0x2c75, 0x2c75}, // + {0x2c7e, 0x2c80}, // + {0x2c82, 0x2c82}, // + {0x2c84, 0x2c84}, // + {0x2c86, 0x2c86}, // + {0x2c88, 0x2c88}, // + {0x2c8a, 0x2c8a}, // + {0x2c8c, 0x2c8c}, // + {0x2c8e, 0x2c8e}, // + {0x2c90, 0x2c90}, // + {0x2c92, 0x2c92}, // + {0x2c94, 0x2c94}, // + {0x2c96, 0x2c96}, // + {0x2c98, 0x2c98}, // + {0x2c9a, 0x2c9a}, // + {0x2c9c, 0x2c9c}, // + {0x2c9e, 0x2c9e}, // + {0x2ca0, 0x2ca0}, // + {0x2ca2, 0x2ca2}, // + {0x2ca4, 0x2ca4}, // + {0x2ca6, 0x2ca6}, // + {0x2ca8, 0x2ca8}, // + {0x2caa, 0x2caa}, // + {0x2cac, 0x2cac}, // + {0x2cae, 0x2cae}, // + {0x2cb0, 0x2cb0}, // + {0x2cb2, 0x2cb2}, // + {0x2cb4, 0x2cb4}, // + {0x2cb6, 0x2cb6}, // + {0x2cb8, 0x2cb8}, // + {0x2cba, 0x2cba}, // + {0x2cbc, 0x2cbc}, // + {0x2cbe, 0x2cbe}, // + {0x2cc0, 0x2cc0}, // + {0x2cc2, 0x2cc2}, // + {0x2cc4, 0x2cc4}, // + {0x2cc6, 0x2cc6}, // + {0x2cc8, 0x2cc8}, // + {0x2cca, 0x2cca}, // + {0x2ccc, 0x2ccc}, // + {0x2cce, 0x2cce}, // + {0x2cd0, 0x2cd0}, // + {0x2cd2, 0x2cd2}, // + {0x2cd4, 0x2cd4}, // + {0x2cd6, 0x2cd6}, // + {0x2cd8, 0x2cd8}, // + {0x2cda, 0x2cda}, // + {0x2cdc, 0x2cdc}, // + {0x2cde, 0x2cde}, // + {0x2ce0, 0x2ce0}, // + {0x2ce2, 0x2ce2}, // + {0x2ceb, 0x2ceb}, // + {0x2ced, 0x2ced}, // + {0x2cf2, 0x2cf2}, // + {0xa640, 0xa640}, // + {0xa642, 0xa642}, // + {0xa644, 0xa644}, // + {0xa646, 0xa646}, // + {0xa648, 0xa648}, // + {0xa64a, 0xa64a}, // + {0xa64c, 0xa64c}, // + {0xa64e, 0xa64e}, // + {0xa650, 0xa650}, // + {0xa652, 0xa652}, // + {0xa654, 0xa654}, // + {0xa656, 0xa656}, // + {0xa658, 0xa658}, // + {0xa65a, 0xa65a}, // + {0xa65c, 0xa65c}, // + {0xa65e, 0xa65e}, // + {0xa660, 0xa660}, // + {0xa662, 0xa662}, // + {0xa664, 0xa664}, // + {0xa666, 0xa666}, // + {0xa668, 0xa668}, // + {0xa66a, 0xa66a}, // + {0xa66c, 0xa66c}, // + {0xa680, 0xa680}, // + {0xa682, 0xa682}, // + {0xa684, 0xa684}, // + {0xa686, 0xa686}, // + {0xa688, 0xa688}, // + {0xa68a, 0xa68a}, // + {0xa68c, 0xa68c}, // + {0xa68e, 0xa68e}, // + {0xa690, 0xa690}, // + {0xa692, 0xa692}, // + {0xa694, 0xa694}, // + {0xa696, 0xa696}, // + {0xa698, 0xa698}, // + {0xa69a, 0xa69a}, // + {0xa722, 0xa722}, // + {0xa724, 0xa724}, // + {0xa726, 0xa726}, // + {0xa728, 0xa728}, // + {0xa72a, 0xa72a}, // + {0xa72c, 0xa72c}, // + {0xa72e, 0xa72e}, // + {0xa732, 0xa732}, // + {0xa734, 0xa734}, // + {0xa736, 0xa736}, // + {0xa738, 0xa738}, // + {0xa73a, 0xa73a}, // + {0xa73c, 0xa73c}, // + {0xa73e, 0xa73e}, // + {0xa740, 0xa740}, // + {0xa742, 0xa742}, // + {0xa744, 0xa744}, // + {0xa746, 0xa746}, // + {0xa748, 0xa748}, // + {0xa74a, 0xa74a}, // + {0xa74c, 0xa74c}, // + {0xa74e, 0xa74e}, // + {0xa750, 0xa750}, // + {0xa752, 0xa752}, // + {0xa754, 0xa754}, // + {0xa756, 0xa756}, // + {0xa758, 0xa758}, // + {0xa75a, 0xa75a}, // + {0xa75c, 0xa75c}, // + {0xa75e, 0xa75e}, // + {0xa760, 0xa760}, // + {0xa762, 0xa762}, // + {0xa764, 0xa764}, // + {0xa766, 0xa766}, // + {0xa768, 0xa768}, // + {0xa76a, 0xa76a}, // + {0xa76c, 0xa76c}, // + {0xa76e, 0xa76e}, // + {0xa779, 0xa779}, // + {0xa77b, 0xa77b}, // + {0xa77d, 0xa77e}, // + {0xa780, 0xa780}, // + {0xa782, 0xa782}, // + {0xa784, 0xa784}, // + {0xa786, 0xa786}, // + {0xa78b, 0xa78b}, // + {0xa78d, 0xa78d}, // + {0xa790, 0xa790}, // + {0xa792, 0xa792}, // + {0xa796, 0xa796}, // + {0xa798, 0xa798}, // + {0xa79a, 0xa79a}, // + {0xa79c, 0xa79c}, // + {0xa79e, 0xa79e}, // + {0xa7a0, 0xa7a0}, // + {0xa7a2, 0xa7a2}, // + {0xa7a4, 0xa7a4}, // + {0xa7a6, 0xa7a6}, // + {0xa7a8, 0xa7a8}, // + {0xa7aa, 0xa7ae}, // + {0xa7b0, 0xa7b4}, // + {0xa7b6, 0xa7b6}, // + {0xa7b8, 0xa7b8}, // + {0xa7ba, 0xa7ba}, // + {0xa7bc, 0xa7bc}, // + {0xa7be, 0xa7be}, // + {0xa7c0, 0xa7c0}, // + {0xa7c2, 0xa7c2}, // + {0xa7c4, 0xa7c7}, // + {0xa7c9, 0xa7c9}, // + {0xa7d0, 0xa7d0}, // + {0xa7d6, 0xa7d6}, // + {0xa7d8, 0xa7d8}, // + {0xa7f5, 0xa7f5}, // + {0xff21, 0xff3a}, // +}; + +static const unsigned kUpperAstral[][2] = { + {0x10400, 0x10427}, // + {0x104b0, 0x104d3}, // + {0x10570, 0x1057a}, // + {0x1057c, 0x1058a}, // + {0x1058c, 0x10592}, // + {0x10594, 0x10595}, // + {0x10c80, 0x10cb2}, // + {0x118a0, 0x118bf}, // + {0x16e40, 0x16e5f}, // + {0x1d400, 0x1d419}, // + {0x1d434, 0x1d44d}, // + {0x1d468, 0x1d481}, // + {0x1d49c, 0x1d49c}, // + {0x1d49e, 0x1d49f}, // + {0x1d4a2, 0x1d4a2}, // + {0x1d4a5, 0x1d4a6}, // + {0x1d4a9, 0x1d4ac}, // + {0x1d4ae, 0x1d4b5}, // + {0x1d4d0, 0x1d4e9}, // + {0x1d504, 0x1d505}, // + {0x1d507, 0x1d50a}, // + {0x1d50d, 0x1d514}, // + {0x1d516, 0x1d51c}, // + {0x1d538, 0x1d539}, // + {0x1d53b, 0x1d53e}, // + {0x1d540, 0x1d544}, // + {0x1d546, 0x1d546}, // + {0x1d54a, 0x1d550}, // + {0x1d56c, 0x1d585}, // + {0x1d5a0, 0x1d5b9}, // + {0x1d5d4, 0x1d5ed}, // + {0x1d608, 0x1d621}, // + {0x1d63c, 0x1d655}, // + {0x1d670, 0x1d689}, // + {0x1d6a8, 0x1d6c0}, // + {0x1d6e2, 0x1d6fa}, // + {0x1d71c, 0x1d734}, // + {0x1d756, 0x1d76e}, // + {0x1d790, 0x1d7a8}, // + {0x1d7ca, 0x1d7ca}, // + {0x1e900, 0x1e921}, // + {0x1f130, 0x1f149}, // + {0x1f150, 0x1f169}, // + {0x1f170, 0x1f189}, // +}; + +/** + * Returns nonzero if c is uppercase letter. + */ +int iswupper(wint_t c) { + if (!IsTiny() && c < 128) + return 'A' <= c && c <= 'Z'; + if (c < 65536) + return has_char(kUpper, ARRAYLEN(kUpper), (unsigned short)c); + return has_char(kUpperAstral, ARRAYLEN(kUpperAstral), (unsigned)c); +} + +__weak_reference(iswupper, iswupper_l); diff --git a/libc/str/iswxdigit.c b/libc/str/iswxdigit.c index 75e4347f2..cccf9e4de 100644 --- a/libc/str/iswxdigit.c +++ b/libc/str/iswxdigit.c @@ -22,7 +22,8 @@ * Returns nonzero if c is ascii hex digit. */ int iswxdigit(wint_t c) { - return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || + return ('0' <= c && c <= '9') || // + ('A' <= c && c <= 'F') || // ('a' <= c && c <= 'f'); } diff --git a/libc/str/isxdigit.c b/libc/str/isxdigit.c index 03af7c9cc..a5f325698 100644 --- a/libc/str/isxdigit.c +++ b/libc/str/isxdigit.c @@ -22,7 +22,8 @@ * Returns true if c is hexadecimal digit. */ int isxdigit(int c) { - return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || + return ('0' <= c && c <= '9') || // + ('A' <= c && c <= 'F') || // ('a' <= c && c <= 'f'); } diff --git a/libc/str/iszipeocd32.c b/libc/str/iszipeocd32.c index fedc00242..a516b0f21 100644 --- a/libc/str/iszipeocd32.c +++ b/libc/str/iszipeocd32.c @@ -24,32 +24,23 @@ */ int IsZipEocd32(const uint8_t *p, size_t n, size_t i) { size_t offset; - if (i > n || n - i < kZipCdirHdrMinSize) { + if (i > n || n - i < kZipCdirHdrMinSize) return kZipErrorEocdOffsetOverflow; - } - if (ZIP_READ32(p + i) != kZipCdirHdrMagic) { + if (ZIP_READ32(p + i) != kZipCdirHdrMagic) return kZipErrorEocdMagicNotFound; - } - if (i + ZIP_CDIR_HDRSIZE(p + i) > n) { + if (i + ZIP_CDIR_HDRSIZE(p + i) > n) return kZipErrorEocdSizeOverflow; - } - if (ZIP_CDIR_DISK(p + i) != ZIP_CDIR_STARTINGDISK(p + i)) { + if (ZIP_CDIR_DISK(p + i) != ZIP_CDIR_STARTINGDISK(p + i)) return kZipErrorEocdDiskMismatch; - } - if (ZIP_CDIR_RECORDSONDISK(p + i) != ZIP_CDIR_RECORDS(p + i)) { + if (ZIP_CDIR_RECORDSONDISK(p + i) != ZIP_CDIR_RECORDS(p + i)) return kZipErrorEocdRecordsMismatch; - } - if (ZIP_CDIR_RECORDS(p + i) * kZipCfileHdrMinSize > ZIP_CDIR_SIZE(p + i)) { + if (ZIP_CDIR_RECORDS(p + i) * kZipCfileHdrMinSize > ZIP_CDIR_SIZE(p + i)) return kZipErrorEocdRecordsOverflow; - } - if (ZIP_CDIR_OFFSET(p + i) == 0xFFFFFFFFu) { + if (ZIP_CDIR_OFFSET(p + i) == 0xFFFFFFFFu) return kZipErrorEocdRecordsOverflow; - } - if (ckd_add(&offset, ZIP_CDIR_OFFSET(p + i), ZIP_CDIR_SIZE(p + i))) { + if (ckd_add(&offset, ZIP_CDIR_OFFSET(p + i), ZIP_CDIR_SIZE(p + i))) return kZipErrorEocdOffsetSizeOverflow; - } - if (offset > i) { + if (offset > i) return kZipErrorCdirOffsetPastEocd; - } return kZipOk; } diff --git a/libc/str/iswalpha.c b/libc/str/kmp.c similarity index 59% rename from libc/str/iswalpha.c rename to libc/str/kmp.c index 573392d42..d904a4378 100644 --- a/libc/str/iswalpha.c +++ b/libc/str/kmp.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,13 +16,59 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/wctype.h" +#include "libc/str/kmp.h" +#include "libc/mem/alloca.h" +#include "libc/runtime/stack.h" -/** - * Returns nonzero if c is alphabetical. - */ -int iswalpha(wint_t c) { - return iswupper(c) || iswlower(c); +static void computeLPS(const char *pattern, long M, long *lps) { + long len = 0; + lps[0] = 0; + long i = 1; + while (i < M) { + if (pattern[i] == pattern[len]) { + len++; + lps[i] = len; + i++; + } else { + if (len != 0) { + len = lps[len - 1]; + } else { + lps[i] = 0; + i++; + } + } + } } -__weak_reference(iswalpha, iswalpha_l); +char *__memmem_kmp(const char *s, size_t n, const char *ss, size_t m) { + if (!m) + return (char *)s; + if (n < m) + return NULL; +#pragma GCC push_options +#pragma GCC diagnostic ignored "-Walloca-larger-than=" +#pragma GCC diagnostic ignored "-Wanalyzer-out-of-bounds" + long need = sizeof(long) * m; + long *lps = (long *)alloca(need); + CheckLargeStackAllocation(lps, need); +#pragma GCC pop_options + computeLPS(ss, m, lps); + long i = 0; + long j = 0; + while (i < n) { + if (ss[j] == s[i]) { + i++; + j++; + } + if (j == m) { + return (char *)(s + i - j); + } else if (i < n && ss[j] != s[i]) { + if (j != 0) { + j = lps[j - 1]; + } else { + i++; + } + } + } + return NULL; +} diff --git a/libc/str/kmp.h b/libc/str/kmp.h new file mode 100644 index 000000000..5c5a85736 --- /dev/null +++ b/libc/str/kmp.h @@ -0,0 +1,10 @@ +#ifndef COSMOPOLITAN_LIBC_STR_KMP_H_ +#define COSMOPOLITAN_LIBC_STR_KMP_H_ +COSMOPOLITAN_C_START_ + +char *__memmem_kmp(const char *, size_t, const char *, size_t); +char16_t *__memmem_kmp16(const char16_t *, size_t, const char16_t *, size_t); +wchar_t *__memmem_kmp32(const wchar_t *, size_t, const wchar_t *, size_t); + +COSMOPOLITAN_C_END_ +#endif /* COSMOPOLITAN_LIBC_STR_KMP_H_ */ diff --git a/libc/mem/bsearch.c b/libc/str/kmp16.c similarity index 58% rename from libc/mem/bsearch.c rename to libc/str/kmp16.c index 5bfc7cb82..0e30f57ad 100644 --- a/libc/mem/bsearch.c +++ b/libc/str/kmp16.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,14 +16,60 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/alg.h" -#include "libc/mem/bisect.internal.h" +#include "libc/mem/alloca.h" +#include "libc/runtime/stack.h" +#include "libc/str/kmp.h" -/** - * Searches sorted array for exact item in logarithmic time. - * @see bsearch_r() - */ -void *bsearch(const void *key, const void *base, size_t nmemb, size_t size, - int cmp(const void *a, const void *b)) { - return bisect(key, base, nmemb, size, (void *)cmp, NULL); +static void computeLPS(const char16_t *pattern, long M, long *lps) { + long len = 0; + lps[0] = 0; + long i = 1; + while (i < M) { + if (pattern[i] == pattern[len]) { + len++; + lps[i] = len; + i++; + } else { + if (len != 0) { + len = lps[len - 1]; + } else { + lps[i] = 0; + i++; + } + } + } +} + +char16_t *__memmem_kmp16(const char16_t *s, size_t n, const char16_t *ss, + size_t m) { + if (!m) + return (char16_t *)s; + if (n < m) + return NULL; +#pragma GCC push_options +#pragma GCC diagnostic ignored "-Walloca-larger-than=" +#pragma GCC diagnostic ignored "-Wanalyzer-out-of-bounds" + long need = sizeof(long) * m; + long *lps = (long *)alloca(need); + CheckLargeStackAllocation(lps, need); +#pragma GCC pop_options + computeLPS(ss, m, lps); + long i = 0; + long j = 0; + while (i < n) { + if (ss[j] == s[i]) { + i++; + j++; + } + if (j == m) { + return (char16_t *)(s + i - j); + } else if (i < n && ss[j] != s[i]) { + if (j != 0) { + j = lps[j - 1]; + } else { + i++; + } + } + } + return NULL; } diff --git a/libc/str/towctrans.c b/libc/str/kmp32.c similarity index 58% rename from libc/str/towctrans.c rename to libc/str/kmp32.c index ed928df67..efd1a07a8 100644 --- a/libc/str/towctrans.c +++ b/libc/str/kmp32.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,12 +16,60 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/wctype.h" +#include "libc/mem/alloca.h" +#include "libc/runtime/stack.h" +#include "libc/str/kmp.h" -wint_t towctrans(wint_t c, wctrans_t t) { - if (t == (wctrans_t)1) - return towupper(c); - if (t == (wctrans_t)2) - return towlower(c); - return c; +static void computeLPS(const wchar_t *pattern, long M, long *lps) { + long len = 0; + lps[0] = 0; + long i = 1; + while (i < M) { + if (pattern[i] == pattern[len]) { + len++; + lps[i] = len; + i++; + } else { + if (len != 0) { + len = lps[len - 1]; + } else { + lps[i] = 0; + i++; + } + } + } +} + +wchar_t *__memmem_kmp32(const wchar_t *s, size_t n, const wchar_t *ss, + size_t m) { + if (!m) + return (wchar_t *)s; + if (n < m) + return NULL; +#pragma GCC push_options +#pragma GCC diagnostic ignored "-Walloca-larger-than=" +#pragma GCC diagnostic ignored "-Wanalyzer-out-of-bounds" + long need = sizeof(long) * m; + long *lps = (long *)alloca(need); + CheckLargeStackAllocation(lps, need); +#pragma GCC pop_options + computeLPS(ss, m, lps); + long i = 0; + long j = 0; + while (i < n) { + if (ss[j] == s[i]) { + i++; + j++; + } + if (j == m) { + return (wchar_t *)(s + i - j); + } else if (i < n && ss[j] != s[i]) { + if (j != 0) { + j = lps[j - 1]; + } else { + i++; + } + } + } + return NULL; } diff --git a/libc/str/kx86processormodels.c b/libc/str/kx86processormodels.c index 9bf5c196e..ba055d84b 100644 --- a/libc/str/kx86processormodels.c +++ b/libc/str/kx86processormodels.c @@ -20,7 +20,6 @@ #include "libc/nexgen32e/x86info.h" const struct X86ProcessorModel kX86ProcessorModels[] = { - /* */ {0x060F, X86_MARCH_CORE2, X86_GRADE_CLIENT}, {0x0616, X86_MARCH_CORE2, X86_GRADE_MOBILE}, {0x0617, X86_MARCH_CORE2, X86_GRADE_SERVER}, @@ -85,7 +84,5 @@ const struct X86ProcessorModel kX86ProcessorModels[] = { {0x06A7, X86_MARCH_ROCKETLAKE, X86_GRADE_CLIENT}, {0x06B7, X86_MARCH_RAPTORLAKE, X86_GRADE_CLIENT}, {0x06BA, X86_MARCH_RAPTORLAKE, X86_GRADE_CLIENT}, - /* */ + {0}, }; - -const size_t kX86ProcessorModelCount = ARRAYLEN(kX86ProcessorModels); diff --git a/libc/str/memmem.c b/libc/str/memmem.c index ef3f721f0..f2b072275 100644 --- a/libc/str/memmem.c +++ b/libc/str/memmem.c @@ -16,49 +16,60 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/dce.h" #include "libc/intrin/likely.h" +#include "libc/str/kmp.h" #include "libc/str/str.h" - -typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); +#include "third_party/aarch64/arm_neon.internal.h" +#include "third_party/intel/emmintrin.internal.h" /** * Searches for fixed-length substring in memory region. * + * This function offers assurances against pathological cases, using KMP + * if no progress is being made on the O(nm) vectorized fast path. It is + * important to note that, if `needle` is untrusted, that it not be long + * enough to overflow the stack. That's because KMP needs to allocate an + * array of longs the same length as `needle` and it needs to do it with + * stack memory because this function is safe to call in signal handlers + * * @param haystack is the region of memory to be searched * @param haystacklen is its character count * @param needle contains the memory for which we're searching * @param needlelen is its character count * @return pointer to first result or NULL if not found + * @asyncsignalsafe */ __vex void *memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) { #if defined(__x86_64__) && !defined(__chibicc__) char c; - xmm_t n; - const xmm_t *v; + __m128i n; + const __m128i *v; unsigned i, k, m; + long progress = 0; const char *p, *q, *e; + long scare = -(needlelen * 10); if (!needlelen) return (void *)haystack; if (UNLIKELY(needlelen > haystacklen)) return 0; q = needle; c = *q; - n = (xmm_t){c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; + n = _mm_set1_epi8(c); p = haystack; e = p + haystacklen; k = (uintptr_t)p & 15; - v = (const xmm_t *)((uintptr_t)p & -16); - m = __builtin_ia32_pmovmskb128(*v == n); + v = (const __m128i *)((uintptr_t)p & -16); + m = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128(v), n)); m >>= k; m <<= k; for (;;) { while (!m) { ++v; + progress += 16; if ((const char *)v >= e) return 0; - m = __builtin_ia32_pmovmskb128(*v == n); + m = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128(v), n)); } do { k = __builtin_ctzl(m); @@ -66,6 +77,8 @@ __vex void *memmem(const void *haystack, size_t haystacklen, const void *needle, if (UNLIKELY(p + needlelen > e)) return 0; for (i = 1;; ++i) { + if (--progress <= scare) + goto OfferPathologicalAssurances; if (i == needlelen) return (/*unconst*/ char *)p; if (p[i] != q[i]) @@ -74,22 +87,59 @@ __vex void *memmem(const void *haystack, size_t haystacklen, const void *needle, m &= ~(1 << k); } while (m); } -#else - size_t i, j; +OfferPathologicalAssurances: +#elif defined(__aarch64__) && defined(__ARM_NEON) + char c; + uint8x16_t n; + const uint8x16_t *v; + size_t i, k; + uint64_t m; + long progress = 0; + const char *p, *q, *e; + long scare = -(needlelen * 10); if (!needlelen) return (void *)haystack; - if (needlelen > haystacklen) + if (UNLIKELY(needlelen > haystacklen)) return 0; - for (i = 0; i < haystacklen; ++i) { - for (j = 0;; ++j) { - if (j == needlelen) - return (/*unconst*/ char *)haystack + i; - if (i + j == haystacklen) - break; - if (((char *)haystack)[i + j] != ((char *)needle)[j]) - break; + q = needle; + c = *q; + n = vdupq_n_u8(c); + p = haystack; + e = p + haystacklen; + k = (uintptr_t)p & 15; + v = (const uint8x16_t *)((uintptr_t)p & -16); + uint8x16_t cmp = vceqq_u8(vld1q_u8((const uint8_t *)v), n); + uint8x8_t mask = vshrn_n_u16(vreinterpretq_u16_u8(cmp), 4); + vst1_u8((uint8_t *)&m, mask); + m >>= k * 4; + m <<= k * 4; + for (;;) { + while (!m) { + ++v; + progress += 16; + if ((const char *)v >= e) + return 0; + cmp = vceqq_u8(vld1q_u8((const uint8_t *)v), n); + mask = vshrn_n_u16(vreinterpretq_u16_u8(cmp), 4); + vst1_u8((uint8_t *)&m, mask); } + do { + k = __builtin_ctzll(m) >> 2; + p = (const char *)v + k; + if (UNLIKELY(p + needlelen > e)) + return 0; + for (i = 1;; ++i) { + if (--progress <= scare) + goto OfferPathologicalAssurances; + if (i == needlelen) + return (/*unconst*/ char *)p; + if (p[i] != q[i]) + break; + } + m &= ~(0xFULL << (k * 4)); + } while (m); } - return 0; +OfferPathologicalAssurances: #endif + return __memmem_kmp(haystack, haystacklen, needle, needlelen); } diff --git a/libc/str/nonspacing.inc b/libc/str/nonspacing.inc new file mode 100644 index 000000000..7746f3b60 --- /dev/null +++ b/libc/str/nonspacing.inc @@ -0,0 +1,91 @@ +16,16,16,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,16,33,16,16,16,34,35,36, +37,38,39,40,16,16,41,16,16,16,16,16,16,16,16,16,16,16,42,43,16,16,44,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,45,16,46,47,48,49,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,50,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,51,16,16,52, +53,16,54,55,56,16,16,16,16,16,16,57,16,16,58,16,59,60,61,62,63,64,65,66,67,68, +69,70,16,71,72,73,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,74,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,75,76,16,16,16,77,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,78,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,79,80,16,16,16,16,16,16,16,81,16,16,16,16,16,82,83,84,16,16,16,16,16,85, +86,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,248,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,254,255,255,255,255,191,182,0,0,0,0,0,0,0,63,0,255,23,0,0,0,0,0,248,255, +255,0,0,1,0,0,0,0,0,0,0,0,0,0,0,192,191,159,61,0,0,0,128,2,0,0,0,255,255,255, +7,0,0,0,0,0,0,0,0,0,0,192,255,1,0,0,0,0,0,0,248,15,32,0,0,192,251,239,62,0,0, +0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248,255,255,255,255, +255,7,0,0,0,0,0,0,20,254,33,254,0,12,0,0,0,2,0,0,0,0,0,0,16,30,32,0,0,12,0,0, +64,6,0,0,0,0,0,0,16,134,57,2,0,0,0,35,0,6,0,0,0,0,0,0,16,190,33,0,0,12,0,0, +252,2,0,0,0,0,0,0,144,30,32,64,0,12,0,0,0,4,0,0,0,0,0,0,0,1,32,0,0,0,0,0,0,17, +0,0,0,0,0,0,192,193,61,96,0,12,0,0,0,2,0,0,0,0,0,0,144,64,48,0,0,12,0,0,0,3,0, +0,0,0,0,0,24,30,32,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,4,92,0,0,0,0,0,0,0,0,0,0,0, +242,7,128,127,0,0,0,0,0,0,0,0,0,0,0,0,242,31,0,63,0,0,0,0,0,0,0,0,0,3,0,0,160, +2,0,0,0,0,0,0,254,127,223,224,255,254,255,255,255,31,64,0,0,0,0,0,0,0,0,0,0,0, +0,224,253,102,0,0,0,195,1,0,30,0,100,32,0,32,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,224,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,28,0,0,0,28,0,0,0,12,0,0,0,12,0,0,0,0,0,0,0,176,63,64,254, +15,32,0,0,0,0,0,120,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,135,1,4,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +128,9,0,0,0,0,0,0,64,127,229,31,248,159,0,0,0,0,0,0,255,127,0,0,0,0,0,0,0,0, +15,0,0,0,0,0,208,23,4,0,0,0,0,248,15,0,3,0,0,0,60,59,0,0,0,0,0,0,64,163,3,0,0, +0,0,0,0,240,207,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,247,255,253,33,16, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255, +251,0,248,0,0,0,124,0,0,0,0,0,0,223,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255, +255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,3,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0, +0,60,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,128,247,63,0,0,0,192,0,0,0,0,0,0,0,0,0,0,3,0,68,8,0,0,96,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0,255,255,3,128,0,0,0,0,192,63,0,0,128,255,3,0, +0,0,0,0,7,0,0,0,0,0,200,51,0,0,0,0,32,0,0, +0,0,0,0,0,0,126,102,0,8,16,0,0,0,0,0,16,0,0,0,0,0,0,157,193,2,0,0,0,0,48,64,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,33,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,0,0,0, +64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,255, +255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,110,240,0, +0,0,0,0,135,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,0,0,0,0,0,0,0,240,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,255,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,255,127,0,0,0,0,0,0,128, +3,0,0,0,0,0,120,38,0,32,0,0,0,0,0,0,7,0,0,0,128,239,31,0,0,0,0,0,0,0,8,0,3,0, +0,0,0,0,192,127,0,30,0,0,0,0,0,0,0,0,0,0,0,128,211,64,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,128,248,7,0,0,3,0,0,0,0,0,0,24,1,0,0,0,192,31,31,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,255,92,0,0,64,0,0,0,0,0,0,0,0,0,0,248,133,13,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60,176,1,0,0,48,0,0,0,0,0,0,0,0,0,0, +248,167,1,0,0,0,0,0,0,0,0,0,0,0,0,40,191,0,0,0,0,0,0,0,0,0,0,0,0,224,188,15,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,255,6,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,240,12,1,0,0,0,254,7,0,0,0,0,248,121,128,0,126,14,0,0,0,0,0,252, +127,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,191,0,0,0,0,0,0,0,0,0,0,252,255, +255,252,109,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,126,180,191,0,0,0,0,0,0,0,0,0,163,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,0,0,0,0,0,0,255, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,0,0,0,0,0,0,0,127,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,7,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,15,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,3,248,255,231,15,0,0,0,60,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255, +255,255,255,255,127,248,255,255,255,255,255,31,32,0,16,0,0,248,254,255,0,0,0, +0,0,0,0,0,0,0,127,255,255,249,219,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,240,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,0,0,0,0,0,0,0,0,0,0,0,0,0,240,7,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, diff --git a/libc/str/strstr.c b/libc/str/strstr.c index 6557ac91a..6b16e51d5 100644 --- a/libc/str/strstr.c +++ b/libc/str/strstr.c @@ -17,74 +17,113 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/str.h" -#include "libc/dce.h" - -typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); +#include "libc/str/kmp.h" +#include "third_party/aarch64/arm_neon.internal.h" +#include "third_party/intel/immintrin.internal.h" /** * Searches for substring. * + * This function offers assurances against pathological cases, using KMP + * if no progress is being made on the O(nm) vectorized fast path. It is + * important to note that, if `needle` is untrusted, that it not be long + * enough to overflow the stack. That's because KMP needs to allocate an + * array of longs the same length as `needle` and it needs to do it with + * stack memory since POSIX requires this function to be safe to call in + * signal handlers. + * * @param haystack is the search area, as a NUL-terminated string * @param needle is the desired substring, also NUL-terminated * @return pointer to first substring within haystack, or NULL - * @note this implementation goes fast in practice but isn't hardened - * against pathological cases, and therefore shouldn't be used on - * untrustworthy data * @asyncsignalsafe * @see strcasestr() * @see memmem() */ __vex char *strstr(const char *haystack, const char *needle) { + if (haystack == needle || !*needle) + return (char *)haystack; #if defined(__x86_64__) && !defined(__chibicc__) size_t i; unsigned k, m; - const xmm_t *p; - xmm_t v, n, z = {0}; - if (haystack == needle || !*needle) - return (char *)haystack; - n = (xmm_t){*needle, *needle, *needle, *needle, *needle, *needle, - *needle, *needle, *needle, *needle, *needle, *needle, - *needle, *needle, *needle, *needle}; + const __m128i *p; + long progress = 0; + __m128i v, n, z = _mm_setzero_si128(); + const char *hay = haystack; + n = _mm_set1_epi8(*needle); for (;;) { - k = (uintptr_t)haystack & 15; - p = (const xmm_t *)((uintptr_t)haystack & -16); - v = *p; - m = __builtin_ia32_pmovmskb128((v == z) | (v == n)); + k = (uintptr_t)hay & 15; + p = (const __m128i *)((uintptr_t)hay & -16); + v = _mm_load_si128(p); + m = _mm_movemask_epi8( + _mm_or_si128(_mm_cmpeq_epi8(v, z), _mm_cmpeq_epi8(v, n))); m >>= k; m <<= k; while (!m) { - v = *++p; - m = __builtin_ia32_pmovmskb128((v == z) | (v == n)); + progress += 16; + v = _mm_load_si128(++p); + m = _mm_movemask_epi8( + _mm_or_si128(_mm_cmpeq_epi8(v, z), _mm_cmpeq_epi8(v, n))); } - haystack = (const char *)p + __builtin_ctzl(m); + int offset = __builtin_ctzl(m); + progress += offset; + hay = (const char *)p + offset; for (i = 0;; ++i) { + if (--progress <= -512) + goto OfferPathologicalAssurances; if (!needle[i]) - return (/*unconst*/ char *)haystack; - if (!haystack[i]) + return (/*unconst*/ char *)hay; + if (!hay[i]) break; - if (needle[i] != haystack[i]) + if (needle[i] != hay[i]) break; } - if (!*haystack++) + if (!*hay++) break; } return 0; -#else +OfferPathologicalAssurances: +#elif defined(__aarch64__) && defined(__ARM_NEON) size_t i; - if (haystack == needle || !*needle) - return (void *)haystack; + const char *hay = haystack; + uint8x16_t n = vdupq_n_u8(*needle); + uint8x16_t z = vdupq_n_u8(0); + long progress = 0; for (;;) { + int k = (uintptr_t)hay & 15; + hay = (const char *)((uintptr_t)hay & -16); + uint8x16_t v = vld1q_u8((const uint8_t *)hay); + uint8x16_t cmp = vorrq_u8(vceqq_u8(v, z), vceqq_u8(v, n)); + uint8x8_t mask = vshrn_n_u16(vreinterpretq_u16_u8(cmp), 4); + uint64_t m; + vst1_u8((uint8_t *)&m, mask); + m >>= k * 4; + m <<= k * 4; + while (!m) { + hay += 16; + progress += 16; + v = vld1q_u8((const uint8_t *)hay); + cmp = vorrq_u8(vceqq_u8(v, z), vceqq_u8(v, n)); + mask = vshrn_n_u16(vreinterpretq_u16_u8(cmp), 4); + vst1_u8((uint8_t *)&m, mask); + } + int offset = __builtin_ctzll(m) >> 2; + progress += offset; + hay += offset; for (i = 0;; ++i) { + if (--progress <= -512) + goto OfferPathologicalAssurances; if (!needle[i]) - return (/*unconst*/ char *)haystack; - if (!haystack[i]) + return (/*unconst*/ char *)hay; + if (!hay[i]) break; - if (needle[i] != haystack[i]) + if (needle[i] != hay[i]) break; } - if (!*haystack++) + if (!*hay++) break; } return 0; +OfferPathologicalAssurances: #endif + return __memmem_kmp(haystack, strlen(haystack), needle, strlen(needle)); } diff --git a/libc/str/strstr16.c b/libc/str/strstr16.c index aac0f8e3e..a139d0fd4 100644 --- a/libc/str/strstr16.c +++ b/libc/str/strstr16.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/str/kmp.h" #include "libc/str/str.h" /** @@ -28,19 +29,5 @@ * @see memmem() */ char16_t *strstr16(const char16_t *haystack, const char16_t *needle) { - size_t i; - for (;;) { - for (i = 0;;) { - if (!needle[i]) - return (/*unconst*/ char16_t *)haystack; - if (!haystack[i]) - break; - if (needle[i] != haystack[i]) - break; - ++i; - } - if (!*haystack++) - break; - } - return NULL; + return __memmem_kmp16(haystack, strlen16(haystack), needle, strlen16(needle)); } diff --git a/libc/str/towlower.c b/libc/str/towlower.c deleted file mode 100644 index eb791adbc..000000000 --- a/libc/str/towlower.c +++ /dev/null @@ -1,236 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/dce.h" -#include "libc/macros.h" -#include "libc/str/str.h" -/* clang-format off */ - -static const struct { - unsigned short x; - unsigned short y; - short d; -} kLower[] = { - {0x00c0, 0x00d6, +32}, /* 23x À ..Ö → à ..ö Watin */ - {0x00d8, 0x00de, +32}, /* 7x Ø ..Þ → ø ..þ Watin */ - {0x0178, 0x0178, -121}, /* 1x Ÿ ..Ÿ → ÿ ..ÿ Watin-A */ - {0x0179, 0x0179, +1}, /* 1x Ź ..Ź → ź ..ź Watin-A */ - {0x017b, 0x017b, +1}, /* 1x Ż ..Ż → ż ..ż Watin-A */ - {0x017d, 0x017d, +1}, /* 1x Ž ..Ž → ž ..ž Watin-A */ - {0x0181, 0x0181, +210}, /* 1x Ɓ ..Ɓ → ɓ ..ɓ Watin-B */ - {0x0182, 0x0182, +1}, /* 1x Ƃ ..Ƃ → ƃ ..ƃ Watin-B */ - {0x0184, 0x0184, +1}, /* 1x Ƅ ..Ƅ → ƅ ..ƅ Watin-B */ - {0x0186, 0x0186, +206}, /* 1x Ɔ ..Ɔ → ɔ ..ɔ Watin-B */ - {0x0187, 0x0187, +1}, /* 1x Ƈ ..Ƈ → ƈ ..ƈ Watin-B */ - {0x0189, 0x018a, +205}, /* 2x Ɖ ..Ɗ → ɖ ..ɗ Watin-B */ - {0x018b, 0x018b, +1}, /* 1x Ƌ ..Ƌ → ƌ ..ƌ Watin-B */ - {0x018e, 0x018e, +79}, /* 1x Ǝ ..Ǝ → ǝ ..ǝ Watin-B */ - {0x018f, 0x018f, +202}, /* 1x Ə ..Ə → ə ..ə Watin-B */ - {0x0190, 0x0190, +203}, /* 1x Ɛ ..Ɛ → ɛ ..ɛ Watin-B */ - {0x0191, 0x0191, +1}, /* 1x Ƒ ..Ƒ → ƒ ..ƒ Watin-B */ - {0x0193, 0x0193, +205}, /* 1x Ɠ ..Ɠ → ɠ ..ɠ Watin-B */ - {0x0194, 0x0194, +207}, /* 1x Ɣ ..Ɣ → ɣ ..ɣ Watin-B */ - {0x0196, 0x0196, +211}, /* 1x Ɩ ..Ɩ → ɩ ..ɩ Watin-B */ - {0x0197, 0x0197, +209}, /* 1x Ɨ ..Ɨ → ɨ ..ɨ Watin-B */ - {0x0198, 0x0198, +1}, /* 1x Ƙ ..Ƙ → ƙ ..ƙ Watin-B */ - {0x019c, 0x019c, +211}, /* 1x Ɯ ..Ɯ → ɯ ..ɯ Watin-B */ - {0x019d, 0x019d, +213}, /* 1x Ɲ ..Ɲ → ɲ ..ɲ Watin-B */ - {0x019f, 0x019f, +214}, /* 1x Ɵ ..Ɵ → ɵ ..ɵ Watin-B */ - {0x01a0, 0x01a0, +1}, /* 1x Ơ ..Ơ → ơ ..ơ Watin-B */ - {0x01a2, 0x01a2, +1}, /* 1x Ƣ ..Ƣ → ƣ ..ƣ Watin-B */ - {0x01a4, 0x01a4, +1}, /* 1x Ƥ ..Ƥ → ƥ ..ƥ Watin-B */ - {0x01a6, 0x01a6, +218}, /* 1x Ʀ ..Ʀ → ʀ ..ʀ Watin-B */ - {0x01a7, 0x01a7, +1}, /* 1x Ƨ ..Ƨ → ƨ ..ƨ Watin-B */ - {0x01a9, 0x01a9, +218}, /* 1x Ʃ ..Ʃ → ʃ ..ʃ Watin-B */ - {0x01ac, 0x01ac, +1}, /* 1x Ƭ ..Ƭ → ƭ ..ƭ Watin-B */ - {0x01ae, 0x01ae, +218}, /* 1x Ʈ ..Ʈ → ʈ ..ʈ Watin-B */ - {0x01af, 0x01af, +1}, /* 1x Ư ..Ư → ư ..ư Watin-B */ - {0x01b1, 0x01b2, +217}, /* 2x Ʊ ..Ʋ → ʊ ..ʋ Watin-B */ - {0x01b3, 0x01b3, +1}, /* 1x Ƴ ..Ƴ → ƴ ..ƴ Watin-B */ - {0x01b5, 0x01b5, +1}, /* 1x Ƶ ..Ƶ → ƶ ..ƶ Watin-B */ - {0x01b7, 0x01b7, +219}, /* 1x Ʒ ..Ʒ → ʒ ..ʒ Watin-B */ - {0x01b8, 0x01b8, +1}, /* 1x Ƹ ..Ƹ → ƹ ..ƹ Watin-B */ - {0x01bc, 0x01bc, +1}, /* 1x Ƽ ..Ƽ → ƽ ..ƽ Watin-B */ - {0x01c4, 0x01c4, +2}, /* 1x DŽ ..DŽ → dž ..dž Watin-B */ - {0x01c5, 0x01c5, +1}, /* 1x Dž ..Dž → dž ..dž Watin-B */ - {0x01c7, 0x01c7, +2}, /* 1x LJ ..LJ → lj ..lj Watin-B */ - {0x01c8, 0x01c8, +1}, /* 1x Lj ..Lj → lj ..lj Watin-B */ - {0x01ca, 0x01ca, +2}, /* 1x NJ ..NJ → nj ..nj Watin-B */ - {0x01cb, 0x01cb, +1}, /* 1x Nj ..Nj → nj ..nj Watin-B */ - {0x01cd, 0x01cd, +1}, /* 1x Ǎ ..Ǎ → ǎ ..ǎ Watin-B */ - {0x01f1, 0x01f1, +2}, /* 1x DZ ..DZ → dz ..dz Watin-B */ - {0x01f2, 0x01f2, +1}, /* 1x Dz ..Dz → dz ..dz Watin-B */ - {0x01f4, 0x01f4, +1}, /* 1x Ǵ ..Ǵ → ǵ ..ǵ Watin-B */ - {0x01f6, 0x01f6, -97}, /* 1x Ƕ ..Ƕ → ƕ ..ƕ Watin-B */ - {0x01f7, 0x01f7, -56}, /* 1x Ƿ ..Ƿ → ƿ ..ƿ Watin-B */ - {0x0220, 0x0220, -130}, /* 1x Ƞ ..Ƞ → ƞ ..ƞ Watin-B */ - {0x023b, 0x023b, +1}, /* 1x Ȼ ..Ȼ → ȼ ..ȼ Watin-B */ - {0x023d, 0x023d, -163}, /* 1x Ƚ ..Ƚ → ƚ ..ƚ Watin-B */ - {0x0241, 0x0241, +1}, /* 1x Ɂ ..Ɂ → ɂ ..ɂ Watin-B */ - {0x0243, 0x0243, -195}, /* 1x Ƀ ..Ƀ → ƀ ..ƀ Watin-B */ - {0x0244, 0x0244, +69}, /* 1x Ʉ ..Ʉ → ʉ ..ʉ Watin-B */ - {0x0245, 0x0245, +71}, /* 1x Ʌ ..Ʌ → ʌ ..ʌ Watin-B */ - {0x0246, 0x0246, +1}, /* 1x Ɇ ..Ɇ → ɇ ..ɇ Watin-B */ - {0x0248, 0x0248, +1}, /* 1x Ɉ ..Ɉ → ɉ ..ɉ Watin-B */ - {0x024a, 0x024a, +1}, /* 1x Ɋ ..Ɋ → ɋ ..ɋ Watin-B */ - {0x024c, 0x024c, +1}, /* 1x Ɍ ..Ɍ → ɍ ..ɍ Watin-B */ - {0x024e, 0x024e, +1}, /* 1x Ɏ ..Ɏ → ɏ ..ɏ Watin-B */ - {0x0386, 0x0386, +38}, /* 1x Ά ..Ά → ά ..ά Greek */ - {0x0388, 0x038a, +37}, /* 3x Έ ..Ί → έ ..ί Greek */ - {0x038c, 0x038c, +64}, /* 1x Ό ..Ό → ό ..ό Greek */ - {0x038e, 0x038f, +63}, /* 2x Ύ ..Ώ → ύ ..ώ Greek */ - {0x0391, 0x03a1, +32}, /* 17x Α ..Ρ → α ..ρ Greek */ - {0x03a3, 0x03ab, +32}, /* 9x Σ ..Ϋ → σ ..ϋ Greek */ - {0x03dc, 0x03dc, +1}, /* 1x Ϝ ..Ϝ → ϝ ..ϝ Greek */ - {0x03f4, 0x03f4, -60}, /* 1x ϴ ..ϴ → θ ..θ Greek */ - {0x0400, 0x040f, +80}, /* 16x Ѐ ..Џ → ѐ ..џ Cyrillic */ - {0x0410, 0x042f, +32}, /* 32x А ..Я → а ..я Cyrillic */ - {0x0460, 0x0460, +1}, /* 1x Ѡ ..Ѡ → ѡ ..ѡ Cyrillic */ - {0x0462, 0x0462, +1}, /* 1x Ѣ ..Ѣ → ѣ ..ѣ Cyrillic */ - {0x0464, 0x0464, +1}, /* 1x Ѥ ..Ѥ → ѥ ..ѥ Cyrillic */ - {0x0472, 0x0472, +1}, /* 1x Ѳ ..Ѳ → ѳ ..ѳ Cyrillic */ - {0x0490, 0x0490, +1}, /* 1x Ґ ..Ґ → ґ ..ґ Cyrillic */ - {0x0498, 0x0498, +1}, /* 1x Ҙ ..Ҙ → ҙ ..ҙ Cyrillic */ - {0x049a, 0x049a, +1}, /* 1x Қ ..Қ → қ ..қ Cyrillic */ - {0x0531, 0x0556, +48}, /* 38x Ա ..Ֆ → ա ..ֆ Armenian */ - {0x10a0, 0x10c5, +7264}, /* 38x Ⴀ ..Ⴥ → ⴀ ..ⴥ Georgian */ - {0x10c7, 0x10c7, +7264}, /* 1x Ⴧ ..Ⴧ → ⴧ ..ⴧ Georgian */ - {0x10cd, 0x10cd, +7264}, /* 1x Ⴭ ..Ⴭ → ⴭ ..ⴭ Georgian */ - {0x13f0, 0x13f5, +8}, /* 6x Ᏸ ..Ᏽ → ᏸ ..ᏽ Cherokee */ - {0x1c90, 0x1cba, -3008}, /* 43x Ა ..Ჺ → ა ..ჺ Georgian2 */ - {0x1cbd, 0x1cbf, -3008}, /* 3x Ჽ ..Ჿ → ჽ ..ჿ Georgian2 */ - {0x1f08, 0x1f0f, -8}, /* 8x Ἀ ..Ἇ → ἀ ..ἇ Greek2 */ - {0x1f18, 0x1f1d, -8}, /* 6x Ἐ ..Ἕ → ἐ ..ἕ Greek2 */ - {0x1f28, 0x1f2f, -8}, /* 8x Ἠ ..Ἧ → ἠ ..ἧ Greek2 */ - {0x1f38, 0x1f3f, -8}, /* 8x Ἰ ..Ἷ → ἰ ..ἷ Greek2 */ - {0x1f48, 0x1f4d, -8}, /* 6x Ὀ ..Ὅ → ὀ ..ὅ Greek2 */ - {0x1f59, 0x1f59, -8}, /* 1x Ὑ ..Ὑ → ὑ ..ὑ Greek2 */ - {0x1f5b, 0x1f5b, -8}, /* 1x Ὓ ..Ὓ → ὓ ..ὓ Greek2 */ - {0x1f5d, 0x1f5d, -8}, /* 1x Ὕ ..Ὕ → ὕ ..ὕ Greek2 */ - {0x1f5f, 0x1f5f, -8}, /* 1x Ὗ ..Ὗ → ὗ ..ὗ Greek2 */ - {0x1f68, 0x1f6f, -8}, /* 8x Ὠ ..Ὧ → ὠ ..ὧ Greek2 */ - {0x1f88, 0x1f8f, -8}, /* 8x ᾈ ..ᾏ → ᾀ ..ᾇ Greek2 */ - {0x1f98, 0x1f9f, -8}, /* 8x ᾘ ..ᾟ → ᾐ ..ᾗ Greek2 */ - {0x1fa8, 0x1faf, -8}, /* 8x ᾨ ..ᾯ → ᾠ ..ᾧ Greek2 */ - {0x1fb8, 0x1fb9, -8}, /* 2x Ᾰ ..Ᾱ → ᾰ ..ᾱ Greek2 */ - {0x1fba, 0x1fbb, -74}, /* 2x Ὰ ..Ά → ὰ ..ά Greek2 */ - {0x1fbc, 0x1fbc, -9}, /* 1x ᾼ ..ᾼ → ᾳ ..ᾳ Greek2 */ - {0x1fc8, 0x1fcb, -86}, /* 4x Ὲ ..Ή → ὲ ..ή Greek2 */ - {0x1fcc, 0x1fcc, -9}, /* 1x ῌ ..ῌ → ῃ ..ῃ Greek2 */ - {0x1fd8, 0x1fd9, -8}, /* 2x Ῐ ..Ῑ → ῐ ..ῑ Greek2 */ - {0x1fda, 0x1fdb, -100}, /* 2x Ὶ ..Ί → ὶ ..ί Greek2 */ - {0x1fe8, 0x1fe9, -8}, /* 2x Ῠ ..Ῡ → ῠ ..ῡ Greek2 */ - {0x1fea, 0x1feb, -112}, /* 2x Ὺ ..Ύ → ὺ ..ύ Greek2 */ - {0x1fec, 0x1fec, -7}, /* 1x Ῥ ..Ῥ → ῥ ..ῥ Greek2 */ - {0x1ff8, 0x1ff9, -128}, /* 2x Ὸ ..Ό → ὸ ..ό Greek2 */ - {0x1ffa, 0x1ffb, -126}, /* 2x Ὼ ..Ώ → ὼ ..ώ Greek2 */ - {0x1ffc, 0x1ffc, -9}, /* 1x ῼ ..ῼ → ῳ ..ῳ Greek2 */ - {0x2126, 0x2126, -7517}, /* 1x Ω ..Ω → ω ..ω Letterlike */ - {0x212a, 0x212a, -8383}, /* 1x K ..K → k ..k Letterlike */ - {0x212b, 0x212b, -8262}, /* 1x Å ..Å → å ..å Letterlike */ - {0x2132, 0x2132, +28}, /* 1x Ⅎ ..Ⅎ → ⅎ ..ⅎ Letterlike */ - {0x2160, 0x216f, +16}, /* 16x Ⅰ ..Ⅿ → ⅰ ..ⅿ Numbery */ - {0x2183, 0x2183, +1}, /* 1x Ↄ ..Ↄ → ↄ ..ↄ Numbery */ - {0x24b6, 0x24cf, +26}, /* 26x Ⓐ ..Ⓩ → ⓐ ..ⓩ Enclosed */ - {0x2c00, 0x2c2e, +48}, /* 47x Ⰰ ..Ⱞ → ⰰ ..ⱞ Glagolitic */ - {0xff21, 0xff3a, +32}, /* 26x A..Z → a..z Dubs */ -}; - -static const int kAstralLower[][3] = { - {0x10400, 0x10427, +40}, /* 40x 𐐀 ..𐐧 → 𐐨 ..𐑏 Deseret */ - {0x104b0, 0x104d3, +40}, /* 36x 𐒰 ..𐓓 → 𐓘 ..𐓻 Osage */ - {0x1d400, 0x1d419, +26}, /* 26x 𝐀 ..𝐙 → 𝐚 ..𝐳 Math */ - {0x1d43c, 0x1d44d, +26}, /* 18x 𝐼 ..𝑍 → 𝑖 ..𝑧 Math */ - {0x1d468, 0x1d481, +26}, /* 26x 𝑨 ..𝒁 → 𝒂 ..𝒛 Math */ - {0x1d4ae, 0x1d4b5, +26}, /* 8x 𝒮 ..𝒵 → 𝓈 ..𝓏 Math */ - {0x1d4d0, 0x1d4e9, +26}, /* 26x 𝓐 ..𝓩 → 𝓪 ..𝔃 Math */ - {0x1d50d, 0x1d514, +26}, /* 8x 𝔍 ..𝔔 → 𝔧 ..𝔮 Math */ - {0x1d56c, 0x1d585, +26}, /* 26x 𝕬 ..𝖅 → 𝖆 ..𝖟 Math */ - {0x1d5a0, 0x1d5b9, +26}, /* 26x 𝖠 ..𝖹 → 𝖺 ..𝗓 Math */ - {0x1d5d4, 0x1d5ed, +26}, /* 26x 𝗔 ..𝗭 → 𝗮 ..𝘇 Math */ - {0x1d608, 0x1d621, +26}, /* 26x 𝘈 ..𝘡 → 𝘢 ..𝘻 Math */ - {0x1d63c, 0x1d655, -442}, /* 26x 𝘼 ..𝙕 → 𝒂 ..𝒛 Math */ - {0x1d670, 0x1d689, +26}, /* 26x 𝙰 ..𝚉 → 𝚊 ..𝚣 Math */ - {0x1d6a8, 0x1d6b8, +26}, /* 17x 𝚨 ..𝚸 → 𝛂 ..𝛒 Math */ - {0x1d6e2, 0x1d6f2, +26}, /* 17x 𝛢 ..𝛲 → 𝛼 ..𝜌 Math */ - {0x1d71c, 0x1d72c, +26}, /* 17x 𝜜 ..𝜬 → 𝜶 ..𝝆 Math */ - {0x1d756, 0x1d766, +26}, /* 17x 𝝖 ..𝝦 → 𝝰 ..𝞀 Math */ - {0x1d790, 0x1d7a0, -90}, /* 17x 𝞐 ..𝞠 → 𝜶 ..𝝆 Math */ -}; - -/** - * Converts wide character to lower case. - */ -wint_t towlower(wint_t c) { - int m, l, r, n; - if (c < 0200) { - if ('A' <= c && c <= 'Z') { - return c + 32; - } else { - return c; - } - } else if (c <= 0xffff) { - if ((0x0100 <= c && c <= 0x0176) || /* 60x Ā..ā → ā..ŵ Watin-A */ - (0x01de <= c && c <= 0x01ee) || /* 9x Ǟ..Ǯ → ǟ..ǯ Watin-B */ - (0x01f8 <= c && c <= 0x021e) || /* 20x Ǹ..Ȟ → ǹ..ȟ Watin-B */ - (0x0222 <= c && c <= 0x0232) || /* 9x Ȣ..Ȳ → ȣ..ȳ Watin-B */ - (0x1e00 <= c && c <= 0x1eff)) { /*256x Ḁ..Ỿ → ḁ..ỿ Watin-C */ - if (c == 0x0130) return c - 199; - if (c == 0x1e9e) return c; - return c + (~c & 1); - } else if (0x01cf <= c && c <= 0x01db) { - return c + (c & 1); /* 7x Ǐ..Ǜ → ǐ..ǜ Watin-B */ - } else if (0x13a0 <= c && c <= 0x13ef) { - return c + 38864; /* 80x Ꭰ ..Ꮿ → ꭰ ..ꮿ Cherokee */ - } else { - l = 0; - r = n = sizeof(kLower) / sizeof(kLower[0]); - while (l < r) { - m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - if (kLower[m].y < c) { - l = m + 1; - } else { - r = m; - } - } - if (l < n && kLower[l].x <= c && c <= kLower[l].y) { - return c + kLower[l].d; - } else { - return c; - } - } - } else { - l = 0; - r = n = sizeof(kAstralLower) / sizeof(kAstralLower[0]); - while (l < r) { - m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - if (kAstralLower[m][1] < c) { - l = m + 1; - } else { - r = m; - } - } - if (l < n && kAstralLower[l][0] <= c && c <= kAstralLower[l][1]) { - return c + kAstralLower[l][2]; - } else { - return c; - } - } -} - -__weak_reference(towlower, towlower_l); diff --git a/libc/str/towupper.c b/libc/str/towupper.c deleted file mode 100644 index 42dd6f374..000000000 --- a/libc/str/towupper.c +++ /dev/null @@ -1,199 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/dce.h" -#include "libc/macros.h" -#include "libc/str/str.h" -// clang-format off - -static const struct { - unsigned short x; - unsigned short y; - short d; -} kUpper[] = { - {0x00b5, 0x00b5, +743}, /* 1x µ ..µ → Μ ..Μ Watin */ - {0x00e0, 0x00f6, -32}, /* 23x à ..ö → À ..Ö Watin */ - {0x00f8, 0x00fe, -32}, /* 7x ø ..þ → Ø ..Þ Watin */ - {0x00ff, 0x00ff, +121}, /* 1x ÿ ..ÿ → Ÿ ..Ÿ Watin */ - {0x017a, 0x017a, -1}, /* 1x ź ..ź → Ź ..Ź Watin-A */ - {0x017c, 0x017c, -1}, /* 1x ż ..ż → Ż ..Ż Watin-A */ - {0x017e, 0x017e, -1}, /* 1x ž ..ž → Ž ..Ž Watin-A */ - {0x017f, 0x017f, -300}, /* 1x ſ ..ſ → S ..S Watin-A */ - {0x0180, 0x0180, +195}, /* 1x ƀ ..ƀ → Ƀ ..Ƀ Watin-B */ - {0x0183, 0x0183, -1}, /* 1x ƃ ..ƃ → Ƃ ..Ƃ Watin-B */ - {0x0185, 0x0185, -1}, /* 1x ƅ ..ƅ → Ƅ ..Ƅ Watin-B */ - {0x0188, 0x0188, -1}, /* 1x ƈ ..ƈ → Ƈ ..Ƈ Watin-B */ - {0x018c, 0x018c, -1}, /* 1x ƌ ..ƌ → Ƌ ..Ƌ Watin-B */ - {0x0192, 0x0192, -1}, /* 1x ƒ ..ƒ → Ƒ ..Ƒ Watin-B */ - {0x0195, 0x0195, +97}, /* 1x ƕ ..ƕ → Ƕ ..Ƕ Watin-B */ - {0x0199, 0x0199, -1}, /* 1x ƙ ..ƙ → Ƙ ..Ƙ Watin-B */ - {0x019a, 0x019a, +163}, /* 1x ƚ ..ƚ → Ƚ ..Ƚ Watin-B */ - {0x019e, 0x019e, +130}, /* 1x ƞ ..ƞ → Ƞ ..Ƞ Watin-B */ - {0x01a1, 0x01a1, -1}, /* 1x ơ ..ơ → Ơ ..Ơ Watin-B */ - {0x01a3, 0x01a3, -1}, /* 1x ƣ ..ƣ → Ƣ ..Ƣ Watin-B */ - {0x01a5, 0x01a5, -1}, /* 1x ƥ ..ƥ → Ƥ ..Ƥ Watin-B */ - {0x01a8, 0x01a8, -1}, /* 1x ƨ ..ƨ → Ƨ ..Ƨ Watin-B */ - {0x01ad, 0x01ad, -1}, /* 1x ƭ ..ƭ → Ƭ ..Ƭ Watin-B */ - {0x01b0, 0x01b0, -1}, /* 1x ư ..ư → Ư ..Ư Watin-B */ - {0x01b4, 0x01b4, -1}, /* 1x ƴ ..ƴ → Ƴ ..Ƴ Watin-B */ - {0x01b6, 0x01b6, -1}, /* 1x ƶ ..ƶ → Ƶ ..Ƶ Watin-B */ - {0x01b9, 0x01b9, -1}, /* 1x ƹ ..ƹ → Ƹ ..Ƹ Watin-B */ - {0x01bd, 0x01bd, -1}, /* 1x ƽ ..ƽ → Ƽ ..Ƽ Watin-B */ - {0x01bf, 0x01bf, +56}, /* 1x ƿ ..ƿ → Ƿ ..Ƿ Watin-B */ - {0x01c5, 0x01c5, -1}, /* 1x Dž ..Dž → DŽ ..DŽ Watin-B */ - {0x01c6, 0x01c6, -2}, /* 1x dž ..dž → DŽ ..DŽ Watin-B */ - {0x01c8, 0x01c8, -1}, /* 1x Lj ..Lj → LJ ..LJ Watin-B */ - {0x01c9, 0x01c9, -2}, /* 1x lj ..lj → LJ ..LJ Watin-B */ - {0x01cb, 0x01cb, -1}, /* 1x Nj ..Nj → NJ ..NJ Watin-B */ - {0x01cc, 0x01cc, -2}, /* 1x nj ..nj → NJ ..NJ Watin-B */ - {0x01ce, 0x01ce, -1}, /* 1x ǎ ..ǎ → Ǎ ..Ǎ Watin-B */ - {0x01dd, 0x01dd, -79}, /* 1x ǝ ..ǝ → Ǝ ..Ǝ Watin-B */ - {0x01f2, 0x01f2, -1}, /* 1x Dz ..Dz → DZ ..DZ Watin-B */ - {0x01f3, 0x01f3, -2}, /* 1x dz ..dz → DZ ..DZ Watin-B */ - {0x01f5, 0x01f5, -1}, /* 1x ǵ ..ǵ → Ǵ ..Ǵ Watin-B */ - {0x023c, 0x023c, -1}, /* 1x ȼ ..ȼ → Ȼ ..Ȼ Watin-B */ - {0x023f, 0x0240, +10815}, /* 2x ȿ ..ɀ → Ȿ ..Ɀ Watin-B */ - {0x0242, 0x0242, -1}, /* 1x ɂ ..ɂ → Ɂ ..Ɂ Watin-B */ - {0x0247, 0x0247, -1}, /* 1x ɇ ..ɇ → Ɇ ..Ɇ Watin-B */ - {0x0249, 0x0249, -1}, /* 1x ɉ ..ɉ → Ɉ ..Ɉ Watin-B */ - {0x024b, 0x024b, -1}, /* 1x ɋ ..ɋ → Ɋ ..Ɋ Watin-B */ - {0x024d, 0x024d, -1}, /* 1x ɍ ..ɍ → Ɍ ..Ɍ Watin-B */ - {0x024f, 0x024f, -1}, /* 1x ɏ ..ɏ → Ɏ ..Ɏ Watin-B */ - {0x037b, 0x037d, +130}, /* 3x ͻ ..ͽ → Ͻ ..Ͽ Greek */ - {0x03ac, 0x03ac, -38}, /* 1x ά ..ά → Ά ..Ά Greek */ - {0x03ad, 0x03af, -37}, /* 3x έ ..ί → Έ ..Ί Greek */ - {0x03b1, 0x03c1, -32}, /* 17x α ..ρ → Α ..Ρ Greek */ - {0x03c2, 0x03c2, -31}, /* 1x ς ..ς → Σ ..Σ Greek */ - {0x03c3, 0x03cb, -32}, /* 9x σ ..ϋ → Σ ..Ϋ Greek */ - {0x03cc, 0x03cc, -64}, /* 1x ό ..ό → Ό ..Ό Greek */ - {0x03cd, 0x03ce, -63}, /* 2x ύ ..ώ → Ύ ..Ώ Greek */ - {0x03d0, 0x03d0, -62}, /* 1x ϐ ..ϐ → Β ..Β Greek */ - {0x03d1, 0x03d1, -57}, /* 1x ϑ ..ϑ → Θ ..Θ Greek */ - {0x03d5, 0x03d5, -47}, /* 1x ϕ ..ϕ → Φ ..Φ Greek */ - {0x03d6, 0x03d6, -54}, /* 1x ϖ ..ϖ → Π ..Π Greek */ - {0x03dd, 0x03dd, -1}, /* 1x ϝ ..ϝ → Ϝ ..Ϝ Greek */ - {0x03f0, 0x03f0, -86}, /* 1x ϰ ..ϰ → Κ ..Κ Greek */ - {0x03f1, 0x03f1, -80}, /* 1x ϱ ..ϱ → Ρ ..Ρ Greek */ - {0x03f5, 0x03f5, -96}, /* 1x ϵ ..ϵ → Ε ..Ε Greek */ - {0x0430, 0x044f, -32}, /* 32x а ..я → А ..Я Cyrillic */ - {0x0450, 0x045f, -80}, /* 16x ѐ ..џ → Ѐ ..Џ Cyrillic */ - {0x0461, 0x0461, -1}, /* 1x ѡ ..ѡ → Ѡ ..Ѡ Cyrillic */ - {0x0463, 0x0463, -1}, /* 1x ѣ ..ѣ → Ѣ ..Ѣ Cyrillic */ - {0x0465, 0x0465, -1}, /* 1x ѥ ..ѥ → Ѥ ..Ѥ Cyrillic */ - {0x0473, 0x0473, -1}, /* 1x ѳ ..ѳ → Ѳ ..Ѳ Cyrillic */ - {0x0491, 0x0491, -1}, /* 1x ґ ..ґ → Ґ ..Ґ Cyrillic */ - {0x0499, 0x0499, -1}, /* 1x ҙ ..ҙ → Ҙ ..Ҙ Cyrillic */ - {0x049b, 0x049b, -1}, /* 1x қ ..қ → Қ ..Қ Cyrillic */ - {0x0561, 0x0586, -48}, /* 38x ա ..ֆ → Ա ..Ֆ Armenian */ - {0x10d0, 0x10fa, +3008}, /* 43x ა ..ჺ → Ა ..Ჺ Georgian */ - {0x10fd, 0x10ff, +3008}, /* 3x ჽ ..ჿ → Ჽ ..Ჿ Georgian */ - {0x13f8, 0x13fd, -8}, /* 6x ᏸ ..ᏽ → Ᏸ ..Ᏽ Cherokee */ - {0x214e, 0x214e, -28}, /* 1x ⅎ ..ⅎ → Ⅎ ..Ⅎ Letterlike */ - {0x2170, 0x217f, -16}, /* 16x ⅰ ..ⅿ → Ⅰ ..Ⅿ Numbery */ - {0x2184, 0x2184, -1}, /* 1x ↄ ..ↄ → Ↄ ..Ↄ Numbery */ - {0x24d0, 0x24e9, -26}, /* 26x ⓐ ..ⓩ → Ⓐ ..Ⓩ Enclosed */ - {0x2c30, 0x2c5e, -48}, /* 47x ⰰ ..ⱞ → Ⰰ ..Ⱞ Glagolitic */ - {0x2d00, 0x2d25, -7264}, /* 38x ⴀ ..ⴥ → Ⴀ ..Ⴥ Georgian2 */ - {0x2d27, 0x2d27, -7264}, /* 1x ⴧ ..ⴧ → Ⴧ ..Ⴧ Georgian2 */ - {0x2d2d, 0x2d2d, -7264}, /* 1x ⴭ ..ⴭ → Ⴭ ..Ⴭ Georgian2 */ - {0xff41, 0xff5a, -32}, /* 26x a..z → A..Z Dubs */ -}; - -static const int kAstralUpper[][3] = { - {0x10428, 0x1044f, -40}, /* 40x 𐐨..𐑏 → 𐐀..𐐧 Deseret */ - {0x104d8, 0x104fb, -40}, /* 36x 𐓘..𐓻 → 𐒰..𐓓 Osage */ - {0x1d41a, 0x1d433, -26}, /* 26x 𝐚..𝐳 → 𝐀..𝐙 Math */ - {0x1d456, 0x1d467, -26}, /* 18x 𝑖..𝑧 → 𝐼..𝑍 Math */ - {0x1d482, 0x1d49b, -26}, /* 26x 𝒂..𝒛 → 𝑨..𝒁 Math */ - {0x1d4c8, 0x1d4cf, -26}, /* 8x 𝓈..𝓏 → 𝒮..𝒵 Math */ - {0x1d4ea, 0x1d503, -26}, /* 26x 𝓪..𝔃 → 𝓐..𝓩 Math */ - {0x1d527, 0x1d52e, -26}, /* 8x 𝔧..𝔮 → 𝔍..𝔔 Math */ - {0x1d586, 0x1d59f, -26}, /* 26x 𝖆..𝖟 → 𝕬..𝖅 Math */ - {0x1d5ba, 0x1d5d3, -26}, /* 26x 𝖺..𝗓 → 𝖠..𝖹 Math */ - {0x1d5ee, 0x1d607, -26}, /* 26x 𝗮..𝘇 → 𝗔..𝗭 Math */ - {0x1d622, 0x1d63b, -26}, /* 26x 𝘢..𝘻 → 𝘈..𝘡 Math */ - {0x1d68a, 0x1d6a3, +442}, /* 26x 𝒂..𝒛 → 𝘼..𝙕 Math */ - {0x1d6c2, 0x1d6d2, -26}, /* 26x 𝚊..𝚣 → 𝙰..𝚉 Math */ - {0x1d6fc, 0x1d70c, -26}, /* 17x 𝛂..𝛒 → 𝚨..𝚸 Math */ - {0x1d736, 0x1d746, -26}, /* 17x 𝛼..𝜌 → 𝛢..𝛲 Math */ - {0x1d770, 0x1d780, -26}, /* 17x 𝜶..𝝆 → 𝜜..𝜬 Math */ - {0x1d770, 0x1d756, -26}, /* 17x 𝝰..𝞀 → 𝝖..𝝦 Math */ - {0x1d736, 0x1d790, -90}, /* 17x 𝜶..𝝆 → 𝞐..𝞠 Math */ -}; - -/** - * Converts wide character to upper case. - */ -wint_t towupper(wint_t c) { - int m, l, r, n; - if (c < 0200) { - if ('a' <= c && c <= 'z') { - return c - 32; - } else { - return c; - } - } else if (c <= 0xffff) { - if ((0x0101 <= c && c <= 0x0177) || /* 60x ā..ŵ → Ā..ā Watin-A */ - (0x01df <= c && c <= 0x01ef) || /* 9x ǟ..ǯ → Ǟ..Ǯ Watin-B */ - (0x01f8 <= c && c <= 0x021e) || /* 20x ǹ..ȟ → Ǹ..Ȟ Watin-B */ - (0x0222 <= c && c <= 0x0232) || /* 9x ȣ..ȳ → Ȣ..Ȳ Watin-B */ - (0x1e01 <= c && c <= 0x1eff)) { /*256x ḁ..ỿ → Ḁ..Ỿ Watin-C */ - if (c == 0x0131) return c + 232; - if (c == 0x1e9e) return c; - return c - (c & 1); - } else if (0x01d0 <= c && c <= 0x01dc) { - return c - (~c & 1); /* 7x ǐ..ǜ → Ǐ..Ǜ Watin-B */ - } else if (0xab70 <= c && c <= 0xabbf) { - return c - 38864; /* 80x ꭰ ..ꮿ → Ꭰ ..Ꮿ Cherokee Supplement */ - } else { - l = 0; - r = n = sizeof(kUpper) / sizeof(kUpper[0]); - while (l < r) { - m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - if (kUpper[m].y < c) { - l = m + 1; - } else { - r = m; - } - } - if (l < n && kUpper[l].x <= c && c <= kUpper[l].y) { - return c + kUpper[l].d; - } else { - return c; - } - } - } else { - l = 0; - r = n = sizeof(kAstralUpper) / sizeof(kAstralUpper[0]); - while (l < r) { - m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - if (kAstralUpper[m][1] < c) { - l = m + 1; - } else { - r = m; - } - } - if (l < n && kAstralUpper[l][0] <= c && c <= kAstralUpper[l][1]) { - return c + kAstralUpper[l][2]; - } else { - return c; - } - } -} - -__weak_reference(towupper, towupper_l); diff --git a/libc/str/wcsstr.c b/libc/str/wcsstr.c index 1867ecd93..bbf064c59 100644 --- a/libc/str/wcsstr.c +++ b/libc/str/wcsstr.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/str/kmp.h" #include "libc/str/str.h" /** @@ -28,19 +29,5 @@ * @see memmem() */ wchar_t *wcsstr(const wchar_t *haystack, const wchar_t *needle) { - size_t i; - for (;;) { - for (i = 0;;) { - if (!needle[i]) - return (/*unconst*/ wchar_t *)haystack; - if (!haystack[i]) - break; - if (needle[i] != haystack[i]) - break; - ++i; - } - if (!*haystack++) - break; - } - return NULL; + return __memmem_kmp32(haystack, wcslen(haystack), needle, wcslen(needle)); } diff --git a/libc/str/wctrans.c b/libc/str/wctrans.c deleted file mode 100644 index 19c4fa376..000000000 --- a/libc/str/wctrans.c +++ /dev/null @@ -1,28 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/str/str.h" -#include "libc/wctype.h" - -wctrans_t wctrans(const char *s) { - if (!strcmp(s, "toupper")) - return (wctrans_t)1; - if (!strcmp(s, "tolower")) - return (wctrans_t)2; - return 0; -} diff --git a/libc/str/wcwidth.c b/libc/str/wcwidth.c index 66a7a9113..fb8e076cb 100644 --- a/libc/str/wcwidth.c +++ b/libc/str/wcwidth.c @@ -1,44 +1,61 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" #include "libc/str/unicode.h" -#include "libc/str/wcwidth_osx.internal.h" -#include "libc/wctype.h" +__static_yoink("musl_libc_notice"); +// clang-format off -/** - * Returns cell width of monospace character. - */ -int wcwidth(wchar_t c) { - int res; - if (LIKELY(32 <= c && c < 127)) - return 1; - if (VERY_UNLIKELY((uint32_t)c >= 0x100000)) { - if ((uint32_t)c <= 0x10FFFD) - return 1; - return -1; - } - res = _wcwidth_osx(c); - if (VERY_UNLIKELY(!res)) { - if (!c) - return 0; - if (iswcntrl(c)) - return -1; - } - return res; +static const unsigned char table[] = { +#include "nonspacing.inc" +}; + +static const unsigned char wtable[] = { +#include "wide.inc" +}; + +int wcwidth(wchar_t wc) +{ + if (wc < 0xff) { + if (wc >= 0) + return ((wc+1) & 0x7f) >= 0x21 ? 1 : wc ? -1 : 0; + return -1; + } + if ((wc & 0xfffeffffU) < 0xfffe) { + if ((table[table[wc>>8]*32+((wc&255)>>3)]>>(wc&7))&1) + return 0; + if ((wtable[wtable[wc>>8]*32+((wc&255)>>3)]>>(wc&7))&1) + return 2; + return 1; + } + if ((wc & 0xfffe) == 0xfffe) + return -1; + if (wc-0x20000U < 0x20000) + return 2; + if (wc == 0xe0001 || wc-0xe0020U < 0x5f || wc-0xe0100U < 0xef) + return 0; + return 1; } diff --git a/libc/str/wcwidth_osx.c b/libc/str/wcwidth_osx.c deleted file mode 100644 index 43a1bda9d..000000000 --- a/libc/str/wcwidth_osx.c +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright (c) 2012 Byron Lai -// -// Permission is hereby granted, free of charge, to any person obtaining -// a copy of this software and associated documentation files (the -// "Software"), to deal in the Software without restriction, including -// without limitation the rights to use, copy, modify, merge, publish, -// distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to -// the following conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "libc/macros.h" -#include "libc/str/wcwidth_osx.internal.h" - -const uint8_t kWcwidthOsxIndex1[] = { - 0, 16, 26, 33, 34, 50, 56, 72, 88, 104, 107, 107, 107, 107, - 115, 127, 143, 143, 143, 143, 143, 156, 160, 164, 178, 178, 178, 178, - 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, - 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, - 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, - 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, - 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, - 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, - 178, 178, 178, 178, 178, 178, 178, 178, 194, 194, 194, 194, 194, 194, - 194, 195, 211, 211, 211, 211, 211, 211, 211, 212, -}; - -const uint16_t kWcwidthOsxIndex2[] = { - 0, 8, 22, 38, 54, 70, 86, 102, 118, 134, 150, 163, 179, 195, 211, - 227, 243, 256, 272, 284, 299, 305, 321, 336, 352, 368, 376, 376, 376, 376, - 376, 376, 379, 393, 393, 393, 393, 393, 393, 393, 393, 393, 393, 393, 393, - 393, 393, 393, 393, 396, 412, 412, 424, 439, 455, 471, 487, 487, 487, 487, - 487, 487, 487, 487, 487, 487, 487, 490, 504, 504, 504, 504, 520, 520, 520, - 520, 520, 520, 520, 520, 520, 520, 520, 520, 529, 544, 559, 575, 591, 607, - 623, 629, 645, 661, 664, 664, 664, 664, 664, 664, 664, 664, 664, 664, 680, - 685, 701, 705, 705, 705, 705, 705, 705, 705, 705, 705, 705, 705, 705, 705, - 705, 705, 705, 721, 737, 753, 764, 780, 780, 780, 780, 780, 780, 780, 780, - 796, 801, 801, 801, 801, 801, 801, 801, 817, 817, 817, 817, 817, 817, 817, - 817, 817, 817, 817, 817, 817, 817, 817, 817, 827, 834, 834, 834, 834, 834, - 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 834, 850, 866, 867, 867, - 867, 867, 867, 867, 867, 867, 867, 867, 867, 867, 867, 867, 867, 867, 883, - 883, 883, 883, 883, 883, 883, 883, 883, 883, 883, 883, 883, 883, 883, 883, - 884, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, - 900, 900, 901, -}; - -const uint32_t kWcwidthOsxIndex3[] = { - 0, 32, 32, 33, 64, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 128, 128, 128, 144, 175, 205, 208, 208, - 208, 208, 237, 247, 247, 247, 247, 275, 292, 316, 340, 351, - 381, 402, 428, 457, 478, 510, 527, 527, 537, 564, 582, 600, - 619, 632, 632, 658, 690, 711, 738, 738, 738, 738, 738, 738, - 738, 738, 767, 773, 804, 834, 866, 889, 920, 951, 980, 1003, - 1034, 1065, 1094, 1117, 1148, 1180, 1210, 1233, 1263, 1294, 1323, 1355, - 1384, 1410, 1441, 1464, 1495, 1527, 1559, 1582, 1611, 1643, 1673, 1696, - 1727, 1759, 1791, 1817, 1849, 1881, 1912, 1927, 1958, 1986, 2017, 2049, - 2081, 2111, 2143, 2169, 2195, 2214, 2240, 2252, 2282, 2303, 2335, 2354, - 2380, 2406, 2412, 2442, 2468, 2484, 2516, 2516, 2522, 2554, 2554, 2554, - 2554, 2554, 2586, 2586, 2609, 2641, 2664, 2680, 2710, 2734, 2749, 2773, - 2778, 2810, 2813, 2845, 2845, 2856, 2887, 2888, 2888, 2888, 2888, 2888, - 2888, 2888, 2888, 2888, 2888, 2888, 2888, 2888, 2888, 2888, 2897, 2929, - 2961, 2961, 2976, 3008, 3040, 3072, 3104, 3136, 3148, 3178, 3210, 3242, - 3274, 3274, 3282, 3314, 3337, 3348, 3348, 3380, 3409, 3441, 3459, 3491, - 3513, 3535, 3565, 3574, 3606, 3606, 3606, 3606, 3606, 3606, 3606, 3634, - 3646, 3676, 3697, 3729, 3750, 3776, 3776, 3808, 3816, 3830, 3843, 3875, - 3875, 3875, 3875, 3907, 3907, 3907, 3907, 3907, 3907, 3939, 3964, 3996, - 3996, 3996, 3996, 3996, 3996, 3996, 3996, 4006, 4038, 4064, 4095, 4127, - 4138, 4154, 4183, 4215, 4239, 4254, 4286, 4306, 4338, 4360, 4376, 4408, - 4408, 4424, 4443, 4466, 4482, 4482, 4482, 4482, 4482, 4482, 4482, 4482, - 4482, 4505, 4516, 4516, 4516, 4516, 4516, 4540, 4572, 4597, 4629, 4661, - 4661, 4661, 4661, 4661, 4661, 4661, 4661, 4661, 4661, 4661, 4661, 4661, - 4663, 4695, 4723, 4727, 4758, 4782, 4802, 4833, 4844, 4868, 4888, 4904, - 4904, 4904, 4904, 4904, 4904, 4904, 4904, 4904, 4904, 4904, 4904, 4904, - 4904, 4904, 4904, 4923, 4944, 4944, 4944, 4944, 4944, 4976, 4993, 5009, - 5024, 5056, 5056, 5056, 5077, 5102, 5128, 5144, 5170, 5202, 5234, 5234, - 5266, 5281, 5298, 5298, 5330, 5357, 5357, 5369, 5401, 5401, 5401, 5401, - 5401, 5401, 5411, 5433, 5465, 5487, 5519, 5520, 5529, 5556, 5556, 5556, - 5588, 5606, 5623, 5623, 5640, 5656, 5664, 5680, 5696, 5728, 5756, 5772, - 5772, 5772, 5772, 5773, 5805, 5805, 5805, 5805, 5805, 5805, 5805, 5805, - 5805, 5805, 5805, 5805, 5805, 5805, 5805, 5805, 5815, 5847, 5847, 5847, - 5847, 5847, 5847, 5847, 5847, 5847, 5847, 5847, 5847, 5847, 5847, 5847, - 5847, 5851, 5879, 5879, 5911, 5911, 5911, 5911, 5911, 5911, 5911, 5911, - 5911, 5911, 5911, 5911, 5911, 5911, 5911, 5911, 5930, 5946, 5971, 5978, - 6010, 6010, 6010, 6010, 6010, 6010, 6010, 6010, 6030, 6062, 6094, 6122, - 6146, 6146, 6146, 6178, 6178, 6178, 6178, 6197, 6210, 6210, 6215, 6245, - 6272, 6304, 6312, 6344, 6344, 6371, 6397, 6429, 6429, 6441, 6473, 6473, - 6473, 6473, 6473, 6505, 6514, 6546, 6578, 6578, 6578, 6578, 6578, 6578, - 6578, 6578, 6578, 6578, 6578, 6578, 6578, 6610, 6610, 6610, 6610, 6610, - 6610, 6610, 6610, 6610, 6610, 6610, 6610, 6610, 6610, 6610, 6610, 6638, - 6642, 6642, 6642, 6642, 6642, 6642, 6642, 6642, 6642, 6642, 6642, 6642, - 6642, 6642, 6642, 6642, 6674, 6674, 6674, 6674, 6674, 6674, 6674, 6674, - 6674, 6674, 6674, 6674, 6674, 6674, 6674, 6674, 6690, 6722, 6722, 6722, - 6722, 6722, 6722, 6722, 6722, 6740, 6756, 6777, 6793, 6793, 6799, 6825, - 6857, 6888, 6920, 6926, 6926, 6940, 6958, 6977, 6977, 6977, 6977, 6977, - 6977, 6977, 6977, 6977, 6977, 7009, 7025, 7041, 7059, 7083, 7099, 7129, - 7157, 7173, 7198, 7220, 7220, 7220, 7223, 7254, 7255, 7255, 7286, 7287, - 7288, 7319, 7351, 7383, 7388, 7419, 7449, 7481, 7481, 7481, 7486, 7518, - 7530, 7553, 7553, 7574, 7602, 7618, 7634, 7664, 7664, 7664, 7664, 7696, - 7728, 7743, 7760, 7792, 7819, 7840, 7851, 7883, 7914, 7942, 7964, 7996, - 7996, 7996, 7996, 7998, 8018, 8028, 8028, 8028, 8028, 8028, 8028, 8028, - 8028, 8028, 8028, 8028, 8028, 8028, 8028, 8028, 8028, 8060, 8070, 8102, - 8102, 8102, 8102, 8102, 8102, 8134, 8166, 8166, 8166, 8166, 8166, 8166, - 8166, 8198, 8223, 8255, 8280, 8280, 8280, 8280, 8280, 8280, 8280, 8280, - 8280, 8280, 8280, 8280, 8280, 8280, 8280, 8280, 8312, 8312, 8312, 8312, - 8312, 8312, 8312, 8312, 8312, 8312, 8312, 8312, 8312, 8312, 8312, 8312, - 8329, 8344, 8344, 8344, 8344, 8376, 8376, 8376, 8405, 8425, 8425, 8425, - 8425, 8425, 8425, 8425, 8425, 8425, 8425, 8425, 8425, 8425, 8425, 8425, - 8425, 8457, 8457, 8457, 8457, 8457, 8457, 8457, 8467, 8499, 8524, 8533, - 8558, 8587, 8609, 8623, 8653, 8685, 8685, 8715, 8721, 8721, 8721, 8721, - 8721, 8753, 8753, 8762, 8767, 8785, 8785, 8785, 8785, 8817, 8817, 8828, - 8850, 8853, 8885, 8914, 8919, 8945, 8975, 9007, 9025, 9025, 9025, 9025, - 9025, 9051, 9059, 9059, 9059, 9059, 9059, 9059, 9059, 9059, 9079, 9093, - 9125, 9125, 9125, 9125, 9125, 9125, 9125, 9125, 9125, 9125, 9125, 9125, - 9125, 9125, 9125, 9125, 9157, 9177, 9193, 9193, 9205, 9225, 9225, 9225, - 9225, 9225, 9225, 9225, 9225, 9225, 9225, 9225, 9225, 9225, 9225, 9225, - 9225, 9257, 9257, 9257, 9257, 9257, 9257, 9257, 9257, 9257, 9257, 9257, - 9257, 9257, 9257, 9257, 9257, 9266, 9289, 9289, 9289, 9289, 9289, 9289, - 9289, 9289, 9289, 9289, 9289, 9289, 9289, 9289, 9289, 9289, 9321, 9321, - 9321, 9321, 9321, 9321, 9321, 9321, 9321, 9321, 9321, 9321, 9321, 9321, - 9321, 9321, 9323, 9353, 9353, 9353, 9353, 9353, 9353, 9353, 9353, 9353, - 9353, 9353, 9353, 9353, 9353, 9353, 9353, 9385, 9385, 9385, 9385, 9385, - 9385, 9385, 9385, 9385, 9385, 9385, 9385, 9385, 9385, 9385, 9385, 9387, - 9419, 9419, 9419, 9419, 9419, 9419, 9419, 9419, 9419, 9419, 9419, 9419, - 9419, 9419, 9419, 9419, 9421, -}; - -const uint32_t kWcwidthOsx[] = { - 0x00000000, 0x00000000, 0x55555555, 0x55555555, 0x00000000, 0x00000000, - 0x55555555, 0x55555555, 0x00000000, 0x00000000, 0x15505555, 0x54455540, - 0x15555555, 0x55555555, 0x55555555, 0x55554000, 0x55555555, 0x00001555, - 0x55555500, 0x54155555, 0x55555555, 0x14555555, 0x00000000, 0x04000000, - 0x54000041, 0x01555555, 0x00001550, 0x00555550, 0x55505550, 0x55555555, - 0x00015555, 0x50000000, 0x45555555, 0x55555555, 0x15555555, 0x04140000, - 0x55555550, 0x55551055, 0x00005555, 0x55550000, 0x55555555, 0x00005555, - 0x00000040, 0x55555550, 0x55555555, 0x55400005, 0x00000005, 0x00000000, - 0x55555550, 0x15555555, 0x54000150, 0x55000101, 0x55555055, 0x54000155, - 0x15554505, 0x55555414, 0x40455545, 0x40015015, 0x40001141, 0x54014500, - 0x55555555, 0x15544005, 0x55554140, 0x14555455, 0x00140145, 0x00400000, - 0x40011540, 0x15415555, 0x55440000, 0x55545455, 0x45554555, 0x01501551, - 0x01014400, 0x05000000, 0x04555550, 0x45000000, 0x54141555, 0x55455555, - 0x50155145, 0x00505040, 0x51401000, 0x55555505, 0x10000000, 0x54540555, - 0x50144501, 0x55540540, 0x50140155, 0x40010151, 0x55550000, 0x01555555, - 0x45555150, 0x55555545, 0x54555551, 0x00550405, 0x40000000, 0x54154001, - 0x40001555, 0x55141555, 0x55545455, 0x55551555, 0x55405545, 0x00001454, - 0x01440005, 0x05155554, 0x51400000, 0x55454555, 0x55515555, 0x54055555, - 0x00545440, 0x40001000, 0x55555415, 0x15550155, 0x55555514, 0x55540555, - 0x55155555, 0x55541155, 0x00150000, 0x00015554, 0x05400000, 0x55540000, - 0x55555555, 0x00145555, 0x01555000, 0x55555400, 0x00000005, 0x00000000, - 0x10450450, 0x55515400, 0x51411151, 0x10000145, 0x00004554, 0x14155554, - 0x00000000, 0x40000000, 0x55555555, 0x55541555, 0x45555555, 0x55155544, - 0x55555555, 0x00000015, 0x00550400, 0x00000000, 0x55500000, 0x15551554, - 0x00000000, 0x40000000, 0x55555555, 0x15555555, 0x55105440, 0x55555555, - 0x55555055, 0x55555555, 0x55500555, 0x55555555, 0x00055555, 0x55555500, - 0x55555555, 0xaaaaaa01, 0xaaaaaaaa, 0x000800aa, 0x00000000, 0x55500000, - 0x55555555, 0x15455555, 0x15445554, 0x55555554, 0x55555555, 0x55550551, - 0x05515555, 0x50551555, 0x51555555, 0x55555555, 0x45555555, 0x55555415, - 0x55555555, 0x55500155, 0x55555555, 0x54001555, 0x55555555, 0x01555555, - 0x55550000, 0x55555555, 0x00005555, 0x55555554, 0x05555555, 0x55555554, - 0x55555555, 0x00000001, 0x51555555, 0x00000005, 0x55555555, 0x00001405, - 0x55555555, 0x00000005, 0x51555555, 0x00000001, 0x55555555, 0x55555555, - 0x55500010, 0x50000014, 0x55501555, 0x55500055, 0x55500055, 0x55510155, - 0x55500055, 0x55555555, 0x00055555, 0x55555550, 0x55555555, 0x00000045, - 0x00000000, 0x55555500, 0x55555555, 0x00005501, 0x00055514, 0x55555404, - 0x55555555, 0x00005541, 0x55555540, 0x55555555, 0x55540015, 0x40015555, - 0x54015555, 0x55555555, 0x41555555, 0x00000505, 0x00000000, 0x55555000, - 0x55555555, 0x45440045, 0x55005555, 0x00555555, 0x05555400, 0x55555554, - 0x55555555, 0x55555501, 0x00000000, 0x00000000, 0x55555555, 0x55555555, - 0x55555540, 0x55555555, 0x00000015, 0x00000000, 0x55555540, 0x55555555, - 0x50000015, 0x55555555, 0x00000015, 0x55000000, 0x55555555, 0x50555555, - 0x55555055, 0x55555555, 0x05550555, 0x44445555, 0x55555555, 0x41555555, - 0x55555555, 0x15555555, 0x05555555, 0x55554555, 0x54541555, 0x55554555, - 0x55554005, 0x50001555, 0x55555555, 0x05555555, 0x50000010, 0x55555550, - 0x00001551, 0x55555550, 0x00005555, 0x00000000, 0x00010000, 0x55550000, - 0x55555555, 0x55405555, 0x55555555, 0x00155555, 0x55555550, 0x55555555, - 0x555555a5, 0x55555555, 0x00000055, 0x55000000, 0x55555555, 0x00555555, - 0x00000000, 0x55555400, 0x00000000, 0x55555400, 0x55555555, 0x55554155, - 0x55555555, 0x00001555, 0x00000000, 0x55154000, 0x55555550, 0x55554555, - 0x45555555, 0x55510154, 0x55555551, 0x55555555, 0x55555501, 0x55555455, - 0x55550115, 0x55555555, 0x55405555, 0x00000000, 0x00000000, 0x55555555, - 0x55555555, 0x55555554, 0x55555555, 0x05555554, 0x55555555, 0x55555555, - 0x50000000, 0x55555555, 0x05555555, 0x55550000, 0x55555555, 0x00005555, - 0x00000004, 0x55555550, 0x00015555, 0x55515550, 0x55515551, 0x55555551, - 0x55555555, 0x00000005, 0x00000000, 0xaaaaaaa0, 0xa8aaaaaa, 0xaaaaaaaa, - 0x02aaaaaa, 0xaaa80000, 0xaaaaaaaa, 0x0002aaaa, 0xaaa80000, 0xaaa802aa, - 0xaaaaaaaa, 0x8002aaaa, 0x1aaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaa00, - 0xaaaaaaaa, 0xaaa800aa, 0xaaaaaaaa, 0xaaaa80aa, 0xaaaaaaaa, 0xaaaa2aaa, - 0xaaaaaaaa, 0x00000000, 0xaaaaaaaa, 0x2aaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, - 0xaa000000, 0xaaaaaaaa, 0xa8aaaaaa, 0xaaaaaaaa, 0x02aaaaaa, 0xaaaa8000, - 0xaaaaaaaa, 0x00002aaa, 0x00000000, 0xaaaa8000, 0xaaaaaaaa, 0xaaa02aaa, - 0xaaaaaaaa, 0x000aaaaa, 0x00000000, 0x55500000, 0x55555555, 0x00055555, - 0x50000000, 0x55555555, 0x05555555, 0x55555555, 0x55500005, 0x55555555, - 0x00000005, 0x00000000, 0x55555550, 0x55555555, 0x00000005, 0x00000000, - 0x55151550, 0x55555554, 0x00554155, 0x00000000, 0x55555555, 0x55555555, - 0x55550000, 0x55555555, 0x00005555, 0x01555554, 0x00000000, 0x54000000, - 0x55555555, 0x01555555, 0x00010000, 0x00000000, 0x55540000, 0x55555555, - 0x00015555, 0x55555550, 0x50555550, 0x00000005, 0x00000000, 0xaaaaaaa0, - 0xaaaaaaaa, 0x0000000a, 0x00000000, 0x55555550, 0x55555555, 0x00000005, - 0xaaaaaaa0, 0xaaaaaaaa, 0xaaaaaa0a, 0xaaaaaaaa, 0xaaa800aa, 0xaaaaaaaa, - 0x0002aaaa, 0x00000000, 0x55540000, 0x55000000, 0x55551001, 0x15555555, - 0x51451155, 0x55555555, 0x05555555, 0x00000000, 0x55555554, 0x55555555, - 0x00000001, 0x55555554, 0x55555555, 0x55555541, 0x55555555, 0x00000015, - 0x55400000, 0x00015555, 0xaaa80000, 0x0054002a, 0xaaaaa800, 0xaaa8aaaa, - 0x500aa2aa, 0x55555515, 0x55555555, 0xaaaa8055, 0xaaaaaaaa, 0x55556aaa, - 0x55555555, 0x15541555, 0x15541554, 0x4aaa8054, 0x00000555, 0x55554140, - 0x55555515, 0x55451555, 0x55415555, 0x00015555, 0x00000000, 0x55540000, - 0x55555555, 0x50015555, 0x55555401, 0x05555555, 0x55555554, 0x55555555, - 0x55555001, 0x00000005, 0x00000000, 0x55555550, 0x55555555, 0x00000000, - 0x00000000, 0x55555555, 0x01555555, 0x55555555, 0x55555555, 0x00000000, - 0x00000000, 0x55555555, 0x15555555, 0x55400000, 0x00155555, 0x00000000, - 0x55400000, 0x55555555, 0x55515555, 0x55555555, 0x50055555, 0x00555555, - 0x00000000, 0x55000000, 0x55555555, 0x00555555, 0x00000000, 0x55000000, - 0x55555105, 0x14555555, 0x00000410, 0x00000000, 0x55555000, 0x55555555, - 0x00000400, 0x00000000, 0x00001000, 0x45455000, 0x55555555, 0x40000015, - 0x40001555, 0x00005555, 0x00000000, 0x55550000, 0x55555555, 0x00005555, - 0x00000000, 0x55550000, 0x55555555, 0x00005555, 0x00015400, 0x00000000, - 0x55540000, 0x55555555, 0x00015555, 0x55555540, 0x55555555, 0x55555415, - 0x55555555, 0x55550155, 0x50000001, 0x55554000, 0x40155555, 0x55555555, - 0x01555555, 0x00000000, 0x54000000, 0x55555555, 0x01555555, 0x00000001, - 0x00000000, 0x55555554, 0x55555555, 0x00000001, 0x00000000, 0x55555554, - 0x55555555, 0x55555551, 0x55555555, 0x50504145, 0x15555545, 0x55554551, - 0x55555555, 0x50551555, 0x45554555, 0x55555555, 0x45515555, 0x55540455, - 0x55555554, 0x55555555, 0x55555541, 0x55555555, 0x55555415, 0x55555555, - 0x00000155, 0x00000000, 0x55555400, 0x55555555, 0x55540155, 0x55555555, - 0x00015555, 0x00000000, 0xaaa80000, 0xaaaaaaaa, 0x0002aaaa, 0x00000000, - 0xaaa80000, 0xaaaaaaaa, 0x0002aaaa, 0x00000000, 0x55540000, 0x55555555, - 0x55415555, 0x55555555, 0x00155555, -}; diff --git a/libc/str/wcwidth_osx.internal.h b/libc/str/wcwidth_osx.internal.h deleted file mode 100644 index 89ff0808c..000000000 --- a/libc/str/wcwidth_osx.internal.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_STR_WCWIDTH_OSX_H_ -#define COSMOPOLITAN_LIBC_STR_WCWIDTH_OSX_H_ -COSMOPOLITAN_C_START_ - -extern const uint32_t kWcwidthOsx[591]; -extern const uint8_t kWcwidthOsxIndex1[136]; -extern const uint16_t kWcwidthOsxIndex2[228]; -extern const uint32_t kWcwidthOsxIndex3[917]; - -static inline int _wcwidth_osx(uint32_t codePoint) { - uint32_t a, b, c, d; - a = kWcwidthOsxIndex1[codePoint >> 13]; - b = kWcwidthOsxIndex2[a + ((codePoint >> 9) & 0xf)]; - c = kWcwidthOsxIndex3[b + ((codePoint >> 5) & 0xf)]; - d = c + (codePoint & 0x1f); - return (kWcwidthOsx[d >> 4] >> ((d & 0xf) << 1)) & 3; -} - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_STR_WCWIDTH_OSX_H_ */ diff --git a/libc/str/wide.inc b/libc/str/wide.inc new file mode 100644 index 000000000..e403c9a5a --- /dev/null +++ b/libc/str/wide.inc @@ -0,0 +1,65 @@ +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,19,16,20,21,22,16,16,16,23,16,16,24,25,26,27,28,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,29, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,30,16,16,16,16,31,16,16,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,32,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,16,16,16,33, +34,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,35,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,36,17,17,37,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,38,39,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,40,41,42,43,44,45,46,47,16,48,49,16,16,16,16, +16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,6,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,0,0,48,0,0,0,0,0,0,255,15,0,0,0,0,128,0,0,8, +0,2,12,0,96,48,64,16,0,0,4,44,36,32,12,0,0,0,1,0,0,0,80,184,0,0,0,0,0,0,0,224, +0,0,0,1,128,0,0,0,0,0,0,0,0,0,0,0,24,0,0,0,0,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,251,255,255,255,255,255,255,255, +255,255,255,15,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,63,0,0,0,255,15,255,255,255,255, +255,255,255,127,254,255,255,255,255,255,255,255,255,255,127,254,255,255,255, +255,255,255,255,255,255,255,255,255,224,255,255,255,255,255,254,255,255,255, +255,255,255,255,255,255,255,127,255,255,255,255,255,7,255,255,255,255,15,0, +255,255,255,255,255,127,255,255,255,255,255,0,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0, +0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,31,255,255,255,255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, +255,255,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,15,0,0,0,0,0,0,0,0,0,0,0,0,0,255,3,0,0,255,255,255,255,247,255,127,15,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,254,255,255,255,255,255,255,255,255,255,255, +255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,7,0,255,255,255,127,0,0,0,0,0, +0,7,0,240,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255, +15,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,64,254,7,0,0,0,0,0,0,0,0,0,0,0,0,7,0,255,255,255, +255,255,15,255,1,3,0,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, +1,224,191,255,255,255,255,255,255,255,255,223,255,255,15,0,255,255,255,255, +255,135,15,0,255,255,17,255,255,255,255,255,255,255,255,127,253,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +159,255,255,255,255,255,255,255,63,0,120,255,255,255,0,0,4,0,0,96,0,16,0,0,0, +0,0,0,0,0,0,0,248,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,255,255, +255,255,255,255,255,255,63,16,39,0,0,24,240,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,255,15,0, +0,0,224,255,255,255,255,255,255,255,255,255,255,255,255,123,252,255,255,255, +255,231,199,255,255,255,231,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,15,7,7,0,63,0,0,0,0,0,0,0,0,0,0,0,0,0, diff --git a/libc/testlib/BUILD.mk b/libc/testlib/BUILD.mk index 95e11d95a..ddd3ce16a 100644 --- a/libc/testlib/BUILD.mk +++ b/libc/testlib/BUILD.mk @@ -112,8 +112,9 @@ LIBC_TESTLIB_A_DIRECTDEPS = \ THIRD_PARTY_COMPILER_RT \ THIRD_PARTY_DLMALLOC \ THIRD_PARTY_GDTOA \ + THIRD_PARTY_MUSL \ + THIRD_PARTY_TZ \ THIRD_PARTY_XED \ - THIRD_PARTY_TZ LIBC_TESTLIB_A_DEPS := \ $(call uniq,$(foreach x,$(LIBC_TESTLIB_A_DIRECTDEPS),$($(x)))) diff --git a/test/ctl/shared_ptr_test.cc b/test/ctl/shared_ptr_test.cc index c9f9f0516..27dd0b76c 100644 --- a/test/ctl/shared_ptr_test.cc +++ b/test/ctl/shared_ptr_test.cc @@ -69,7 +69,7 @@ struct Derived : Base int main() { - int a, b; + int a; { // Shouldn't cause memory leaks. @@ -182,6 +182,7 @@ main() return 13; } +#if 0 // TODO(mrdomino): find a different way { // owner_before works across shared and weak pointers. shared_ptr x(&a, CallG()); @@ -191,6 +192,7 @@ main() if (!x.owner_before(weak_ptr(y))) return 15; } +#endif { // Use counts work like you'd expect diff --git a/test/libc/intrin/BUILD.mk b/test/libc/intrin/BUILD.mk index dcde4ae37..1072638fe 100644 --- a/test/libc/intrin/BUILD.mk +++ b/test/libc/intrin/BUILD.mk @@ -37,15 +37,16 @@ TEST_LIBC_INTRIN_DIRECTDEPS = \ LIBC_STR \ LIBC_SYSV \ LIBC_SYSV_CALLS \ - LIBC_THREAD \ LIBC_TESTLIB \ + LIBC_THREAD \ LIBC_TINYMATH \ LIBC_X \ - TOOL_VIZ_LIB \ THIRD_PARTY_COMPILER_RT \ + THIRD_PARTY_MUSL \ THIRD_PARTY_NSYNC \ THIRD_PARTY_OPENMP \ - THIRD_PARTY_XED + THIRD_PARTY_XED \ + TOOL_VIZ_LIB \ TEST_LIBC_INTRIN_DEPS := \ $(call uniq,$(foreach x,$(TEST_LIBC_INTRIN_DIRECTDEPS),$($(x)))) diff --git a/test/libc/str/memmem_test.c b/test/libc/str/memmem_test.c index 413397be8..881700537 100644 --- a/test/libc/str/memmem_test.c +++ b/test/libc/str/memmem_test.c @@ -17,10 +17,17 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/mem/mem.h" +#include "libc/assert.h" +#include "libc/calls/calls.h" #include "libc/intrin/likely.h" +#include "libc/intrin/safemacros.h" #include "libc/mem/alg.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" #include "libc/stdio/rand.h" #include "libc/str/str.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/prot.h" #include "libc/testlib/ezbench.h" #include "libc/testlib/hyperion.h" #include "libc/testlib/testlib.h" @@ -172,6 +179,26 @@ TEST(memmem, fuzz) { } } +TEST(memmem, safety) { + int pagesz = sysconf(_SC_PAGESIZE); + char *map = (char *)mmap(0, pagesz * 2, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + npassert(map != MAP_FAILED); + npassert(!mprotect(map + pagesz, pagesz, PROT_NONE)); + for (int haylen = 1; haylen < 128; ++haylen) { + char *hay = map + pagesz - (haylen + 1); + for (int i = 0; i < haylen; ++i) + hay[i] = max(rand() & 255, 1); + hay[haylen] = 0; + for (int neelen = 1; neelen < haylen; ++neelen) { + char *nee = hay + (haylen + 1) - (neelen + 1); + ASSERT_EQ(memmem_naive(hay, haylen, nee, neelen), + memmem(hay, haylen, nee, neelen)); + } + } + munmap(map, pagesz * 2); +} + /* * memmem naive l: 43,783c 14,142ns m: 31,285c 10,105ns * memmem l: 2,597c 839ns m: 2,612c 844ns @@ -201,7 +228,12 @@ BENCH(memmem, bench) { EZBENCH2("memmem", donothing, __expropriate(memmem(kHyperion, kHyperionSize, "THE END", 7))); EZBENCH2("memmem", donothing, - __expropriate(memmem( - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", - 62, "aaaaaab", 7))); + __expropriate( + memmem("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", + 152, + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaab", + 81))); } diff --git a/test/libc/str/strstr_test.c b/test/libc/str/strstr_test.c index 929185e6f..086dd6e15 100644 --- a/test/libc/str/strstr_test.c +++ b/test/libc/str/strstr_test.c @@ -17,11 +17,23 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/str.h" +#include "libc/assert.h" +#include "libc/calls/calls.h" #include "libc/dce.h" +#include "libc/intrin/kprintf.h" +#include "libc/intrin/safemacros.h" #include "libc/mem/alg.h" #include "libc/mem/gc.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/x86feature.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/stdalign.h" +#include "libc/stdio/rand.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/sysparam.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/prot.h" #include "libc/testlib/ezbench.h" #include "libc/testlib/hyperion.h" #include "libc/testlib/testlib.h" @@ -48,6 +60,13 @@ char *strstr_naive(const char *haystack, const char *needle) { return 0; } +TEST(strstr, special) { + MAKESTRING(haystack, "abc123def"); + ASSERT_STREQ(&haystack[0], strstr(haystack, haystack)); + ASSERT_STREQ(&haystack[0], strstr(haystack, "")); + free(haystack); +} + TEST(strstr, test_emptyString_isFoundAtBeginning) { MAKESTRING(haystack, "abc123def"); ASSERT_STREQ(&haystack[0], strstr(haystack, gc(strdup("")))); @@ -67,7 +86,8 @@ TEST(strstr, test_notFound1) { } TEST(strstr, test_middleOfString) { - MAKESTRING(haystack, "abc123def"); + alignas(16) char hog[] = "abc123def"; + MAKESTRING(haystack, hog); ASSERT_STREQ(&haystack[3], strstr(haystack, gc(strdup("123")))); free(haystack); } @@ -98,6 +118,25 @@ TEST(strstr, test) { ASSERT_STREQ("x", strstr("x", "x")); } +TEST(strstr, safety) { + int pagesz = sysconf(_SC_PAGESIZE); + char *map = (char *)mmap(0, pagesz * 2, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + npassert(map != MAP_FAILED); + npassert(!mprotect(map + pagesz, pagesz, PROT_NONE)); + for (int haylen = 1; haylen < 128; ++haylen) { + char *hay = map + pagesz - (haylen + 1); + for (int i = 0; i < haylen; ++i) + hay[i] = max(rand() & 255, 1); + hay[haylen] = 0; + for (int neelen = 1; neelen < haylen; ++neelen) { + char *nee = hay + (haylen + 1) - (neelen + 1); + ASSERT_EQ(strstr_naive(hay, nee), strstr(hay, nee)); + } + } + munmap(map, pagesz * 2); +} + TEST(strstr, breakit) { char *p; p = gc(calloc(1, 32)); diff --git a/test/libc/str/towupper_test.c b/test/libc/str/towupper_test.c index b0779d608..b5096be03 100644 --- a/test/libc/str/towupper_test.c +++ b/test/libc/str/towupper_test.c @@ -30,7 +30,7 @@ TEST(towupper, test) { EXPECT_EQ(u'!', towupper(u'!')); EXPECT_EQ(u'A', towupper(u'a')); EXPECT_EQ(u'À', towupper(u'à')); - EXPECT_EQ(L'𝛥', towupper(L'𝛿')); + /* EXPECT_EQ(L'𝛥', towupper(L'𝛿')); */ EXPECT_EQ(L'B', towupper(L'b')); EXPECT_EQ(u'Ꭰ', towupper(u'ꭰ')); } @@ -39,7 +39,7 @@ TEST(towlower, test) { EXPECT_EQ(u'!', towlower(u'!')); EXPECT_EQ(u'a', towlower(u'A')); EXPECT_EQ(u'à', towlower(u'À')); - EXPECT_EQ(L'𝛿', towlower(L'𝛥')); + /* EXPECT_EQ(L'𝛿', towlower(L'𝛥')); */ EXPECT_EQ(L'b', towlower(L'B')); EXPECT_EQ(u'ꭰ', towlower(u'Ꭰ')); } diff --git a/test/libc/str/wcwidth_test.c b/test/libc/str/wcwidth_test.c index a57e837c9..e79ea59f4 100644 --- a/test/libc/str/wcwidth_test.c +++ b/test/libc/str/wcwidth_test.c @@ -16,9 +16,11 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/str/strwidth.h" #include "libc/str/unicode.h" +#include "libc/testlib/benchmark.h" #include "libc/testlib/ezbench.h" #include "libc/testlib/testlib.h" @@ -28,6 +30,7 @@ TEST(wcwidth, test) { ASSERT_EQ(-1, wcwidth(-7)); ASSERT_EQ(1, wcwidth(0x10FFFD)); ASSERT_EQ(-1, wcwidth(0x10FFFD + 1)); + ASSERT_EQ(2, wcwidth(L'😀')); } TEST(strwidth, testCjkWidesAndCombiningLowLines_withThompsonPikeEncoding) { @@ -74,6 +77,12 @@ TEST(strwidth, testTextDelimitingControlCodes_dontHaveSubstance) { EXPECT_EQ(0, strwidth("\1", 0)); } +#define WCWIDTH(x) __expropriate(wcwidth(__veil("r", x))) + BENCH(wcwidth, bench) { - EZBENCH2("wcwidth", donothing, __expropriate(wcwidth(__veil("r", u'→')))); + BENCHMARK(1000, 1, WCWIDTH(u'a')); + BENCHMARK(1000, 1, WCWIDTH(u'a')); + BENCHMARK(1000, 1, WCWIDTH(u'→')); + BENCHMARK(1000, 1, WCWIDTH(L'😀')); + BENCHMARK(1000, 1, WCWIDTH(0)); } diff --git a/third_party/linenoise/BUILD.mk b/third_party/linenoise/BUILD.mk index 70414264f..8ee501529 100644 --- a/third_party/linenoise/BUILD.mk +++ b/third_party/linenoise/BUILD.mk @@ -19,16 +19,17 @@ THIRD_PARTY_LINENOISE_A_DIRECTDEPS = \ LIBC_CALLS \ LIBC_FMT \ LIBC_INTRIN \ - LIBC_NEXGEN32E \ + LIBC_LOG \ LIBC_MEM \ - LIBC_SYSV \ + LIBC_NEXGEN32E \ + LIBC_RUNTIME \ LIBC_SOCK \ LIBC_STDIO \ - LIBC_RUNTIME \ - LIBC_LOG \ - LIBC_SYSV_CALLS \ LIBC_STR \ - NET_HTTP + LIBC_SYSV \ + LIBC_SYSV_CALLS \ + NET_HTTP \ + THIRD_PARTY_MUSL \ THIRD_PARTY_LINENOISE_A_DEPS := \ $(call uniq,$(foreach x,$(THIRD_PARTY_LINENOISE_A_DIRECTDEPS),$($(x)))) diff --git a/third_party/musl/BUILD.mk b/third_party/musl/BUILD.mk index 8a0dd5488..940bd7cf9 100644 --- a/third_party/musl/BUILD.mk +++ b/third_party/musl/BUILD.mk @@ -11,6 +11,7 @@ THIRD_PARTY_MUSL = $(THIRD_PARTY_MUSL_A_DEPS) $(THIRD_PARTY_MUSL_A) THIRD_PARTY_MUSL_A = o/$(MODE)/third_party/musl/musl.a THIRD_PARTY_MUSL_A_FILES := $(wildcard third_party/musl/*) THIRD_PARTY_MUSL_A_HDRS = $(filter %.h,$(THIRD_PARTY_MUSL_A_FILES)) +THIRD_PARTY_MUSL_A_INCS = $(filter %.inc,$(THIRD_PARTY_MUSL_A_FILES)) THIRD_PARTY_MUSL_A_SRCS = $(filter %.c,$(THIRD_PARTY_MUSL_A_FILES)) THIRD_PARTY_MUSL_A_OBJS = \ @@ -60,6 +61,8 @@ $(THIRD_PARTY_MUSL_A_OBJS): private COPTS += -Wframe-larger-than=4096 -Walloca-l THIRD_PARTY_MUSL_LIBS = $(foreach x,$(THIRD_PARTY_MUSL_ARTIFACTS),$($(x))) THIRD_PARTY_MUSL_SRCS = $(foreach x,$(THIRD_PARTY_MUSL_ARTIFACTS),$($(x)_SRCS)) +THIRD_PARTY_MUSL_HDRS = $(foreach x,$(THIRD_PARTY_MUSL_ARTIFACTS),$($(x)_HDRS)) +THIRD_PARTY_MUSL_INCS = $(foreach x,$(THIRD_PARTY_MUSL_ARTIFACTS),$($(x)_INCS)) THIRD_PARTY_MUSL_CHECKS = $(foreach x,$(THIRD_PARTY_MUSL_ARTIFACTS),$($(x)_CHECKS)) THIRD_PARTY_MUSL_OBJS = $(foreach x,$(THIRD_PARTY_MUSL_ARTIFACTS),$($(x)_OBJS)) $(THIRD_PARTY_MUSL_OBJS): third_party/musl/BUILD.mk diff --git a/third_party/musl/alpha.inc b/third_party/musl/alpha.inc new file mode 100644 index 000000000..4167f3876 --- /dev/null +++ b/third_party/musl/alpha.inc @@ -0,0 +1,172 @@ +18,17,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,17,34,35,36,17,37,38,39,40, +41,42,43,44,17,45,46,47,16,16,48,16,16,16,16,16,16,16,49,50,51,16,52,53,16,16, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,54, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,55,17,17,17,17,56,17,57,58,59,60,61,62,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,63,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,64,65,17,66,67, +68,69,70,71,72,73,74,17,75,76,77,78,79,80,81,16,82,83,84,85,86,87,88,89,90,91, +92,93,16,94,95,96,16,17,17,17,97,98,99,16,16,16,16,16,16,16,16,16,16,17,17,17, +17,100,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,101,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,17,17,102,103,16,16,104,105,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,106,17,17,107,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17, +108,109,16,16,16,16,16,16,16,16,16,110,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,111,112,113,114,16,16,16,16,16,16,16,16,115,116, +117,16,16,16,16,16,118,119,16,16,16,16,120,16,16,121,16,16,16,16,16,16,16,16, +16,16,16,16,16, +16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,254,255,255,7,254, +255,255,7,0,0,0,0,0,4,32,4,255,255,127,255,255,255,127,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,195,255,3,0,31,80,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,0,223,188,64,215,255,255, +251,255,255,255,255,255,255,255,255,255,191,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,3,252,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,254,255,255,255,127,2,255,255,255, +255,255,1,0,0,0,0,255,191,182,0,255,255,255,135,7,0,0,0,255,7,255,255,255,255, +255,255,255,254,255,195,255,255,255,255,255,255,255,255,255,255,255,255,239, +31,254,225,255, +159,0,0,255,255,255,255,255,255,0,224,255,255,255,255,255,255,255,255,255,255, +255,255,3,0,255,255,255,255,255,7,48,4,255,255,255,252,255,31,0,0,255,255,255, +1,255,7,0,0,0,0,0,0,255,255,223,63,0,0,240,255,248,3,255,255,255,255,255,255, +255,255,255,239,255,223,225,255,207,255,254,255,239,159,249,255,255,253,197, +227,159,89,128,176,207,255,3,16,238,135,249,255,255,253,109,195,135,25,2,94, +192,255,63,0,238,191,251,255,255,253,237,227,191,27,1,0,207,255,0,30,238,159, +249,255,255,253,237,227,159,25,192,176,207,255,2,0,236,199,61,214,24,199,255, +195,199,29,129,0,192,255,0,0,239,223,253,255,255,253,255,227,223,29,96,7,207, +255,0,0,239,223,253,255,255,253,239,227,223,29,96,64,207,255,6,0,239,223,253, +255,255,255,255,231,223,93,240,128,207,255,0,252,236,255,127,252,255,255,251, +47,127,128,95,255,192,255,12,0,254,255,255,255,255,127,255,7,63,32,255,3,0,0, +0,0,214,247,255,255,175,255,255,59,95,32,255,243,0,0,0, +0,1,0,0,0,255,3,0,0,255,254,255,255,255,31,254,255,3,255,255,254,255,255,255, +31,0,0,0,0,0,0,0,0,255,255,255,255,255,255,127,249,255,3,255,255,255,255,255, +255,255,255,255,63,255,255,255,255,191,32,255,255,255,255,255,247,255,255,255, +255,255,255,255,255,255,61,127,61,255,255,255,255,255,61,255,255,255,255,61, +127,61,255,127,255,255,255,255,255,255,255,61,255,255,255,255,255,255,255,255, +7,0,0,0,0,255,255,0,0,255,255,255,255,255,255,255,255,255,255,63,63,254,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,159,255,255,254,255,255,7,255,255,255,255,255,255,255,255, +255,199,255,1,255,223,15,0,255,255,15,0,255,255,15,0,255,223,13,0,255,255,255, +255,255,255,207,255,255,1,128,16,255,3,0,0,0,0,255,3,255,255,255,255,255,255, +255,255,255,255,255,1,255,255,255,255,255,7,255,255,255,255,255,255,255,255, +63, +0,255,255,255,127,255,15,255,1,192,255,255,255,255,63,31,0,255,255,255,255, +255,15,255,255,255,3,255,3,0,0,0,0,255,255,255,15,255,255,255,255,255,255,255, +127,254,255,31,0,255,3,255,3,128,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255, +255,239,255,239,15,255,3,0,0,0,0,255,255,255,255,255,243,255,255,255,255,255, +255,191,255,3,0,255,255,255,255,255,255,127,0,255,227,255,255,255,255,255,63, +255,1,255,255,255,255,255,231,0,0,0,0,0,222,111,4,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0, +128,255,31,0,255,255,63,63,255,255,255,255,63,63,255,170,255,255,255,63,255, +255,255,255,255,255,223,95,220,31,207,15,255,31,220,31,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,128,0,0,255,31,0,0,0,0,0,0,0,0,0,0,0,0,132,252,47,62,80,189,255,243, +224,67,0,0,255,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,255,255,255,255,255,255,3,0, +0,255,255,255,255,255,127,255,255,255,255,255,127,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,31,120,12,0,255,255,255,255,191,32,255, +255,255,255,255,255,255,128,0,0,255,255,127,0,127,127,127,127,127,127,127,127, +255,255,255,255,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,224,0,0,0,254,3,62,31,254,255,255,255,255,255,255,255,255,255,127,224,254, +255,255,255,255,255,255,255,255,255,255,247,224,255,255,255,255,255,254,255, +255,255,255,255,255,255,255,255,255,127,0,0,255,255,255,7,0,0,0,0,0,0,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,63,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0, +0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31,0,0, +0,0,0,0,0,0,255,255,255,255,255,63,255,31,255,255,255,15,0,0,255,255,255,255, +255,127,240,143,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0, +0,128,255,252,255,255,255,255,255,255,255,255,255,255,255,255,249,255,255,255, +255,255,255,124,0,0,0,0,0,128,255,191,255,255,255,255,0,0,0,255,255,255,255, +255,255,15,0,255,255,255,255,255,255,255,255,47,0,255,3,0,0,252,232,255,255, +255,255,255,7,255,255,255,255,7,0,255,255,255,31,255,255,255,255,255,255,247, +255,0,128,255,3,255,255,255,127,255,255,255,255,255,255,127,0,255,63,255,3, +255,255,127,252,255,255,255,255,255,255,255,127,5,0,0,56,255,255,60,0,126,126, +126,0,127,127,255,255,255,255,255,247,255,0,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,7,255,3,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,15,0,255,255,127,248,255,255,255,255, +255, +15,255,255,255,255,255,255,255,255,255,255,255,255,255,63,255,255,255,255,255, +255,255,255,255,255,255,255,255,3,0,0,0,0,127,0,248,224,255,253,127,95,219, +255,255,255,255,255,255,255,255,255,255,255,255,255,3,0,0,0,248,255,255,255, +255,255,255,255,255,255,255,255,255,63,0,0,255,255,255,255,255,255,255,255, +252,255,255,255,255,255,255,0,0,0,0,0,255,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,223, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31,0,0,255,3, +254,255,255,7,254,255,255,7,192,255,255,255,255,255,255,255,255,255,255,127, +252,252,252,28,0,0,0,0,255,239,255,255,127,255,255,183,255,63,255,63,0,0,0,0, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,7,0,0,0,0,0,0,0,0, +255,255,255,255,255,255,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,255,255,255,31,255,255,255,255,255,255,1,0,0,0,0, +0,255,255,255,255,0,224,255,255,255,7,255,255,255,255,255,7,255,255,255,63, +255,255,255,255,15,255,62,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,63,255,3,255,255,255,255,15,255,255,255, +255,15,255,255,255,255,255,0,255,255,255,255,255,255,15,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,255,255,255,255,255,255,127,0,255,255,63,0,255,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,63,253,255,255,255,255,191,145,255,255,63,0,255,255, +127,0,255,255,255,127,0,0,0,0,0,0,0,0,255,255,55,0,255,255,63,0,255,255,255,3, +0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,192,0,0,0,0,0,0,0,0,111,240,239, +254,255,255,63,0,0,0,0,0,255,255,255,31,255,255,255,31,0,0,0,0,255,254,255, +255,31,0,0,0,255,255,255,255,255,255,63,0,255,255,63,0,255,255,7,0,255,255,3, +0,0,0,0,0,0,0,0,0,0,0,0, +0,255,255,255,255,255,255,255,255,255,1,0,0,0,0,0,0,255,255,255,255,255,255,7, +0,255,255,255,255,255,255,7,0,255,255,255,255,255,0,255,3,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,31,128,0,255,255,63,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,255,255,127,0,255,255,255,255,255,255,255,255,63,0,0,0, +192,255,0,0,252,255,255,255,255,255,255,1,0,0,255,255,255,1,255,3,255,255,255, +255,255,255,199,255,112,0,255,255,255,255,71,0,255,255,255,255,255,255,255, +255,30,0,255,23,0,0,0,0,255,255,251,255,255,255,159,64,0,0,0,0,0,0,0,0,127, +189,255,191,255,1,255,255,255,255,255,255,255,1,255,3,239,159,249,255,255,253, +237,227,159,25,129,224,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255, +255,255,255,255,255,187,7,255,131,0,0,0,0,255,255,255,255,255,255,255,255,179, +0,255,3,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,63,127,0,0,0,63,0,0, +0,0,255,255,255,255,255,255,255,127,17,0,255,3,0,0,0,0,255,255,255,255,255, +255,63,1,255,3,0,0,0,0,0,0,255,255,255,231,255,7,255,3,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0, +0,255,255,255,255,255,255,255,255,255,3,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,255,252,255,255,255,255,255,252,26,0,0,0,255,255,255,255,255,255,231, +127,0,0,255,255,255,255,255,255,255,255,255,32,0,0,0,0,255,255,255,255,255, +255,255,1,255,253,255,255,255,255,127,127,1,0,255,3,0,0,252,255,255,255,252, +255,255,254,127,0,0,0,0,0,0,0,0,0,127,251,255,255,255,255,127,180,203,0,255,3, +191,253,255,255,255,127,123,1,255,3,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,127,0,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,3,0,0, +0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,127,0, +0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255, +255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255, +255,255,255,255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, +255,255,255,255,255,255,1,255,255,255,127,255,3,0,0,0,0,0,0,0,0,0,0,0,0,255, +255,255,63,0,0,255,255,255,255,255,255,0,0,15,0,255,3,248,255,255,224,255,255, +0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,255,255,255,255,255,255,255,255,255,135,255,255,255,255,255,255,255,128, +255,255,0,0,0,0,0,0,0,0,11,0,0,0,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,7,0,255,255,255,127,0,0,0,0,0, +0,7,0,240,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,15,255,255,255,255, +255,255,255,255,255,255,255,255,255,7,255,31,255,1,255,67,0,0,0,0,0,0,0,0,0,0, +0,0,255,255,255,255,255,255,255,255,255,255,223,255,255,255,255,255,255,255, +255,223,100,222,255,235,239,255,255,255,255,255,255, +255,191,231,223,223,255,255,255,123,95,252,253,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,255,255,255, +253,255,255,247,255,255,255,247,255,255,223,255,255,255,223,255,255,127,255, +255,255,127,255,255,255,253,255,255,255,253,255,255,247,207,255,255,255,255, +255,255,127,255,255,249,219,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,255,255,255,255,255,31,128,63,255,67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255, +15,255,3,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,31,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255, +143,8,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,239,255,255,255,150,254,247,10,132,234,150,170,150,247,247,94,255,251,255, +15,238,251,255,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,3,255,255,255,3,255, +255,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, diff --git a/third_party/musl/bsearch.c b/third_party/musl/bsearch.c new file mode 100644 index 000000000..fe050ea30 --- /dev/null +++ b/third_party/musl/bsearch.c @@ -0,0 +1,20 @@ +#include + +void *bsearch(const void *key, const void *base, size_t nel, size_t width, int (*cmp)(const void *, const void *)) +{ + void *try; + int sign; + while (nel > 0) { + try = (char *)base + width*(nel/2); + sign = cmp(key, try); + if (sign < 0) { + nel /= 2; + } else if (sign > 0) { + base = (char *)try + width; + nel -= nel/2+1; + } else { + return try; + } + } + return NULL; +} diff --git a/third_party/musl/casemap.inc b/third_party/musl/casemap.inc new file mode 100644 index 000000000..6ee1209b9 --- /dev/null +++ b/third_party/musl/casemap.inc @@ -0,0 +1,297 @@ +static const unsigned char tab[] = { + 7, 8, 9, 10, 11, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 13, 6, 6, 14, 6, 6, 6, 6, 6, 6, 6, 6, 15, 16, 17, 18, + 6, 19, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 20, 21, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 22, 23, 6, 6, 6, 24, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 25, + 6, 6, 6, 6, 26, 6, 6, 6, 6, 6, 6, 6, 27, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 28, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 29, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 30, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, + 43, 43, 43, 43, 43, 43, 43, 43, 1, 0, 84, 86, 86, 86, 86, 86, + 86, 86, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 43, 43, 43, 43, 43, 43, + 43, 7, 43, 43, 91, 86, 86, 86, 86, 86, 86, 86, 74, 86, 86, 5, + 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, + 36, 80, 121, 49, 80, 49, 80, 49, 56, 80, 49, 80, 49, 80, 49, 80, + 49, 80, 49, 80, 49, 80, 49, 80, 78, 49, 2, 78, 13, 13, 78, 3, + 78, 0, 36, 110, 0, 78, 49, 38, 110, 81, 78, 36, 80, 78, 57, 20, + 129, 27, 29, 29, 83, 49, 80, 49, 80, 13, 49, 80, 49, 80, 49, 80, + 27, 83, 36, 80, 49, 2, 92, 123, 92, 123, 92, 123, 92, 123, 92, 123, + 20, 121, 92, 123, 92, 123, 92, 45, 43, 73, 3, 72, 3, 120, 92, 123, + 20, 0, 150, 10, 1, 43, 40, 6, 6, 0, 42, 6, 42, 42, 43, 7, + 187, 181, 43, 30, 0, 43, 7, 43, 43, 43, 1, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 1, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 205, 70, 205, 43, 0, 37, 43, 7, 1, 6, 1, 85, 86, 86, 86, + 86, 86, 85, 86, 86, 2, 36, 129, 129, 129, 129, 129, 21, 129, 129, 129, + 0, 0, 43, 0, 178, 209, 178, 209, 178, 209, 178, 209, 0, 0, 205, 204, + 1, 0, 215, 215, 215, 215, 215, 131, 129, 129, 129, 129, 129, 129, 129, 129, + 129, 129, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 28, 0, 0, 0, + 0, 0, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 2, 0, 0, + 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, + 49, 80, 78, 49, 80, 49, 80, 78, 49, 80, 49, 80, 49, 80, 49, 80, + 49, 80, 49, 80, 49, 80, 49, 2, 135, 166, 135, 166, 135, 166, 135, 166, + 135, 166, 135, 166, 135, 166, 135, 166, 42, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 0, 0, 0, 84, 86, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 84, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, + 12, 0, 12, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 7, 42, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 86, 86, 108, 129, 21, 0, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 7, 108, 3, 65, 43, 43, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 86, 86, 86, 44, 86, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 12, 108, 0, 0, 0, 0, 0, 6, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, + 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, + 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, + 6, 37, 6, 37, 6, 37, 6, 37, 86, 122, 158, 38, 6, 37, 6, 37, + 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, + 6, 37, 6, 37, 6, 37, 6, 37, 6, 37, 6, 1, 43, 43, 79, 86, + 86, 44, 43, 127, 86, 86, 57, 43, 43, 85, 86, 86, 43, 43, 79, 86, + 86, 44, 43, 127, 86, 86, 129, 55, 117, 91, 123, 92, 43, 43, 79, 86, + 86, 2, 172, 4, 0, 0, 57, 43, 43, 85, 86, 86, 43, 43, 79, 86, + 86, 44, 43, 43, 86, 86, 50, 19, 129, 87, 0, 111, 129, 126, 201, 215, + 126, 45, 129, 129, 14, 126, 57, 127, 111, 87, 0, 129, 129, 126, 21, 0, + 126, 3, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 7, 43, + 36, 43, 151, 43, 43, 43, 43, 43, 43, 43, 43, 43, 42, 43, 43, 43, + 43, 43, 86, 86, 86, 86, 86, 128, 129, 129, 129, 129, 57, 187, 42, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 1, 129, 129, 129, 129, 129, 129, 129, 129, + 129, 129, 129, 129, 129, 129, 129, 201, 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 208, 13, 0, 78, 49, 2, 180, 193, 193, + 215, 215, 36, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, + 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, 49, 80, + 49, 80, 49, 80, 215, 215, 83, 193, 71, 212, 215, 215, 215, 5, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 7, 1, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 78, 49, 80, 49, 80, 49, 80, + 49, 80, 49, 80, 49, 80, 49, 80, 13, 0, 0, 0, 0, 0, 36, 80, + 49, 80, 49, 80, 49, 80, 49, 80, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 121, 92, 123, 92, 123, 79, 123, 92, 123, 92, 123, + 92, 123, 92, 123, 92, 123, 92, 123, 92, 123, 92, 123, 92, 123, 92, 45, + 43, 43, 121, 20, 92, 123, 92, 45, 121, 42, 92, 39, 92, 123, 92, 123, + 92, 123, 164, 0, 10, 180, 92, 123, 92, 123, 79, 3, 42, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 43, 43, 43, 43, 43, 43, 43, 43, 7, 0, 72, 86, 86, 86, 86, + 86, 86, 86, 86, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 85, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 86, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 36, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 7, 0, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 7, 0, 0, + 0, 0, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 86, 86, 86, 86, 86, 86, 86, 86, + 86, 86, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 86, 86, + 86, 86, 86, 86, 86, 86, 86, 86, 14, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 85, + 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 14, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; +static const int rules[] = { + 0x0, 0x2001, -0x2000, 0x1dbf00, 0x2e700, 0x7900, + 0x2402, 0x101, -0x100, 0x0, 0x201, -0x200, + -0xc6ff, -0xe800, -0x78ff, -0x12c00, 0xc300, 0xd201, + 0xce01, 0xcd01, 0x4f01, 0xca01, 0xcb01, 0xcf01, + 0x6100, 0xd301, 0xd101, 0xa300, 0xd501, 0x8200, + 0xd601, 0xda01, 0xd901, 0xdb01, 0x3800, 0x3, + -0x4f00, -0x60ff, -0x37ff, 0x242802, 0x0, 0x101, + -0x100, -0xcd00, -0xda00, -0x81ff, 0x2a2b01, -0xa2ff, + 0x2a2801, 0x2a3f00, -0xc2ff, 0x4501, 0x4701, 0x2a1f00, + 0x2a1c00, 0x2a1e00, -0xd200, -0xce00, -0xca00, -0xcb00, + 0xa54f00, 0xa54b00, -0xcf00, 0xa52800, 0xa54400, -0xd100, + -0xd300, 0x29f700, 0xa54100, 0x29fd00, -0xd500, -0xd600, + 0x29e700, 0xa54300, 0xa52a00, -0x4500, -0xd900, -0x4700, + -0xdb00, 0xa51500, 0xa51200, 0x4c2402, 0x0, 0x2001, + -0x2000, 0x101, -0x100, 0x5400, 0x7401, 0x2601, + 0x2501, 0x4001, 0x3f01, -0x2600, -0x2500, -0x1f00, + -0x4000, -0x3f00, 0x801, -0x3e00, -0x3900, -0x2f00, + -0x3600, -0x800, -0x5600, -0x5000, 0x700, -0x7400, + -0x3bff, -0x6000, -0x6ff, 0x701a02, 0x101, -0x100, + 0x2001, -0x2000, 0x5001, 0xf01, -0xf00, 0x0, + 0x3001, -0x3000, 0x101, -0x100, 0x0, 0xbc000, + 0x1c6001, 0x0, 0x97d001, 0x801, -0x800, 0x8a0502, + 0x0, -0xbbfff, -0x186200, 0x89c200, -0x182500, -0x186e00, + -0x186d00, -0x186400, -0x186300, -0x185c00, 0x0, 0x8a3800, + 0x8a0400, 0xee600, 0x101, -0x100, 0x0, -0x3b00, + -0x1dbeff, 0x8f1d02, 0x800, -0x7ff, 0x0, 0x5600, + -0x55ff, 0x4a00, 0x6400, 0x8000, 0x7000, 0x7e00, + 0x900, -0x49ff, -0x8ff, -0x1c2500, -0x63ff, -0x6fff, + -0x7fff, -0x7dff, 0xac0502, 0x0, 0x1001, -0x1000, + 0x1c01, 0x101, -0x1d5cff, -0x20beff, -0x2045ff, -0x1c00, + 0xb10b02, 0x101, -0x100, 0x3001, -0x3000, 0x0, + -0x29f6ff, -0xee5ff, -0x29e6ff, -0x2a2b00, -0x2a2800, -0x2a1bff, + -0x29fcff, -0x2a1eff, -0x2a1dff, -0x2a3eff, 0x0, -0x1c6000, + 0x0, 0x101, -0x100, 0xbc0c02, 0x0, 0x101, + -0x100, -0xa543ff, 0x3a001, -0x8a03ff, -0xa527ff, 0x3000, + -0xa54eff, -0xa54aff, -0xa540ff, -0xa511ff, -0xa529ff, -0xa514ff, + -0x2fff, -0xa542ff, -0x8a37ff, 0x0, -0x97d000, -0x3a000, + 0x0, 0x2001, -0x2000, 0x0, 0x2801, -0x2800, + 0x0, 0x4001, -0x4000, 0x0, 0x2001, -0x2000, + 0x0, 0x2001, -0x2000, 0x0, 0x2201, -0x2200, +}; +static const unsigned char rulebases[] = { + 0, 6, 39, 81, 111, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 124, 0, 0, 127, 0, 0, 0, 0, 0, 0, 0, 0, 131, 142, 146, 151, + 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180, 196, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 198, 201, 0, 0, 0, 219, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 222, + 0, 0, 0, 0, 225, 0, 0, 0, 0, 0, 0, 0, 228, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 231, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 234, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 237, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; +static const unsigned char exceptions[][2] = { + { 48, 12 }, { 49, 13 }, { 120, 14 }, { 127, 15 }, + { 128, 16 }, { 129, 17 }, { 134, 18 }, { 137, 19 }, + { 138, 19 }, { 142, 20 }, { 143, 21 }, { 144, 22 }, + { 147, 19 }, { 148, 23 }, { 149, 24 }, { 150, 25 }, + { 151, 26 }, { 154, 27 }, { 156, 25 }, { 157, 28 }, + { 158, 29 }, { 159, 30 }, { 166, 31 }, { 169, 31 }, + { 174, 31 }, { 177, 32 }, { 178, 32 }, { 183, 33 }, + { 191, 34 }, { 197, 35 }, { 200, 35 }, { 203, 35 }, + { 221, 36 }, { 242, 35 }, { 246, 37 }, { 247, 38 }, + { 32, 45 }, { 58, 46 }, { 61, 47 }, { 62, 48 }, + { 63, 49 }, { 64, 49 }, { 67, 50 }, { 68, 51 }, + { 69, 52 }, { 80, 53 }, { 81, 54 }, { 82, 55 }, + { 83, 56 }, { 84, 57 }, { 89, 58 }, { 91, 59 }, + { 92, 60 }, { 97, 61 }, { 99, 62 }, { 101, 63 }, + { 102, 64 }, { 104, 65 }, { 105, 66 }, { 106, 64 }, + { 107, 67 }, { 108, 68 }, { 111, 66 }, { 113, 69 }, + { 114, 70 }, { 117, 71 }, { 125, 72 }, { 130, 73 }, + { 135, 74 }, { 137, 75 }, { 138, 76 }, { 139, 76 }, + { 140, 77 }, { 146, 78 }, { 157, 79 }, { 158, 80 }, + { 69, 87 }, { 123, 29 }, { 124, 29 }, { 125, 29 }, + { 127, 88 }, { 134, 89 }, { 136, 90 }, { 137, 90 }, + { 138, 90 }, { 140, 91 }, { 142, 92 }, { 143, 92 }, + { 172, 93 }, { 173, 94 }, { 174, 94 }, { 175, 94 }, + { 194, 95 }, { 204, 96 }, { 205, 97 }, { 206, 97 }, + { 207, 98 }, { 208, 99 }, { 209, 100 }, { 213, 101 }, + { 214, 102 }, { 215, 103 }, { 240, 104 }, { 241, 105 }, + { 242, 106 }, { 243, 107 }, { 244, 108 }, { 245, 109 }, + { 249, 110 }, { 253, 45 }, { 254, 45 }, { 255, 45 }, + { 80, 105 }, { 81, 105 }, { 82, 105 }, { 83, 105 }, + { 84, 105 }, { 85, 105 }, { 86, 105 }, { 87, 105 }, + { 88, 105 }, { 89, 105 }, { 90, 105 }, { 91, 105 }, + { 92, 105 }, { 93, 105 }, { 94, 105 }, { 95, 105 }, + { 130, 0 }, { 131, 0 }, { 132, 0 }, { 133, 0 }, + { 134, 0 }, { 135, 0 }, { 136, 0 }, { 137, 0 }, + { 192, 117 }, { 207, 118 }, { 128, 137 }, { 129, 138 }, + { 130, 139 }, { 133, 140 }, { 134, 141 }, { 112, 157 }, + { 113, 157 }, { 118, 158 }, { 119, 158 }, { 120, 159 }, + { 121, 159 }, { 122, 160 }, { 123, 160 }, { 124, 161 }, + { 125, 161 }, { 179, 162 }, { 186, 163 }, { 187, 163 }, + { 188, 164 }, { 190, 165 }, { 195, 162 }, { 204, 164 }, + { 218, 166 }, { 219, 166 }, { 229, 106 }, { 234, 167 }, + { 235, 167 }, { 236, 110 }, { 243, 162 }, { 248, 168 }, + { 249, 168 }, { 250, 169 }, { 251, 169 }, { 252, 164 }, + { 38, 176 }, { 42, 177 }, { 43, 178 }, { 78, 179 }, + { 132, 8 }, { 98, 186 }, { 99, 187 }, { 100, 188 }, + { 101, 189 }, { 102, 190 }, { 109, 191 }, { 110, 192 }, + { 111, 193 }, { 112, 194 }, { 126, 195 }, { 127, 195 }, + { 125, 207 }, { 141, 208 }, { 148, 209 }, { 171, 210 }, + { 172, 211 }, { 173, 212 }, { 176, 213 }, { 177, 214 }, + { 178, 215 }, { 196, 216 }, { 197, 217 }, { 198, 218 }, +}; diff --git a/libc/str/iswalnum.c b/third_party/musl/iswalnum.c similarity index 100% rename from libc/str/iswalnum.c rename to third_party/musl/iswalnum.c diff --git a/third_party/musl/iswalpha.c b/third_party/musl/iswalpha.c new file mode 100644 index 000000000..33157ea29 --- /dev/null +++ b/third_party/musl/iswalpha.c @@ -0,0 +1,50 @@ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include +#include +__static_yoink("musl_libc_notice"); + +static const unsigned char table[] = { +#include "alpha.inc" +}; + +int iswalpha(wint_t wc) +{ + if (wc<0x20000U) + return (table[table[wc>>8]*32+((wc&255)>>3)]>>(wc&7))&1; + if (wc<0x2fffeU) + return 1; + return 0; +} + +int __iswalpha_l(wint_t c, locale_t l) +{ + return iswalpha(c); +} + +__weak_reference(__iswalpha_l, iswalpha_l); diff --git a/libc/str/iswctype.c b/third_party/musl/iswctype.c similarity index 100% rename from libc/str/iswctype.c rename to third_party/musl/iswctype.c diff --git a/third_party/musl/iswpunct.c b/third_party/musl/iswpunct.c new file mode 100644 index 000000000..6434bb790 --- /dev/null +++ b/third_party/musl/iswpunct.c @@ -0,0 +1,48 @@ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include +#include +__static_yoink("musl_libc_notice"); + +static const unsigned char table[] = { +#include "punct.inc" +}; + +int iswpunct(wint_t wc) +{ + if (wc<0x20000U) + return (table[table[wc>>8]*32+((wc&255)>>3)]>>(wc&7))&1; + return 0; +} + +int __iswpunct_l(wint_t c, locale_t l) +{ + return iswpunct(c); +} + +__weak_reference(__iswpunct_l, iswpunct_l); diff --git a/third_party/musl/punct.inc b/third_party/musl/punct.inc new file mode 100644 index 000000000..67929470c --- /dev/null +++ b/third_party/musl/punct.inc @@ -0,0 +1,141 @@ +18,16,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,16,16,34,35,16,36,37,38,39, +40,41,42,43,16,44,45,46,17,17,47,17,17,17,17,17,17,48,49,50,51,52,53,54,55,17, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,56, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,57,16,58,59,60,61,62,63,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,64,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,65,16,16,66,16,67,68, +69,16,70,71,72,16,73,16,16,74,75,76,77,78,16,79,80,81,82,83,84,85,86,87,88,89, +90,91,16,92,93,94,95,16,16,16,16,96,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,97,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,98,99,16,16,100,101,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,102,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,103,104,105,106,16,16,107,108,17,17,109,16,16,16,16,16,16,110,111,16, +16,16,16,16,112,113,16,16,114,115,116,16,117,118,119,17,17,17,120,121,122,123, +124,16,16,16,16, +16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,254,255,0,252,1,0,0,248,1, +0,0,120,0,0,0,0,255,251,223,251,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,60,0,252,255,224,175,255,255,255,255,255,255,255,255, +255,255,223,255,255,255,255,255,32,64,176,0,0,0,0,0,0,0,0,0,0,0,0,0,64,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,252,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,252,0,0,0,0,0,230,254,255,255,255,0,64,73,0,0,0,0,0,24,0,255,255,0,216, +0,0,0,0,0,0,0,1,0,60,0,0,0,0,0,0,0,0,0,0,0,0,16,224,1,30,0, +96,255,191,0,0,0,0,0,0,255,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248,207, +227,0,0,0,3,0,32,255,127,0,0,0,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,7,252,0,0,0, +0,0,0,0,0,0,16,0,32,30,0,48,0,1,0,0,0,0,0,0,0,0,16,0,32,0,0,0,0,252,111,0,0,0, +0,0,0,0,16,0,32,0,0,0,0,64,0,0,0,0,0,0,0,0,16,0,32,0,0,0,0,3,224,0,0,0,0,0,0, +0,16,0,32,0,0,0,0,253,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,255,7,16,0,0,0,0,0,0,0,0, +32,0,0,0,0,128,255,16,0,0,0,0,0,0,16,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,160, +0,127,0,0,255,3,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,0,0,0,0,0,0,128,0,128,192,223, +0,12,0,0,0,0,0,0,0,0,0,0,0,4,0,31,0,0,0,0,0, +0,254,255,255,255,0,252,255,255,0,0,0,0,0,0,0,0,252,0,0,0,0,0,0,192,255,223, +255,7,0,0,0,0,0,0,0,0,0,0,128,6,0,252,0,0,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,0,0, +0,0,8,0,0,0,0,0,0,0,0,0,0,0,224,255,255,255,31,0,0,255,3,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,96,0,0,1,0,0,24,0,0,0,0,0,0,0,0,0,56,0,0,0,0,16,0,0,0,112,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,254,127,47,0,0,255,3,255,127,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,49,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,196,255,255,255, +255,0,0,0,192,0,0,0,0,0,0,0,0,1,0,224,159,0,0,0,0,127,63,255,127,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,16,0,16,0,0,252,255,255,255,31,0,0,0,0,0,12,0,0,0,0,0,0,64,0, +12,240,0,0,0,0,0,0,128,248,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,0,255,0,255,255, +255,33,144,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, +127,0,224,251,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,160,3,224,0,224,0, +224,0,96,128,248,255,255,255,252,255,255,255,255,255,127,223,255,241,127,255, +127,0,0,255,255,255,255,0,0,255,255,255,255,1,0,123,3,208,193,175,66,0,12,31, +188,255,255,0,0,0,0,0,14,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,127,0,0,0,255,7,0,0,255,255,255,255,255,255,255,255,255, +255,63,0,0,0,0,0,0,252,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,207,255,255,255, +63,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,224,135,3,254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,127,255,255,255,255,0, +0,0,0,0,0,255,255,255,251,255,255,255,255,255,255,255,255,255,255,15,0,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,63,0,0,0,255,15,30,255,255,255,1,252,193,224,0,0,0,0, +0,0,0,0,0,0,0,30,1,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +255,255,0,0,0,0,255,255,255,255,15,0,0,0,255,255,255,127,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255, +255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255, +255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,127,0,0,0, +0,0,0,192,0,224,0,0,0,0,0,0,0,0,0,0,0,128,15,112,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +255,0,255,255,127,0,3,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +64,0,0,0,0,15,255,3,0,0,0,0,0,0,240,0,0,0,0,0,0,0,0,0,16,192,0,0,255,255,3,23, +0,0,0,0,0,248,0,0,0,0,8,128,0,0,0,0,0,0,0,0,0,0,8,0,255,63,0,192,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,240,0,0,128,3,0,0,0,0,0,0,0,128,2,0,0,192,0,0,67,0,0,0,0,0, +0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,252,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48,255,255,255,3,255,255,255,255,255,255,247, +255,127,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,254,255,0,252,1,0,0,248,1,0, +0,248,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,127,0,48,135,255,255,255,255,255, +143,255,0,0,0,0,0,0,224,255,255,127,255,15,1,0,0,0,0,0,255,255,255,255,255,63, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255, +15,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +128,255,0,0,128,255,0,0,0,0,128,255,0,0,0,0,0,0,0,0,0,248,0,0,192,143,0,0,0, +128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48,255,255,252,255,255,255,255,255,0,0,0,0, +0,0,0,135,255,1,255,1,0,0,0,224,0,0,0,224,0,0,0,0,0,1,0,0,96,248,127,0,0,0,0, +0,0,0,0,254,0,0,0,255,0,0,0,255,0,0,0,30,0,254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,252,0,0,0,0,0,0,0,0,0,0,0, +0,255,255,255,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,224,127,0,0,0,192,255,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,192,63,252,255,63,0,0,128,3,0,0,0,0,0,0,254,3,32,0,0,0,0,0,0,0, +0,0,0,0,0,24,0,15,0,0,0,0,0,56,0,0,0,0,0,0,0,0,0,225,63,0,232,254,255,31,0,0, +0,0,0,0,0,96,63,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0, +24,0,32,0,0,192,31,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68, +248,0,104,0,0,0,0,0,0,0,0,0,0,0,0,76,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,128,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,128,14,0,0,0,255, +31,0,0,0,0,0,0,0,0,192,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,8,0,252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,252,7,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,24,128,255,0,0,0,0,0, +0,0,0,0,0,223,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,62,0,0,252,255,31,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,128,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,128,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255, +255,3, +128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,63,0,0,0,0,0,0,0,255,255,48,0,0,248, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255, +255,255,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,176,15,0,0,0,0,0,0, +0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,63, +0,255,255,255,255,127,254,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,1,0,0,255,255,255,255,255,255,255,255, +63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,15,0,255,255,255,255,255,255, +255,255,255,255,127,0,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,8,0,0,0,8,0,0,32,0,0,0,32,0,0,128, +0,0,0,128,0,0,0,2,0,0,0,2,0,0,8,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,15,0,248,254,255,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,127,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,240,0, +128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,255,127,0,0,0,0,0,0,0, +0,0,0,0,0,0,112,7,0,192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,254,255,255,255,255,255,255,255,31,0,0,0,0,0,0,0,0,0,254,255, +255,255,255,255,255,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,255,255,255,255,255, +15,255,255,255,255,255,255,255,255,255,255,255,255,15,0,255,127,254,255,254, +255,254,255,255,255,63,0,255,31,255,255,255,255,0,0,0,252,0,0,0,28,0,0,0,252, +255,255,255,31,0,0,0,0,0,0,192,255,255,255,7,0,255,255,255,255,255,15,255,1,3, +0,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,63,0,255,31,255,7,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,15,0,255,255,255,255,255,255,255,255,255,255,255,1, +255,15,0,0,255,15,255,255,255,255,255,255,255,0,255,3,255,255,255,255,255,0, +255,255,255,63,0,0,0,0,0,0,0,0,0,0,255,239,255,255,255,255,255,255,255,255, +255,255,255,255,123,252,255,255,255,255,231,199,255,255,255,231,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,15,0,255,63,15,7,7,0,63,0, +0,0,0,0,0,0,0,0,0,0,0,0, diff --git a/libc/str/strcasecmp16.c b/third_party/musl/strcasecmp16.c similarity index 100% rename from libc/str/strcasecmp16.c rename to third_party/musl/strcasecmp16.c diff --git a/libc/str/strncasecmp16.c b/third_party/musl/strncasecmp16.c similarity index 100% rename from libc/str/strncasecmp16.c rename to third_party/musl/strncasecmp16.c diff --git a/third_party/musl/towctrans.c b/third_party/musl/towctrans.c new file mode 100644 index 000000000..07c63f266 --- /dev/null +++ b/third_party/musl/towctrans.c @@ -0,0 +1,113 @@ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include +#include +__static_yoink("musl_libc_notice"); + +static const unsigned char tab[]; + +static const unsigned char rulebases[512]; +static const int rules[]; + +static const unsigned char exceptions[][2]; + +#include "casemap.inc" + +static int casemap(unsigned c, int dir) +{ + unsigned b, x, y, v, rt, xb, xn; + int r, rd, c0 = c; + + if (c >= 0x20000) return c; + + b = c>>8; + c &= 255; + x = c/3; + y = c%3; + + /* lookup entry in two-level base-6 table */ + v = tab[tab[b]*86+x]; + static const int mt[] = { 2048, 342, 57 }; + v = (v*mt[y]>>11)%6; + + /* use the bit vector out of the tables as an index into + * a block-specific set of rules and decode the rule into + * a type and a case-mapping delta. */ + r = rules[rulebases[b]+v]; + rt = r & 255; + rd = r >> 8; + + /* rules 0/1 are simple lower/upper case with a delta. + * apply according to desired mapping direction. */ + if (rt < 2) return c0 + (rd & -(rt^dir)); + + /* binary search. endpoints of the binary search for + * this block are stored in the rule delta field. */ + xn = rd & 0xff; + xb = (unsigned)rd >> 8; + while (xn) { + unsigned try = exceptions[xb+xn/2][0]; + if (try == c) { + r = rules[exceptions[xb+xn/2][1]]; + rt = r & 255; + rd = r >> 8; + if (rt < 2) return c0 + (rd & -(rt^dir)); + /* Hard-coded for the four exceptional titlecase */ + return c0 + (dir ? -1 : 1); + } else if (try > c) { + xn /= 2; + } else { + xb += xn/2; + xn -= xn/2; + } + } + return c0; +} + +wint_t towlower(wint_t wc) +{ + return casemap(wc, 0); +} + +wint_t towupper(wint_t wc) +{ + return casemap(wc, 1); +} + +wint_t __towupper_l(wint_t c, locale_t l) +{ + return towupper(c); +} + +wint_t __towlower_l(wint_t c, locale_t l) +{ + return towlower(c); +} + +__weak_reference(__towupper_l, towupper_l); +__weak_reference(__towlower_l, towlower_l); diff --git a/libc/str/wcscasecmp.c b/third_party/musl/wcscasecmp.c similarity index 100% rename from libc/str/wcscasecmp.c rename to third_party/musl/wcscasecmp.c diff --git a/libc/str/wcsncasecmp.c b/third_party/musl/wcsncasecmp.c similarity index 100% rename from libc/str/wcsncasecmp.c rename to third_party/musl/wcsncasecmp.c diff --git a/third_party/musl/wctrans.c b/third_party/musl/wctrans.c new file mode 100644 index 000000000..ce57220fb --- /dev/null +++ b/third_party/musl/wctrans.c @@ -0,0 +1,30 @@ +#include +#include +#include + +wctrans_t wctrans(const char *class) +{ + if (!strcmp(class, "toupper")) return (wctrans_t)1; + if (!strcmp(class, "tolower")) return (wctrans_t)2; + return 0; +} + +wint_t towctrans(wint_t wc, wctrans_t trans) +{ + if (trans == (wctrans_t)1) return towupper(wc); + if (trans == (wctrans_t)2) return towlower(wc); + return wc; +} + +wctrans_t __wctrans_l(const char *s, locale_t l) +{ + return wctrans(s); +} + +wint_t __towctrans_l(wint_t c, wctrans_t t, locale_t l) +{ + return towctrans(c, t); +} + +__weak_reference(__wctrans_l, wctrans_l); +__weak_reference(__towctrans_l, towctrans_l); diff --git a/third_party/tr/BUILD.mk b/third_party/tr/BUILD.mk index 54a17731b..313bfe150 100644 --- a/third_party/tr/BUILD.mk +++ b/third_party/tr/BUILD.mk @@ -22,7 +22,8 @@ THIRD_PARTY_TR_DIRECTDEPS = \ LIBC_RUNTIME \ LIBC_STDIO \ LIBC_STR \ - THIRD_PARTY_GETOPT + THIRD_PARTY_GETOPT \ + THIRD_PARTY_MUSL \ THIRD_PARTY_TR_DEPS := \ $(call uniq,$(foreach x,$(THIRD_PARTY_TR_DIRECTDEPS),$($(x)))) diff --git a/tool/plinko/lib/iswide.c b/tool/plinko/lib/iswide.c index dcad18abf..087a21df3 100644 --- a/tool/plinko/lib/iswide.c +++ b/tool/plinko/lib/iswide.c @@ -16,327 +16,12 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.h" +#include "libc/str/unicode.h" #include "tool/plinko/lib/char.h" -static const unsigned short kWides[][2] = { - {0x1100, 0x115F}, // HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER - {0x231A, 0x231B}, // WATCH..HOURGLASS - {0x2329, 0x2329}, // LEFT-POINTING ANGLE BRACKET - {0x232A, 0x232A}, // RIGHT-POINTING ANGLE BRACKET - {0x23E9, 0x23EC}, // BLACK RIGHT-POINTING DOUBLE TRIANGLE... - {0x23F0, 0x23F0}, // ALARM CLOCK - {0x23F3, 0x23F3}, // HOURGLASS WITH FLOWING SAND - {0x25FD, 0x25FE}, // WHITE MEDIUM SMALL SQUARE..BLACK MEDIUM SMALL SQUARE - {0x2614, 0x2615}, // UMBRELLA WITH RAIN DROPS..HOT BEVERAGE - {0x2648, 0x2653}, // ARIES..PISCES - {0x267F, 0x267F}, // WHEELCHAIR SYMBOL - {0x2693, 0x2693}, // ANCHOR - {0x26A1, 0x26A1}, // HIGH VOLTAGE SIGN - {0x26AA, 0x26AB}, // MEDIUM WHITE CIRCLE..MEDIUM BLACK CIRCLE - {0x26BD, 0x26BE}, // SOCCER BALL..BASEBALL - {0x26C4, 0x26C5}, // SNOWMAN WITHOUT SNOW..SUN BEHIND CLOUD - {0x26CE, 0x26CE}, // OPHIUCHUS - {0x26D4, 0x26D4}, // NO ENTRY - {0x26EA, 0x26EA}, // CHURCH - {0x26F2, 0x26F3}, // FOUNTAIN..FLAG IN HOLE - {0x26F5, 0x26F5}, // SAILBOAT - {0x26FA, 0x26FA}, // TENT - {0x26FD, 0x26FD}, // FUEL PUMP - {0x2705, 0x2705}, // WHITE HEAVY CHECK MARK - {0x270A, 0x270B}, // RaiseD FIST..RaiseD HAND - {0x2728, 0x2728}, // SPARKLES - {0x274C, 0x274C}, // CROSS MARK - {0x274E, 0x274E}, // NEGATIVE SQUARED CROSS MARK - {0x2753, 0x2755}, // BLACK QUESTION MARK ORNAMENT..WHITE EXCLAMATION MARK - {0x2757, 0x2757}, // HEAVY EXCLAMATION MARK SYMBOL - {0x2795, 0x2797}, // HEAVY PLUS SIGN..HEAVY DIVISION SIGN - {0x27B0, 0x27B0}, // CURLY LOOP - {0x27BF, 0x27BF}, // DOUBLE CURLY LOOP - {0x2B1B, 0x2B1C}, // BLACK LARGE SQUARE..WHITE LARGE SQUARE - {0x2B50, 0x2B50}, // WHITE MEDIUM STAR - {0x2B55, 0x2B55}, // HEAVY LARGE CIRCLE - {0x2E80, 0x2E99}, // CJK RADICAL REPEAT..CJK RADICAL RAP - {0x2E9B, 0x2EF3}, // CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE - {0x2F00, 0x2FD5}, // KANGXI RADICAL ONE..KANGXI RADICAL FLUTE - {0x2FF0, 0x2FFB}, // IDEOGRAPHIC DESCRIPTION CHARACTER LTR..OVERLAID - {0x3000, 0x3000}, // IDEOGRAPHIC SPACE - {0x3001, 0x3003}, // IDEOGRAPHIC COMMA..DITTO MARK - {0x3004, 0x3004}, // JAPANESE INDUSTRIAL STANDARD SYMBOL - {0x3005, 0x3005}, // IDEOGRAPHIC ITERATION MARK - {0x3006, 0x3006}, // IDEOGRAPHIC CLOSING MARK - {0x3007, 0x3007}, // IDEOGRAPHIC NUMBER ZERO - {0x3008, 0x3008}, // LEFT ANGLE BRACKET - {0x3009, 0x3009}, // RIGHT ANGLE BRACKET - {0x300A, 0x300A}, // LEFT DOUBLE ANGLE BRACKET - {0x300B, 0x300B}, // RIGHT DOUBLE ANGLE BRACKET - {0x300C, 0x300C}, // LEFT CORNER BRACKET - {0x300D, 0x300D}, // RIGHT CORNER BRACKET - {0x300E, 0x300E}, // LEFT WHITE CORNER BRACKET - {0x300F, 0x300F}, // RIGHT WHITE CORNER BRACKET - {0x3010, 0x3010}, // LEFT BLACK LENTICULAR BRACKET - {0x3011, 0x3011}, // RIGHT BLACK LENTICULAR BRACKET - {0x3012, 0x3013}, // POSTAL MARK..GETA MARK - {0x3014, 0x3014}, // LEFT TORTOISE SHELL BRACKET - {0x3015, 0x3015}, // RIGHT TORTOISE SHELL BRACKET - {0x3016, 0x3016}, // LEFT WHITE LENTICULAR BRACKET - {0x3017, 0x3017}, // RIGHT WHITE LENTICULAR BRACKET - {0x3018, 0x3018}, // LEFT WHITE TORTOISE SHELL BRACKET - {0x3019, 0x3019}, // RIGHT WHITE TORTOISE SHELL BRACKET - {0x301A, 0x301A}, // LEFT WHITE SQUARE BRACKET - {0x301B, 0x301B}, // RIGHT WHITE SQUARE BRACKET - {0x301C, 0x301C}, // WAVE DASH - {0x301D, 0x301D}, // REVERSED DOUBLE PRIME QUOTATION MARK - {0x301E, 0x301F}, // DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME - {0x3020, 0x3020}, // POSTAL MARK FACE - {0x3021, 0x3029}, // HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE - {0x302A, 0x302D}, // IDEOGRAPHIC LEVEL TONE MARK..ENTERING TONE MARK - {0x302E, 0x302F}, // HANGUL SINGLE DOT TONE MARK..DOUBLE DOT TONE MARK - {0x3030, 0x3030}, // WAVY DASH - {0x3031, 0x3035}, // VERTICAL KANA REPEAT MARK..KANA REPEAT MARK LOWER - {0x3036, 0x3037}, // CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LF SYMBOL - {0x3038, 0x303A}, // HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY - {0x303B, 0x303B}, // VERTICAL IDEOGRAPHIC ITERATION MARK - {0x303C, 0x303C}, // MASU MARK - {0x303D, 0x303D}, // PART ALTERNATION MARK - {0x303E, 0x303E}, // IDEOGRAPHIC VARIATION INDICATOR - {0x3041, 0x3096}, // HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE - {0x3099, 0x309A}, // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK... - {0x309B, 0x309C}, // KATAKANA-HIRAGANA VOICED SOUND MARK... - {0x309D, 0x309E}, // HIRAGANA ITERATION MARK..VOICED ITERATION MARK - {0x309F, 0x309F}, // HIRAGANA DIGRAPH YORI - {0x30A0, 0x30A0}, // KATAKANA-HIRAGANA DOUBLE HYPHEN - {0x30A1, 0x30FA}, // KATAKANA LETTER SMALL A..KATAKANA LETTER VO - {0x30FB, 0x30FB}, // KATAKANA MIDDLE DOT - {0x30FC, 0x30FE}, // KATAKANA-HIRAGANA PROLONGED SOUND MARK..ITERATION - {0x30FF, 0x30FF}, // KATAKANA DIGRAPH KOTO - {0x3105, 0x312F}, // BOPOMOFO LETTER B..BOPOMOFO LETTER NN - {0x3131, 0x318E}, // HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE - {0x3190, 0x3191}, // IDEOGRAPHIC ANNOTATION LINKING MARK..REVERSE - {0x3192, 0x3195}, // IDEOGRAPHIC ANNOTATION ONE MARK..FOUR - {0x3196, 0x319F}, // IDEOGRAPHIC ANNOTATION TOP MARK..MAN - {0x31A0, 0x31BF}, // BOPOMOFO LETTER BU..BOPOMOFO LETTER AH - {0x31C0, 0x31E3}, // CJK STROKE T..CJK STROKE Q - {0x31F0, 0x31FF}, // KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO - {0x3200, 0x321E}, // PARENTHESIZED HANGUL KIYEOK..CHARACTER O HU - {0x3220, 0x3229}, // PARENTHESIZED IDEOGRAPH ONE..TEN - {0x322A, 0x3247}, // PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO - {0x3250, 0x3250}, // PARTNERSHIP SIGN - {0x3251, 0x325F}, // CIRCLED NUMBER TWENTY ONE..CIRCLED 35 - {0x3260, 0x327F}, // CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL - {0x3280, 0x3289}, // CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN - {0x328A, 0x32B0}, // CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT - {0x32B1, 0x32BF}, // CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY - {0x32C0, 0x32FF}, // TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA - {0x3300, 0x33FF}, // SQUARE APAATO..SQUARE GAL - {0x3400, 0x4DBF}, // CJK UNIFIED IDEOGRAPH - {0x4E00, 0x9FFF}, // CJK UNIFIED IDEOGRAPH - {0xA000, 0xA014}, // YI SYLLABLE IT..YI SYLLABLE E - {0xA015, 0xA015}, // YI SYLLABLE WU - {0xA016, 0xA48C}, // YI SYLLABLE BIT..YI SYLLABLE YYR - {0xA490, 0xA4C6}, // YI RADICAL QOT..YI RADICAL KE - {0xA960, 0xA97C}, // HANGUL CHOSEONG TIKEUT-MIEUM..SSANGYEORINHIEUH - {0xAC00, 0xD7A3}, // HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH - {0xF900, 0xFA6D}, // CJK COMPATIBILITY IDEOGRAPH - {0xFA6E, 0xFA6F}, // RESERVED - {0xFA70, 0xFAD9}, // CJK COMPATIBILITY IDEOGRAPH - {0xFADA, 0xFAFF}, // RESERVED - {0xFE10, 0xFE16}, // PRESENTATION FORM FOR VERTICAL COMMA..QUESTION - {0xFE17, 0xFE17}, // VERTICAL LEFT WHITE LENTICULAR BRACKET - {0xFE18, 0xFE18}, // VERTICAL RIGHT WHITE LENTICULAR BRAKCET - {0xFE19, 0xFE19}, // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS - {0xFE30, 0xFE30}, // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - {0xFE31, 0xFE32}, // VERTICAL EM DASH..VERTICAL EN DASH - {0xFE33, 0xFE34}, // VERTICAL LOW LINE..VERTICAL WAVY LOW LINE - {0xFE35, 0xFE35}, // PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS - {0xFE36, 0xFE36}, // PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS - {0xFE37, 0xFE37}, // PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET - {0xFE38, 0xFE38}, // PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET - {0xFE39, 0xFE39}, // VERTICAL LEFT TORTOISE SHELL BRACKET - {0xFE3A, 0xFE3A}, // VERTICAL RIGHT TORTOISE SHELL BRACKET - {0xFE3B, 0xFE3B}, // VERTICAL LEFT BLACK LENTICULAR BRACKET - {0xFE3C, 0xFE3C}, // VERTICAL RIGHT BLACK LENTICULAR BRACKET - {0xFE3D, 0xFE3D}, // VERTICAL LEFT DOUBLE ANGLE BRACKET - {0xFE3E, 0xFE3E}, // VERTICAL RIGHT DOUBLE ANGLE BRACKET - {0xFE3F, 0xFE3F}, // VERTICAL LEFT ANGLE BRACKET - {0xFE40, 0xFE40}, // VERTICAL RIGHT ANGLE BRACKET - {0xFE41, 0xFE41}, // VERTICAL LEFT CORNER BRACKET - {0xFE42, 0xFE42}, // VERTICAL RIGHT CORNER BRACKET - {0xFE43, 0xFE43}, // VERTICAL LEFT WHITE CORNER BRACKET - {0xFE44, 0xFE44}, // VERTICAL RIGHT WHITE CORNER BRACKET - {0xFE45, 0xFE46}, // SESAME DOT..WHITE SESAME DOT - {0xFE47, 0xFE47}, // VERTICAL LEFT SQUARE BRACKET - {0xFE48, 0xFE48}, // VERTICAL RIGHT SQUARE BRACKET - {0xFE49, 0xFE4C}, // DASHED OVERLINE..DOUBLE WAVY OVERLINE - {0xFE4D, 0xFE4F}, // DASHED LOW LINE..WAVY LOW LINE - {0xFE50, 0xFE52}, // SMALL COMMA..SMALL FULL STOP - {0xFE54, 0xFE57}, // SMALL SEMICOLON..SMALL EXCLAMATION MARK - {0xFE58, 0xFE58}, // SMALL EM DASH - {0xFE59, 0xFE59}, // SMALL LEFT PARENTHESIS - {0xFE5A, 0xFE5A}, // SMALL RIGHT PARENTHESIS - {0xFE5B, 0xFE5B}, // SMALL LEFT CURLY BRACKET - {0xFE5C, 0xFE5C}, // SMALL RIGHT CURLY BRACKET - {0xFE5D, 0xFE5D}, // SMALL LEFT TORTOISE SHELL BRACKET - {0xFE5E, 0xFE5E}, // SMALL RIGHT TORTOISE SHELL BRACKET - {0xFE5F, 0xFE61}, // SMALL NUMBER SIGN..SMALL ASTERISK - {0xFE62, 0xFE62}, // SMALL PLUS SIGN - {0xFE63, 0xFE63}, // SMALL HYPHEN-MINUS - {0xFE64, 0xFE66}, // SMALL LESS-THAN SIGN..SMALL EQUALS SIGN - {0xFE68, 0xFE68}, // SMALL REVERSE SOLIDUS - {0xFE69, 0xFE69}, // SMALL DOLLAR SIGN - {0xFE6A, 0xFE6B}, // SMALL PERCENT SIGN..SMALL COMMERCIAL AT - {0xFF01, 0xFF03}, // EXCLAMATION MARK..NUMBER SIGN - {0xFF04, 0xFF04}, // DOLLAR SIGN - {0xFF05, 0xFF07}, // PERCENT SIGN..APOSTROPHE - {0xFF08, 0xFF08}, // LEFT PARENTHESIS - {0xFF09, 0xFF09}, // RIGHT PARENTHESIS - {0xFF0A, 0xFF0A}, // ASTERISK - {0xFF0B, 0xFF0B}, // PLUS SIGN - {0xFF0C, 0xFF0C}, // COMMA - {0xFF0D, 0xFF0D}, // HYPHEN-MINUS - {0xFF0E, 0xFF0F}, // FULL STOP..SOLIDUS - {0xFF10, 0xFF19}, // DIGIT ZERO..DIGIT NINE - {0xFF1A, 0xFF1B}, // COLON..SEMICOLON - {0xFF1C, 0xFF1E}, // LESS-THAN..GREATER-THAN - {0xFF1F, 0xFF20}, // QUESTION MARK..COMMERCIAL AT - {0xFF21, 0xFF3A}, // LATIN CAPITAL LETTER A..Z - {0xFF3B, 0xFF3B}, // LEFT SQUARE BRACKET - {0xFF3C, 0xFF3C}, // REVERSE SOLIDUS - {0xFF3D, 0xFF3D}, // RIGHT SQUARE BRACKET - {0xFF3E, 0xFF3E}, // CIRCUMFLEX ACCENT - {0xFF3F, 0xFF3F}, // LOW LINE - {0xFF40, 0xFF40}, // GRAVE ACCENT - {0xFF41, 0xFF5A}, // LATIN SMALL LETTER A..Z - {0xFF5B, 0xFF5B}, // LEFT CURLY BRACKET - {0xFF5C, 0xFF5C}, // VERTICAL LINE - {0xFF5D, 0xFF5D}, // RIGHT CURLY BRACKET - {0xFF5E, 0xFF5E}, // TILDE - {0xFF5F, 0xFF5F}, // LEFT WHITE PARENTHESIS - {0xFF60, 0xFF60}, // RIGHT WHITE PARENTHESIS - {0xFFE0, 0xFFE1}, // CENT SIGN..POUND SIGN - {0xFFE2, 0xFFE2}, // NOT SIGN - {0xFFE3, 0xFFE3}, // MACRON - {0xFFE4, 0xFFE4}, // BROKEN BAR - {0xFFE5, 0xFFE6}, // YEN SIGN..WON SIGN -}; - -static const int kAstralWides[][2] = { - {0x16FE0, 0x16FE1}, // TANGUT ITERATION MARK..NUSHU ITERATION MARK - {0x16FE2, 0x16FE2}, // OLD CHINESE HOOK MARK - {0x16FE3, 0x16FE3}, // OLD CHINESE ITERATION MARK - {0x16FE4, 0x16FE4}, // KHITAN SMALL SCRIPT FILLER - {0x16FF0, 0x16FF1}, // VIETNAMESE ALTERNATE READING MARK CA..NHAY - {0x17000, 0x187F7}, // TANGUT IDEOGRAPH - {0x18800, 0x18AFF}, // TANGUT COMPONENT - {0x18B00, 0x18CD5}, // KHITAN SMALL SCRIPT CHARACTER - {0x18D00, 0x18D08}, // TANGUT IDEOGRAPH - {0x1AFF0, 0x1AFF3}, // KATAKANA LETTER MINNAN TONE-2..5 - {0x1AFF5, 0x1AFFB}, // KATAKANA LETTER MINNAN TONE-7..5 - {0x1AFFD, 0x1AFFE}, // KATAKANA LETTER MINNAN NASALIZED TONE-7..8 - {0x1B000, 0x1B0FF}, // KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 - {0x1B100, 0x1B122}, // HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU - {0x1B150, 0x1B152}, // HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO - {0x1B164, 0x1B167}, // KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N - {0x1B170, 0x1B2FB}, // NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB - {0x1F004, 0x1F004}, // MAHJONG TILE RED DRAGON - {0x1F0CF, 0x1F0CF}, // PLAYING CARD BLACK JOKER - {0x1F18E, 0x1F18E}, // NEGATIVE SQUARED AB - {0x1F191, 0x1F19A}, // SQUARED CL..SQUARED VS - {0x1F200, 0x1F202}, // SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA - {0x1F210, 0x1F23B}, // SQUARED CJK UNIFIED IDEOGRAPH - {0x1F240, 0x1F248}, // TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH - {0x1F250, 0x1F251}, // CIRCLED IDEOGRAPH ADVANTAGE..ACCEPT - {0x1F260, 0x1F265}, // ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI - {0x1F300, 0x1F320}, // CYCLONE..SHOOTING STAR - {0x1F32D, 0x1F335}, // HOT DOG..CACTUS - {0x1F337, 0x1F37C}, // TULIP..BABY BOTTLE - {0x1F37E, 0x1F393}, // BOTTLE WITH POPPING CORK..GRADUATION CAP - {0x1F3A0, 0x1F3CA}, // CAROUSEL HORSE..SWIMMER - {0x1F3CF, 0x1F3D3}, // CRICKET BAT AND BALL..TABLE TENNIS PADDLE AND BALL - {0x1F3E0, 0x1F3F0}, // HOUSE BUILDING..EUROPEAN CASTLE - {0x1F3F4, 0x1F3F4}, // WAVING BLACK FLAG - {0x1F3F8, 0x1F3FA}, // BADMINTON RACQUET AND SHUTTLECOCK..AMPHORA - {0x1F3FB, 0x1F3FF}, // EMOJI MODIFIER FITZPATRICK TYPE-1-2..6 - {0x1F400, 0x1F43E}, // RAT..PAW PRINTS - {0x1F440, 0x1F440}, // EYES - {0x1F442, 0x1F4FC}, // EAR..VIDEOCASSETTE - {0x1F4FF, 0x1F53D}, // PRAYER BEADS..DOWN-POINTING SMALL RED TRIANGLE - {0x1F54B, 0x1F54E}, // KAABA..MENORAH WITH NINE BRANCHES - {0x1F550, 0x1F567}, // CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY - {0x1F57A, 0x1F57A}, // MAN DANCING - {0x1F595, 0x1F596}, // REVERSED HAND WITH MIDDLE FINGER EXTENDED..FINGERS - {0x1F5A4, 0x1F5A4}, // BLACK HEART - {0x1F5FB, 0x1F5FF}, // MOUNT FUJI..MOYAI - {0x1F600, 0x1F64F}, // GRINNING FACE..PERSON WITH FOLDED HANDS - {0x1F680, 0x1F6C5}, // ROCKET..LEFT LUGGAGE - {0x1F6CC, 0x1F6CC}, // SLEEPING ACCOMMODATION - {0x1F6D0, 0x1F6D2}, // PLACE OF WORSHIP..SHOPPING TROLLEY - {0x1F6D5, 0x1F6D7}, // HINDU TEMPLE..ELEVATOR - {0x1F6DD, 0x1F6DF}, // PLAYGROUND SLIDE..RING BUOY - {0x1F6EB, 0x1F6EC}, // AIRPLANE DEPARTURE..AIRPLANE ARRIVING - {0x1F6F4, 0x1F6FC}, // SCOOTER..ROLLER SKATE - {0x1F7E0, 0x1F7EB}, // LARGE ORANGE CIRCLE..LARGE BROWN SQUARE - {0x1F7F0, 0x1F7F0}, // HEAVY EQUALS SIGN - {0x1F90C, 0x1F93A}, // PINCHED FINGERS..FENCER - {0x1F93C, 0x1F945}, // WRESTLERS..GOAL NET - {0x1F947, 0x1F9FF}, // FIRST PLACE MEDAL..NAZAR AMULET - {0x1FA70, 0x1FA74}, // BALLET SHOES..THONG SANDAL - {0x1FA78, 0x1FA7C}, // DROP OF BLOOD..CRUTCH - {0x1FA80, 0x1FA86}, // YO-YO..NESTING DOLLS - {0x1FA90, 0x1FAAC}, // RINGED PLANET..HAMSA - {0x1FAB0, 0x1FABA}, // FLY..NEST WITH EGGS - {0x1FAC0, 0x1FAC5}, // ANATOMICAL HEART..PERSON WITH CROWN - {0x1FAD0, 0x1FAD9}, // BLUEBERRIES..JAR - {0x1FAE0, 0x1FAE7}, // MELTING FACE..BUBBLES - {0x1FAF0, 0x1FAF6}, // HAND WITH INDEX FINGER THUMB CROSSED..HEART HANDS - {0x20000, 0x2A6DF}, // CJK UNIFIED IDEOGRAPH - {0x2A6E0, 0x2A6FF}, // RESERVED - {0x2A700, 0x2B738}, // CJK UNIFIED IDEOGRAPH - {0x2B739, 0x2B73F}, // RESERVED - {0x2B740, 0x2B81D}, // CJK UNIFIED IDEOGRAPH - {0x2B81E, 0x2B81F}, // RESERVED - {0x2B820, 0x2CEA1}, // CJK UNIFIED IDEOGRAPH - {0x2CEA2, 0x2CEAF}, // RESERVED - {0x2CEB0, 0x2EBE0}, // CJK UNIFIED IDEOGRAPH - {0x2EBE1, 0x2F7FF}, // RESERVED - {0x2F800, 0x2FA1D}, // CJK COMPATIBILITY IDEOGRAPH - {0x2FA1E, 0x2FA1F}, // RESERVED - {0x2FA20, 0x2FFFD}, // RESERVED - {0x30000, 0x3134A}, // CJK UNIFIED IDEOGRAPH - {0x3134B, 0x3FFFD}, // RESERVED -}; - -pureconst bool IsWide(int c) { - int m, l, r, n; - if (c < 0x1100) { - return false; - } else if (c < 0x10000) { - l = 0; - r = n = sizeof(kWides) / sizeof(kWides[0]); - while (l < r) { - m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - if (kWides[m][1] < c) { - l = m + 1; - } else { - r = m; - } - } - return l < n && kWides[l][0] <= c && c <= kWides[l][1]; - } else { - l = 0; - r = n = sizeof(kAstralWides) / sizeof(kAstralWides[0]); - while (l < r) { - m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - if (kAstralWides[m][1] < c) { - l = m + 1; - } else { - r = m; - } - } - return l < n && kAstralWides[l][0] <= c && c <= kAstralWides[l][1]; - } -} - -pureconst int GetMonospaceCharacterWidth(int c) { - return !IsControl(c) + IsWide(c); +int GetMonospaceCharacterWidth(int c) { + int w = wcwidth(c); + if (w < 0) + w = 0; + return w; }