mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
Improve some unicode functions
This commit is contained in:
parent
b9187061a7
commit
1b5a5719c3
33 changed files with 8366 additions and 197 deletions
|
@ -19,7 +19,8 @@
|
|||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Returns true if c is space, \t, \r, \n, \f, or \v.
|
||||
* Returns nonzero if c is space, \t, \r, \n, \f, or \v.
|
||||
* @see isblank()
|
||||
*/
|
||||
int isspace(int c) {
|
||||
return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' ||
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
int iswalnum(wint_t wc) {
|
||||
return isalnum(wc);
|
||||
/**
 * Returns nonzero if c is alphanumeric.
 *
 * A character qualifies when either iswdigit() or iswalpha() accepts
 * it.
 *
 * @param c is the wide character to classify
 * @return 1 if c is a decimal digit or a letter, otherwise 0
 */
int iswalnum(wint_t c) {
  return iswdigit(c) || iswalpha(c);
}
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
int iswalpha(wint_t wc) {
|
||||
return isalpha(wc);
|
||||
/**
 * Returns nonzero if c is alphabetical.
 *
 * A character is alphabetical when iswupper() or iswlower() accepts
 * it.
 *
 * @param c is the wide character to classify
 * @return 1 if c is an uppercase or lowercase letter, otherwise 0
 */
int iswalpha(wint_t c) {
  return iswupper(c) || iswlower(c);
}
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
int iswblank(wint_t wc) {
|
||||
return isblank(wc);
|
||||
/**
 * Returns nonzero if c is space or tab.
 *
 * Only the ASCII space and horizontal tab are treated as blank; no
 * other code point is accepted.
 *
 * @param c is the wide character to classify
 * @return 1 if c is ' ' or '\t', otherwise 0
 */
int iswblank(wint_t c) {
  return c == ' ' || c == '\t';
}
|
||||
|
|
|
@ -19,8 +19,8 @@
|
|||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Returns nonzero if c is C0 or C1 control code.
 *
 * The accepted ranges are 0x00 through 0x1F, and 0x7F (DEL) through
 * 0x9F.
 *
 * @param c is the wide character to classify
 * @return 1 if c falls in either range, otherwise 0
 */
int iswcntrl(wint_t c) {
  return (0x00 <= c && c <= 0x1F) || (0x7F <= c && c <= 0x9F);
}
|
||||
|
|
|
@ -19,71 +19,30 @@
|
|||
#include "libc/macros.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
#define ALNUM 1
|
||||
#define ALPHA 2
|
||||
#define BLANK 3
|
||||
#define CNTRL 4
|
||||
#define DIGIT 5
|
||||
#define GRAPH 6
|
||||
#define LOWER 7
|
||||
#define PRINT 8
|
||||
#define PUNCT 9
|
||||
#define SPACE 10
|
||||
#define UPPER 11
|
||||
#define XDIGIT 12
|
||||
|
||||
static const struct {
|
||||
char name[7];
|
||||
char type;
|
||||
} kWcTypes[] = {
|
||||
{"alnum", ALNUM}, {"alpha", ALPHA}, {"blank", BLANK}, {"cntrl", CNTRL},
|
||||
{"digit", DIGIT}, {"graph", GRAPH}, {"lower", LOWER}, {"print", PRINT},
|
||||
{"punct", PUNCT}, {"space", SPACE}, {"upper", UPPER}, {"xdigit", XDIGIT},
|
||||
/**
 * Classification predicates, in property-number order.
 *
 * Slot zero holds the predicate for property number 1 and the last
 * slot holds the one for property number 12. The element type is a
 * const pointer to a function returning plain int, which is exactly
 * the type of the predicates stored here; the return type carries no
 * qualifier.
 */
static int (*const kWcTypeFuncs[])(wint_t) = {
    iswalnum,   // 1
    iswalpha,   // 2
    iswblank,   // 3
    iswcntrl,   // 4
    iswdigit,   // 5
    iswgraph,   // 6
    iswlower,   // 7
    iswprint,   // 8
    iswpunct,   // 9
    iswspace,   // 10
    iswupper,   // 11
    iswxdigit,  // 12
};
|
||||
|
||||
/**
 * Compares two NUL-terminated strings byte by byte.
 *
 * Bytes are masked to 0..255 before subtracting, so the sign of the
 * result does not depend on whether plain char is signed.
 *
 * @param l is the left string
 * @param r is the right string
 * @return 0 if equal, negative if l sorts first, positive otherwise
 */
static int CompareStrings(const char *l, const char *r) {
  size_t i = 0;
  // Advance while the bytes agree and the terminator is not reached.
  while (l[i] == r[i] && r[i]) {
    ++i;
  }
  return (l[i] & 0xff) - (r[i] & 0xff);
}
|
||||
|
||||
wctype_t wctype(const char *name) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARRAYLEN(kWcTypes); ++i) {
|
||||
if (CompareStrings(name, kWcTypes[i].name) == 0) {
|
||||
return kWcTypes[i].type;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int iswctype(wint_t wc, wctype_t type) {
|
||||
switch (type) {
|
||||
case ALNUM:
|
||||
return iswalnum(wc);
|
||||
case ALPHA:
|
||||
return iswalpha(wc);
|
||||
case BLANK:
|
||||
return iswblank(wc);
|
||||
case CNTRL:
|
||||
return iswcntrl(wc);
|
||||
case DIGIT:
|
||||
return iswdigit(wc);
|
||||
case GRAPH:
|
||||
return iswgraph(wc);
|
||||
case LOWER:
|
||||
return iswlower(wc);
|
||||
case PRINT:
|
||||
return iswprint(wc);
|
||||
case PUNCT:
|
||||
return iswpunct(wc);
|
||||
case SPACE:
|
||||
return iswspace(wc);
|
||||
case UPPER:
|
||||
return iswupper(wc);
|
||||
case XDIGIT:
|
||||
return iswxdigit(wc);
|
||||
default:
|
||||
return 0;
|
||||
/**
|
||||
* Returns nonzero if c has property.
|
||||
*
|
||||
* @param t is number returned by wctype
|
||||
*/
|
||||
int iswctype(wint_t c, wctype_t t) {
|
||||
if (1 <= t && t <= ARRAYLEN(kWcTypeFuncs)) {
|
||||
return kWcTypeFuncs[t - 1](c);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
int iswdigit(wint_t wc) {
|
||||
return isdigit(wc);
|
||||
/**
 * Returns nonzero if c is decimal digit.
 *
 * Only the ASCII digits '0' through '9' are accepted.
 *
 * @param c is the wide character to classify
 * @return 1 if c is an ASCII decimal digit, otherwise 0
 */
int iswdigit(wint_t c) {
  return '0' <= c && c <= '9';
}
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
int iswgraph(wint_t wc) {
|
||||
return isgraph(wc);
|
||||
/**
 * Returns nonzero if c is printable and not a space.
 *
 * @param c is the wide character to classify
 * @return 1 if iswprint() accepts c and iswspace() rejects it,
 *     otherwise 0
 */
int iswgraph(wint_t c) {
  return iswprint(c) && !iswspace(c);
}
|
||||
|
|
2139
libc/str/iswlower.c
2139
libc/str/iswlower.c
File diff suppressed because it is too large
Load diff
|
@ -18,6 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
int iswprint(wint_t wc) {
|
||||
return !iswcntrl(wc);
|
||||
/**
 * Returns nonzero if c is printable.
 *
 * Every value is considered printable except:
 *
 * - 0x00 through 0x1F (C0 control codes)
 * - 0x7F through 0x9F (DEL and C1 control codes)
 * - 0x2028 and 0x2029 (line and paragraph separator)
 * - 0xFFF9 through 0xFFFB
 *
 * @param c is the wide character to classify
 * @return 0 if c is one of the values listed above, otherwise 1
 */
int iswprint(wint_t c) {
  return !((0x00 <= c && c <= 0x1F) || (0x7F <= c && c <= 0x9F) ||
           (0xFFF9 <= c && c <= 0xFFFB) || c == 0x2028 || c == 0x2029);
}
|
||||
|
|
|
@ -18,6 +18,524 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
int iswpunct(wint_t wc) {
|
||||
return ispunct(wc);
|
||||
/**
 * Returns nonzero if c is punctuation mark.
 *
 * Below 0xa0 only the ASCII punctuation characters are accepted. At
 * and above 0xa0 the code points listed in the second switch are
 * accepted; each one is annotated with its Unicode name and general
 * category (Pc, Pd, Ps, Pe, Pi, Pf, or Po).
 *
 * Labels at and above 0xa0 are spelled as hex code points rather than
 * character literals. Unicode normalization and width folding of the
 * source text would otherwise collapse distinct characters into one
 * another (0x037e into ';', 0x0387 into 0x00b7, 0x2329 into 0x3008,
 * the 0xff01.. fullwidth forms into ASCII), which produces duplicate
 * or malformed case labels.
 *
 * @param c is the wide character to classify
 * @return 1 if c is one of the listed code points, otherwise 0
 */
int iswpunct(wint_t c) {
  if (c < 0xa0) {
    switch (c) {
      case '!':
      case '"':
      case '#':
      case '$':
      case '%':
      case '&':
      case '\'':
      case '(':
      case ')':
      case '*':
      case '+':
      case ',':
      case '-':
      case '.':
      case '/':
      case ':':
      case ';':
      case '<':
      case '=':
      case '>':
      case '?':
      case '@':
      case '[':
      case '\\':
      case ']':
      case '^':
      case '_':
      case '`':
      case '{':
      case '|':
      case '}':
      case '~':
        return 1;
      default:
        return 0;
    }
  }
  switch (c) {
    case 0x00a1:  // INVERTED EXCLAMATION MARK (Po)
    case 0x00a7:  // SECTION SIGN (Po)
    case 0x00ab:  // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (Pi)
    case 0x00b6:  // PILCROW SIGN (Po)
    case 0x00b7:  // MIDDLE DOT (Po)
    case 0x00bb:  // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (Pf)
    case 0x00bf:  // INVERTED QUESTION MARK (Po)
    case 0x037e:  // GREEK QUESTION MARK (Po)
    case 0x0387:  // GREEK ANO TELEIA (Po)
    case 0x055a:  // ARMENIAN APOSTROPHE (Po)
    case 0x055b:  // ARMENIAN EMPHASIS MARK (Po)
    case 0x055c:  // ARMENIAN EXCLAMATION MARK (Po)
    case 0x055d:  // ARMENIAN COMMA (Po)
    case 0x055e:  // ARMENIAN QUESTION MARK (Po)
    case 0x055f:  // ARMENIAN ABBREVIATION MARK (Po)
    case 0x0589:  // ARMENIAN FULL STOP (Po)
    case 0x058a:  // ARMENIAN HYPHEN (Pd)
    case 0x05be:  // HEBREW PUNCTUATION MAQAF (Pd)
    case 0x05c0:  // HEBREW PUNCTUATION PASEQ (Po)
    case 0x05c3:  // HEBREW PUNCTUATION SOF PASUQ (Po)
    case 0x05c6:  // HEBREW PUNCTUATION NUN HAFUKHA (Po)
    case 0x05f3:  // HEBREW PUNCTUATION GERESH (Po)
    case 0x05f4:  // HEBREW PUNCTUATION GERSHAYIM (Po)
    case 0x0609:  // ARABIC-INDIC PER MILLE SIGN (Po)
    case 0x060a:  // ARABIC-INDIC PER TEN THOUSAND SIGN (Po)
    case 0x060c:  // ARABIC COMMA (Po)
    case 0x060d:  // ARABIC DATE SEPARATOR (Po)
    case 0x061b:  // ARABIC SEMICOLON (Po)
    case 0x061e:  // ARABIC TRIPLE DOT PUNCTUATION MARK (Po)
    case 0x061f:  // ARABIC QUESTION MARK (Po)
    case 0x066a:  // ARABIC PERCENT SIGN (Po)
    case 0x066b:  // ARABIC DECIMAL SEPARATOR (Po)
    case 0x066c:  // ARABIC THOUSANDS SEPARATOR (Po)
    case 0x066d:  // ARABIC FIVE POINTED STAR (Po)
    case 0x06d4:  // ARABIC FULL STOP (Po)
    case 0x07f7:  // NKO SYMBOL GBAKURUNEN (Po)
    case 0x07f8:  // NKO COMMA (Po)
    case 0x07f9:  // NKO EXCLAMATION MARK (Po)
    case 0x0964:  // DEVANAGARI DANDA (Po)
    case 0x0965:  // DEVANAGARI DOUBLE DANDA (Po)
    case 0x0970:  // DEVANAGARI ABBREVIATION SIGN (Po)
    case 0x09fd:  // BENGALI ABBREVIATION SIGN (Po)
    case 0x0a76:  // GURMUKHI ABBREVIATION SIGN (Po)
    case 0x0af0:  // GUJARATI ABBREVIATION SIGN (Po)
    case 0x0c77:  // TELUGU SIGN SIDDHAM (Po)
    case 0x0c84:  // KANNADA SIGN SIDDHAM (Po)
    case 0x0df4:  // SINHALA PUNCTUATION KUNDDALIYA (Po)
    case 0x0e4f:  // THAI CHARACTER FONGMAN (Po)
    case 0x0e5a:  // THAI CHARACTER ANGKHANKHU (Po)
    case 0x0e5b:  // THAI CHARACTER KHOMUT (Po)
    case 0x0f04:  // TIBETAN MARK INITIAL YIG MGO MDUN MA (Po)
    case 0x0f05:  // TIBETAN MARK CLOSING YIG MGO SGAB MA (Po)
    case 0x0f06:  // TIBETAN MARK CARET YIG MGO PHUR SHAD MA (Po)
    case 0x0f07:  // TIBETAN MARK YIG MGO TSHEG SHAD MA (Po)
    case 0x0f08:  // TIBETAN MARK SBRUL SHAD (Po)
    case 0x0f09:  // TIBETAN MARK BSKUR YIG MGO (Po)
    case 0x0f0a:  // TIBETAN MARK BKA- SHOG YIG MGO (Po)
    case 0x0f0b:  // TIBETAN MARK INTERSYLLABIC TSHEG (Po)
    case 0x0f0c:  // TIBETAN MARK DELIMITER TSHEG BSTAR (Po)
    case 0x0f0d:  // TIBETAN MARK SHAD (Po)
    case 0x0f0e:  // TIBETAN MARK NYIS SHAD (Po)
    case 0x0f0f:  // TIBETAN MARK TSHEG SHAD (Po)
    case 0x0f10:  // TIBETAN MARK NYIS TSHEG SHAD (Po)
    case 0x0f11:  // TIBETAN MARK RIN CHEN SPUNGS SHAD (Po)
    case 0x0f12:  // TIBETAN MARK RGYA GRAM SHAD (Po)
    case 0x0f14:  // TIBETAN MARK GTER TSHEG (Po)
    case 0x0f3a:  // TIBETAN MARK GUG RTAGS GYON (Ps)
    case 0x0f3b:  // TIBETAN MARK GUG RTAGS GYAS (Pe)
    case 0x0f3c:  // TIBETAN MARK ANG KHANG GYON (Ps)
    case 0x0f3d:  // TIBETAN MARK ANG KHANG GYAS (Pe)
    case 0x0f85:  // TIBETAN MARK PALUTA (Po)
    case 0x0fd0:  // TIBETAN MARK BSKA- SHOG GI MGO RGYAN (Po)
    case 0x0fd1:  // TIBETAN MARK MNYAM YIG GI MGO RGYAN (Po)
    case 0x0fd2:  // TIBETAN MARK NYIS TSHEG (Po)
    case 0x0fd3:  // TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA (Po)
    case 0x0fd4:  // TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA (Po)
    case 0x0fd9:  // TIBETAN MARK LEADING MCHAN RTAGS (Po)
    case 0x0fda:  // TIBETAN MARK TRAILING MCHAN RTAGS (Po)
    case 0x104a:  // MYANMAR SIGN LITTLE SECTION (Po)
    case 0x104b:  // MYANMAR SIGN SECTION (Po)
    case 0x104c:  // MYANMAR SYMBOL LOCATIVE (Po)
    case 0x104d:  // MYANMAR SYMBOL COMPLETED (Po)
    case 0x104e:  // MYANMAR SYMBOL AFOREMENTIONED (Po)
    case 0x104f:  // MYANMAR SYMBOL GENITIVE (Po)
    case 0x10fb:  // GEORGIAN PARAGRAPH SEPARATOR (Po)
    case 0x1360:  // ETHIOPIC SECTION MARK (Po)
    case 0x1361:  // ETHIOPIC WORDSPACE (Po)
    case 0x1362:  // ETHIOPIC FULL STOP (Po)
    case 0x1363:  // ETHIOPIC COMMA (Po)
    case 0x1364:  // ETHIOPIC SEMICOLON (Po)
    case 0x1365:  // ETHIOPIC COLON (Po)
    case 0x1366:  // ETHIOPIC PREFACE COLON (Po)
    case 0x1367:  // ETHIOPIC QUESTION MARK (Po)
    case 0x1368:  // ETHIOPIC PARAGRAPH SEPARATOR (Po)
    case 0x1400:  // CANADIAN SYLLABICS HYPHEN (Pd)
    case 0x166e:  // CANADIAN SYLLABICS FULL STOP (Po)
    case 0x169b:  // OGHAM FEATHER MARK (Ps)
    case 0x169c:  // OGHAM REVERSED FEATHER MARK (Pe)
    case 0x16eb:  // RUNIC SINGLE PUNCTUATION (Po)
    case 0x16ec:  // RUNIC MULTIPLE PUNCTUATION (Po)
    case 0x16ed:  // RUNIC CROSS PUNCTUATION (Po)
    case 0x1735:  // PHILIPPINE SINGLE PUNCTUATION (Po)
    case 0x1736:  // PHILIPPINE DOUBLE PUNCTUATION (Po)
    case 0x17d4:  // KHMER SIGN KHAN (Po)
    case 0x17d5:  // KHMER SIGN BARIYOOSAN (Po)
    case 0x17d6:  // KHMER SIGN CAMNUC PII KUUH (Po)
    case 0x17d8:  // KHMER SIGN BEYYAL (Po)
    case 0x17d9:  // KHMER SIGN PHNAEK MUAN (Po)
    case 0x17da:  // KHMER SIGN KOOMUUT (Po)
    case 0x1800:  // MONGOLIAN BIRGA (Po)
    case 0x1801:  // MONGOLIAN ELLIPSIS (Po)
    case 0x1802:  // MONGOLIAN COMMA (Po)
    case 0x1803:  // MONGOLIAN FULL STOP (Po)
    case 0x1804:  // MONGOLIAN COLON (Po)
    case 0x1805:  // MONGOLIAN FOUR DOTS (Po)
    case 0x1806:  // MONGOLIAN TODO SOFT HYPHEN (Pd)
    case 0x1807:  // MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER (Po)
    case 0x1808:  // MONGOLIAN MANCHU COMMA (Po)
    case 0x1809:  // MONGOLIAN MANCHU FULL STOP (Po)
    case 0x180a:  // MONGOLIAN NIRUGU (Po)
    case 0x1944:  // LIMBU EXCLAMATION MARK (Po)
    case 0x1945:  // LIMBU QUESTION MARK (Po)
    case 0x1a1e:  // BUGINESE PALLAWA (Po)
    case 0x1a1f:  // BUGINESE END OF SECTION (Po)
    case 0x1c7e:  // OL CHIKI PUNCTUATION MUCAAD (Po)
    case 0x1c7f:  // OL CHIKI PUNCTUATION DOUBLE MUCAAD (Po)
    case 0x2010:  // HYPHEN (Pd)
    case 0x2011:  // NON-BREAKING HYPHEN (Pd)
    case 0x2012:  // FIGURE DASH (Pd)
    case 0x2013:  // EN DASH (Pd)
    case 0x2014:  // EM DASH (Pd)
    case 0x2015:  // HORIZONTAL BAR (Pd)
    case 0x2016:  // DOUBLE VERTICAL LINE (Po)
    case 0x2017:  // DOUBLE LOW LINE (Po)
    case 0x2018:  // LEFT SINGLE QUOTATION MARK (Pi)
    case 0x2019:  // RIGHT SINGLE QUOTATION MARK (Pf)
    case 0x201a:  // SINGLE LOW-9 QUOTATION MARK (Ps)
    case 0x201b:  // SINGLE HIGH-REVERSED-9 QUOTATION MARK (Pi)
    case 0x201c:  // LEFT DOUBLE QUOTATION MARK (Pi)
    case 0x201d:  // RIGHT DOUBLE QUOTATION MARK (Pf)
    case 0x201e:  // DOUBLE LOW-9 QUOTATION MARK (Ps)
    case 0x201f:  // DOUBLE HIGH-REVERSED-9 QUOTATION MARK (Pi)
    case 0x2020:  // DAGGER (Po)
    case 0x2021:  // DOUBLE DAGGER (Po)
    case 0x2022:  // BULLET (Po)
    case 0x2023:  // TRIANGULAR BULLET (Po)
    case 0x2024:  // ONE DOT LEADER (Po)
    case 0x2025:  // TWO DOT LEADER (Po)
    case 0x2026:  // HORIZONTAL ELLIPSIS (Po)
    case 0x2027:  // HYPHENATION POINT (Po)
    case 0x2030:  // PER MILLE SIGN (Po)
    case 0x2031:  // PER TEN THOUSAND SIGN (Po)
    case 0x2032:  // PRIME (Po)
    case 0x2033:  // DOUBLE PRIME (Po)
    case 0x2034:  // TRIPLE PRIME (Po)
    case 0x2035:  // REVERSED PRIME (Po)
    case 0x2036:  // REVERSED DOUBLE PRIME (Po)
    case 0x2037:  // REVERSED TRIPLE PRIME (Po)
    case 0x2038:  // CARET (Po)
    case 0x2039:  // SINGLE LEFT-POINTING ANGLE QUOTATION MARK (Pi)
    case 0x203a:  // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (Pf)
    case 0x203b:  // REFERENCE MARK (Po)
    case 0x203c:  // DOUBLE EXCLAMATION MARK (Po)
    case 0x203d:  // INTERROBANG (Po)
    case 0x203e:  // OVERLINE (Po)
    case 0x203f:  // UNDERTIE (Pc)
    case 0x2040:  // CHARACTER TIE (Pc)
    case 0x2041:  // CARET INSERTION POINT (Po)
    case 0x2042:  // ASTERISM (Po)
    case 0x2043:  // HYPHEN BULLET (Po)
    case 0x2045:  // LEFT SQUARE BRACKET WITH QUILL (Ps)
    case 0x2046:  // RIGHT SQUARE BRACKET WITH QUILL (Pe)
    case 0x2047:  // DOUBLE QUESTION MARK (Po)
    case 0x2048:  // QUESTION EXCLAMATION MARK (Po)
    case 0x2049:  // EXCLAMATION QUESTION MARK (Po)
    case 0x204a:  // TIRONIAN SIGN ET (Po)
    case 0x204b:  // REVERSED PILCROW SIGN (Po)
    case 0x204c:  // BLACK LEFTWARDS BULLET (Po)
    case 0x204d:  // BLACK RIGHTWARDS BULLET (Po)
    case 0x204e:  // LOW ASTERISK (Po)
    case 0x204f:  // REVERSED SEMICOLON (Po)
    case 0x2050:  // CLOSE UP (Po)
    case 0x2051:  // TWO ASTERISKS ALIGNED VERTICALLY (Po)
    case 0x2053:  // SWUNG DASH (Po)
    case 0x2054:  // INVERTED UNDERTIE (Pc)
    case 0x2055:  // FLOWER PUNCTUATION MARK (Po)
    case 0x2056:  // THREE DOT PUNCTUATION (Po)
    case 0x2057:  // QUADRUPLE PRIME (Po)
    case 0x2058:  // FOUR DOT PUNCTUATION (Po)
    case 0x2059:  // FIVE DOT PUNCTUATION (Po)
    case 0x205a:  // TWO DOT PUNCTUATION (Po)
    case 0x205b:  // FOUR DOT MARK (Po)
    case 0x205c:  // DOTTED CROSS (Po)
    case 0x205d:  // TRICOLON (Po)
    case 0x205e:  // VERTICAL FOUR DOTS (Po)
    case 0x207d:  // SUPERSCRIPT LEFT PARENTHESIS (Ps)
    case 0x207e:  // SUPERSCRIPT RIGHT PARENTHESIS (Pe)
    case 0x208d:  // SUBSCRIPT LEFT PARENTHESIS (Ps)
    case 0x208e:  // SUBSCRIPT RIGHT PARENTHESIS (Pe)
    case 0x2308:  // LEFT CEILING (Ps)
    case 0x2309:  // RIGHT CEILING (Pe)
    case 0x230a:  // LEFT FLOOR (Ps)
    case 0x230b:  // RIGHT FLOOR (Pe)
    case 0x2329:  // LEFT-POINTING ANGLE BRACKET (Ps)
    case 0x232a:  // RIGHT-POINTING ANGLE BRACKET (Pe)
    case 0x2768:  // MEDIUM LEFT PARENTHESIS ORNAMENT (Ps)
    case 0x2769:  // MEDIUM RIGHT PARENTHESIS ORNAMENT (Pe)
    case 0x276a:  // MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT (Ps)
    case 0x276b:  // MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT (Pe)
    case 0x276c:  // MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT (Ps)
    case 0x276d:  // MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT (Pe)
    case 0x276e:  // HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT (Ps)
    case 0x276f:  // HEAVY RIGHT-POINTING ANGLE QUOT MARK ORNAMENT (Pe)
    case 0x2770:  // HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT (Ps)
    case 0x2771:  // HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT (Pe)
    case 0x2772:  // LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT (Ps)
    case 0x2773:  // LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT (Pe)
    case 0x2774:  // MEDIUM LEFT CURLY BRACKET ORNAMENT (Ps)
    case 0x2775:  // MEDIUM RIGHT CURLY BRACKET ORNAMENT (Pe)
    case 0x27c5:  // LEFT S-SHAPED BAG DELIMITER (Ps)
    case 0x27c6:  // RIGHT S-SHAPED BAG DELIMITER (Pe)
    case 0x27e6:  // MATHEMATICAL LEFT WHITE SQUARE BRACKET (Ps)
    case 0x27e7:  // MATHEMATICAL RIGHT WHITE SQUARE BRACKET (Pe)
    case 0x27e8:  // MATHEMATICAL LEFT ANGLE BRACKET (Ps)
    case 0x27e9:  // MATHEMATICAL RIGHT ANGLE BRACKET (Pe)
    case 0x27ea:  // MATHEMATICAL LEFT DOUBLE ANGLE BRACKET (Ps)
    case 0x27eb:  // MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET (Pe)
    case 0x27ec:  // MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET (Ps)
    case 0x27ed:  // MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET (Pe)
    case 0x27ee:  // MATHEMATICAL LEFT FLATTENED PARENTHESIS (Ps)
    case 0x27ef:  // MATHEMATICAL RIGHT FLATTENED PARENTHESIS (Pe)
    case 0x2983:  // LEFT WHITE CURLY BRACKET (Ps)
    case 0x2984:  // RIGHT WHITE CURLY BRACKET (Pe)
    case 0x2985:  // LEFT WHITE PARENTHESIS (Ps)
    case 0x2986:  // RIGHT WHITE PARENTHESIS (Pe)
    case 0x2987:  // Z NOTATION LEFT IMAGE BRACKET (Ps)
    case 0x2988:  // Z NOTATION RIGHT IMAGE BRACKET (Pe)
    case 0x2989:  // Z NOTATION LEFT BINDING BRACKET (Ps)
    case 0x298a:  // Z NOTATION RIGHT BINDING BRACKET (Pe)
    case 0x298b:  // LEFT SQUARE BRACKET WITH UNDERBAR (Ps)
    case 0x298c:  // RIGHT SQUARE BRACKET WITH UNDERBAR (Pe)
    case 0x298d:  // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER (Ps)
    case 0x298e:  // RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER (Pe)
    case 0x298f:  // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER (Ps)
    case 0x2990:  // RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER (Pe)
    case 0x2991:  // LEFT ANGLE BRACKET WITH DOT (Ps)
    case 0x2992:  // RIGHT ANGLE BRACKET WITH DOT (Pe)
    case 0x2993:  // LEFT ARC LESS-THAN BRACKET (Ps)
    case 0x2994:  // RIGHT ARC GREATER-THAN BRACKET (Pe)
    case 0x2997:  // LEFT BLACK TORTOISE SHELL BRACKET (Ps)
    case 0x2998:  // RIGHT BLACK TORTOISE SHELL BRACKET (Pe)
    case 0x29d8:  // LEFT WIGGLY FENCE (Ps)
    case 0x29d9:  // RIGHT WIGGLY FENCE (Pe)
    case 0x29da:  // LEFT DOUBLE WIGGLY FENCE (Ps)
    case 0x29db:  // RIGHT DOUBLE WIGGLY FENCE (Pe)
    case 0x29fc:  // LEFT-POINTING CURVED ANGLE BRACKET (Ps)
    case 0x29fd:  // RIGHT-POINTING CURVED ANGLE BRACKET (Pe)
    case 0x2d70:  // TIFINAGH SEPARATOR MARK (Po)
    case 0x2e0e:  // EDITORIAL CORONIS (Po)
    case 0x2e0f:  // PARAGRAPHOS (Po)
    case 0x2e10:  // FORKED PARAGRAPHOS (Po)
    case 0x2e11:  // REVERSED FORKED PARAGRAPHOS (Po)
    case 0x2e12:  // HYPODIASTOLE (Po)
    case 0x2e13:  // DOTTED OBELOS (Po)
    case 0x2e14:  // DOWNWARDS ANCORA (Po)
    case 0x2e15:  // UPWARDS ANCORA (Po)
    case 0x2e16:  // DOTTED RIGHT-POINTING ANGLE (Po)
    case 0x2e17:  // DOUBLE OBLIQUE HYPHEN (Pd)
    case 0x2e19:  // PALM BRANCH (Po)
    case 0x2e1a:  // HYPHEN WITH DIAERESIS (Pd)
    case 0x2e1b:  // TILDE WITH RING ABOVE (Po)
    case 0x2e1e:  // TILDE WITH DOT ABOVE (Po)
    case 0x2e1f:  // TILDE WITH DOT BELOW (Po)
    case 0x2e2a:  // TWO DOTS OVER ONE DOT PUNCTUATION (Po)
    case 0x2e2b:  // ONE DOT OVER TWO DOTS PUNCTUATION (Po)
    case 0x2e2c:  // SQUARED FOUR DOT PUNCTUATION (Po)
    case 0x2e2d:  // FIVE DOT MARK (Po)
    case 0x2e2e:  // REVERSED QUESTION MARK (Po)
    case 0x2e30:  // RING POINT (Po)
    case 0x2e31:  // WORD SEPARATOR MIDDLE DOT (Po)
    case 0x2e32:  // TURNED COMMA (Po)
    case 0x2e33:  // RAISED DOT (Po)
    case 0x2e34:  // RAISED COMMA (Po)
    case 0x2e35:  // TURNED SEMICOLON (Po)
    case 0x2e36:  // DAGGER WITH LEFT GUARD (Po)
    case 0x2e37:  // DAGGER WITH RIGHT GUARD (Po)
    case 0x2e38:  // TURNED DAGGER (Po)
    case 0x2e39:  // TOP HALF SECTION SIGN (Po)
    case 0x2e3a:  // TWO-EM DASH (Pd)
    case 0x2e3b:  // THREE-EM DASH (Pd)
    case 0x2e3c:  // STENOGRAPHIC FULL STOP (Po)
    case 0x2e3d:  // VERTICAL SIX DOTS (Po)
    case 0x2e3e:  // WIGGLY VERTICAL LINE (Po)
    case 0x2e3f:  // CAPITULUM (Po)
    case 0x2e40:  // DOUBLE HYPHEN (Pd)
    case 0x2e41:  // REVERSED COMMA (Po)
    case 0x2e42:  // DOUBLE LOW-REVERSED-9 QUOTATION MARK (Ps)
    case 0x2e43:  // DASH WITH LEFT UPTURN (Po)
    case 0x2e44:  // DOUBLE SUSPENSION MARK (Po)
    case 0x2e45:  // INVERTED LOW KAVYKA (Po)
    case 0x2e46:  // INVERTED LOW KAVYKA WITH KAVYKA ABOVE (Po)
    case 0x2e47:  // LOW KAVYKA (Po)
    case 0x2e48:  // LOW KAVYKA WITH DOT (Po)
    case 0x2e49:  // DOUBLE STACKED COMMA (Po)
    case 0x2e4a:  // DOTTED SOLIDUS (Po)
    case 0x2e4b:  // TRIPLE DAGGER (Po)
    case 0x2e4c:  // MEDIEVAL COMMA (Po)
    case 0x2e4d:  // PARAGRAPHUS MARK (Po)
    case 0x2e4e:  // PUNCTUS ELEVATUS MARK (Po)
    case 0x2e4f:  // CORNISH VERSE DIVIDER (Po)
    case 0x3001:  // IDEOGRAPHIC COMMA (Po)
    case 0x3002:  // IDEOGRAPHIC FULL STOP (Po)
    case 0x3003:  // DITTO MARK (Po)
    case 0x3008:  // LEFT ANGLE BRACKET (Ps)
    case 0x3009:  // RIGHT ANGLE BRACKET (Pe)
    case 0x300a:  // LEFT DOUBLE ANGLE BRACKET (Ps)
    case 0x300b:  // RIGHT DOUBLE ANGLE BRACKET (Pe)
    case 0x300c:  // LEFT CORNER BRACKET (Ps)
    case 0x300d:  // RIGHT CORNER BRACKET (Pe)
    case 0x300e:  // LEFT WHITE CORNER BRACKET (Ps)
    case 0x300f:  // RIGHT WHITE CORNER BRACKET (Pe)
    case 0x3010:  // LEFT BLACK LENTICULAR BRACKET (Ps)
    case 0x3011:  // RIGHT BLACK LENTICULAR BRACKET (Pe)
    case 0x3014:  // LEFT TORTOISE SHELL BRACKET (Ps)
    case 0x3015:  // RIGHT TORTOISE SHELL BRACKET (Pe)
    case 0x3016:  // LEFT WHITE LENTICULAR BRACKET (Ps)
    case 0x3017:  // RIGHT WHITE LENTICULAR BRACKET (Pe)
    case 0x3018:  // LEFT WHITE TORTOISE SHELL BRACKET (Ps)
    case 0x3019:  // RIGHT WHITE TORTOISE SHELL BRACKET (Pe)
    case 0x301a:  // LEFT WHITE SQUARE BRACKET (Ps)
    case 0x301b:  // RIGHT WHITE SQUARE BRACKET (Pe)
    case 0x301c:  // WAVE DASH (Pd)
    case 0x301d:  // REVERSED DOUBLE PRIME QUOTATION MARK (Ps)
    case 0x301e:  // DOUBLE PRIME QUOTATION MARK (Pe)
    case 0x301f:  // LOW DOUBLE PRIME QUOTATION MARK (Pe)
    case 0x3030:  // WAVY DASH (Pd)
    case 0x303d:  // PART ALTERNATION MARK (Po)
    case 0x30a0:  // KATAKANA-HIRAGANA DOUBLE HYPHEN (Pd)
    case 0x30fb:  // KATAKANA MIDDLE DOT (Po)
    case 0xa4fe:  // LISU PUNCTUATION COMMA (Po)
    case 0xa4ff:  // LISU PUNCTUATION FULL STOP (Po)
    case 0xa60d:  // VAI COMMA (Po)
    case 0xa60e:  // VAI FULL STOP (Po)
    case 0xa60f:  // VAI QUESTION MARK (Po)
    case 0xa67e:  // CYRILLIC KAVYKA (Po)
    case 0xa874:  // PHAGS-PA SINGLE HEAD MARK (Po)
    case 0xa875:  // PHAGS-PA DOUBLE HEAD MARK (Po)
    case 0xa876:  // PHAGS-PA MARK SHAD (Po)
    case 0xa877:  // PHAGS-PA MARK DOUBLE SHAD (Po)
    case 0xa8ce:  // SAURASHTRA DANDA (Po)
    case 0xa8cf:  // SAURASHTRA DOUBLE DANDA (Po)
    case 0xa8f8:  // DEVANAGARI SIGN PUSHPIKA (Po)
    case 0xa8f9:  // DEVANAGARI GAP FILLER (Po)
    case 0xa8fa:  // DEVANAGARI CARET (Po)
    case 0xa8fc:  // DEVANAGARI SIGN SIDDHAM (Po)
    case 0xa9c1:  // JAVANESE LEFT RERENGGAN (Po)
    case 0xa9c2:  // JAVANESE RIGHT RERENGGAN (Po)
    case 0xa9c3:  // JAVANESE PADA ANDAP (Po)
    case 0xa9c4:  // JAVANESE PADA MADYA (Po)
    case 0xa9c5:  // JAVANESE PADA LUHUR (Po)
    case 0xa9c6:  // JAVANESE PADA WINDU (Po)
    case 0xa9c7:  // JAVANESE PADA PANGKAT (Po)
    case 0xa9c8:  // JAVANESE PADA LINGSA (Po)
    case 0xa9c9:  // JAVANESE PADA LUNGSI (Po)
    case 0xa9ca:  // JAVANESE PADA ADEG (Po)
    case 0xa9cb:  // JAVANESE PADA ADEG ADEG (Po)
    case 0xa9cc:  // JAVANESE PADA PISELEH (Po)
    case 0xa9cd:  // JAVANESE TURNED PADA PISELEH (Po)
    case 0xa9de:  // JAVANESE PADA TIRTA TUMETES (Po)
    case 0xa9df:  // JAVANESE PADA ISEN-ISEN (Po)
    case 0xaa5c:  // CHAM PUNCTUATION SPIRAL (Po)
    case 0xaa5d:  // CHAM PUNCTUATION DANDA (Po)
    case 0xaa5e:  // CHAM PUNCTUATION DOUBLE DANDA (Po)
    case 0xaa5f:  // CHAM PUNCTUATION TRIPLE DANDA (Po)
    case 0xaade:  // TAI VIET SYMBOL HO HOI (Po)
    case 0xaadf:  // TAI VIET SYMBOL KOI KOI (Po)
    case 0xaaf0:  // MEETEI MAYEK CHEIKHAN (Po)
    case 0xaaf1:  // MEETEI MAYEK AHANG KHUDAM (Po)
    case 0xabeb:  // MEETEI MAYEK CHEIKHEI (Po)
    case 0xfe10:  // PRESENTATION FORM FOR VERTICAL COMMA (Po)
    case 0xfe11:  // PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA (Po)
    case 0xfe12:  // PRESENTATION FORM FOR VERTICAL IDEO FULL STOP (Po)
    case 0xfe13:  // PRESENTATION FORM FOR VERTICAL COLON (Po)
    case 0xfe14:  // PRESENTATION FORM FOR VERTICAL SEMICOLON (Po)
    case 0xfe15:  // PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (Po)
    case 0xfe16:  // PRESENTATION FORM FOR VERTICAL QUESTION MARK (Po)
    case 0xfe17:  // PRESENTATION ... LEFT WHITE LENTICULAR BRACKET (Ps)
    case 0xfe18:  // PRESENTATION ... RIGHT WHITE LENTICULAR BRACKET (Pe)
    case 0xfe19:  // PRESENTATION ... VERTICAL HORIZONTAL ELLIPSIS (Po)
    case 0xfe30:  // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER (Po)
    case 0xfe31:  // PRESENTATION FORM FOR VERTICAL EM DASH (Pd)
    case 0xfe32:  // PRESENTATION FORM FOR VERTICAL EN DASH (Pd)
    case 0xfe33:  // PRESENTATION FORM FOR VERTICAL LOW LINE (Pc)
    case 0xfe34:  // PRESENTATION FORM FOR VERTICAL WAVY LOW LINE (Pc)
    case 0xfe35:  // PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS (Ps)
    case 0xfe36:  // PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS (Pe)
    case 0xfe37:  // PRESENTATION ... VERTICAL LEFT CURLY BRACKET (Ps)
    case 0xfe38:  // PRESENTATION ... VERTICAL RIGHT CURLY BRACKET (Pe)
    case 0xfe39:  // PRESENTATION ... LEFT TORTOISE SHELL BRACKET (Ps)
    case 0xfe3a:  // PRESENTATION ... RIGHT TORTOISE SHELL BRACKET (Pe)
    case 0xfe3b:  // PRESENTATION ... LEFT BLACK LENTICULAR BRACKET (Ps)
    case 0xfe3c:  // PRESENTATION ... RIGHT BLACK LENTICULAR BRACKET (Pe)
    case 0xfe3d:  // PRESENTATION ... LEFT DOUBLE ANGLE BRACKET (Ps)
    case 0xfe3e:  // PRESENTATION ... RIGHT DOUBLE ANGLE BRACKET (Pe)
    case 0xfe3f:  // PRESENTATION ... LEFT ANGLE BRACKET (Ps)
    case 0xfe40:  // PRESENTATION ... RIGHT ANGLE BRACKET (Pe)
    case 0xfe41:  // PRESENTATION ... LEFT CORNER BRACKET (Ps)
    case 0xfe42:  // PRESENTATION ... RIGHT CORNER BRACKET (Pe)
    case 0xfe43:  // PRESENTATION ... LEFT WHITE CORNER BRACKET (Ps)
    case 0xfe44:  // PRESENTATION ... RIGHT WHITE CORNER BRACKET (Pe)
    case 0xfe45:  // SESAME DOT (Po)
    case 0xfe46:  // WHITE SESAME DOT (Po)
    case 0xfe47:  // PRESENTATION ... VERTICAL LEFT SQUARE BRACKET (Ps)
    case 0xfe48:  // PRESENTATION ... VERTICAL RIGHT SQUARE BRACKET (Pe)
    case 0xfe49:  // DASHED OVERLINE (Po)
    case 0xfe4a:  // CENTRELINE OVERLINE (Po)
    case 0xfe4b:  // WAVY OVERLINE (Po)
    case 0xfe4c:  // DOUBLE WAVY OVERLINE (Po)
    case 0xfe4d:  // DASHED LOW LINE (Pc)
    case 0xfe4e:  // CENTRELINE LOW LINE (Pc)
    case 0xfe4f:  // WAVY LOW LINE (Pc)
    case 0xfe50:  // SMALL COMMA (Po)
    case 0xfe51:  // SMALL IDEOGRAPHIC COMMA (Po)
    case 0xfe52:  // SMALL FULL STOP (Po)
    case 0xfe54:  // SMALL SEMICOLON (Po)
    case 0xfe55:  // SMALL COLON (Po)
    case 0xfe56:  // SMALL QUESTION MARK (Po)
    case 0xfe57:  // SMALL EXCLAMATION MARK (Po)
    case 0xfe58:  // SMALL EM DASH (Pd)
    case 0xfe59:  // SMALL LEFT PARENTHESIS (Ps)
    case 0xfe5a:  // SMALL RIGHT PARENTHESIS (Pe)
    case 0xfe5b:  // SMALL LEFT CURLY BRACKET (Ps)
    case 0xfe5c:  // SMALL RIGHT CURLY BRACKET (Pe)
    case 0xfe5d:  // SMALL LEFT TORTOISE SHELL BRACKET (Ps)
    case 0xfe5e:  // SMALL RIGHT TORTOISE SHELL BRACKET (Pe)
    case 0xfe5f:  // SMALL NUMBER SIGN (Po)
    case 0xfe60:  // SMALL AMPERSAND (Po)
    case 0xfe61:  // SMALL ASTERISK (Po)
    case 0xfe63:  // SMALL HYPHEN-MINUS (Pd)
    case 0xfe68:  // SMALL REVERSE SOLIDUS (Po)
    case 0xfe6a:  // SMALL PERCENT SIGN (Po)
    case 0xfe6b:  // SMALL COMMERCIAL AT (Po)
    case 0xff01:  // FULLWIDTH EXCLAMATION MARK (Po)
    case 0xff02:  // FULLWIDTH QUOTATION MARK (Po)
    case 0xff03:  // FULLWIDTH NUMBER SIGN (Po)
    case 0xff05:  // FULLWIDTH PERCENT SIGN (Po)
    case 0xff06:  // FULLWIDTH AMPERSAND (Po)
    case 0xff07:  // FULLWIDTH APOSTROPHE (Po)
    case 0xff08:  // FULLWIDTH LEFT PARENTHESIS (Ps)
    case 0xff09:  // FULLWIDTH RIGHT PARENTHESIS (Pe)
    case 0xff0a:  // FULLWIDTH ASTERISK (Po)
    case 0xff0c:  // FULLWIDTH COMMA (Po)
    case 0xff0d:  // FULLWIDTH HYPHEN-MINUS (Pd)
    case 0xff0e:  // FULLWIDTH FULL STOP (Po)
    case 0xff0f:  // FULLWIDTH SOLIDUS (Po)
    case 0xff1a:  // FULLWIDTH COLON (Po)
    case 0xff1b:  // FULLWIDTH SEMICOLON (Po)
    case 0xff1f:  // FULLWIDTH QUESTION MARK (Po)
    case 0xff20:  // FULLWIDTH COMMERCIAL AT (Po)
    case 0xff3b:  // FULLWIDTH LEFT SQUARE BRACKET (Ps)
    case 0xff3c:  // FULLWIDTH REVERSE SOLIDUS (Po)
    case 0xff3d:  // FULLWIDTH RIGHT SQUARE BRACKET (Pe)
    case 0xff3f:  // FULLWIDTH LOW LINE (Pc)
    case 0xff5b:  // FULLWIDTH LEFT CURLY BRACKET (Ps)
    case 0xff5d:  // FULLWIDTH RIGHT CURLY BRACKET (Pe)
    case 0xff5f:  // FULLWIDTH LEFT WHITE PARENTHESIS (Ps)
    case 0xff60:  // FULLWIDTH RIGHT WHITE PARENTHESIS (Pe)
    case 0xff61:  // HALFWIDTH IDEOGRAPHIC FULL STOP (Po)
    case 0xff62:  // HALFWIDTH LEFT CORNER BRACKET (Ps)
    case 0xff63:  // HALFWIDTH RIGHT CORNER BRACKET (Pe)
    case 0xff64:  // HALFWIDTH IDEOGRAPHIC COMMA (Po)
    case 0xff65:  // HALFWIDTH KATAKANA MIDDLE DOT (Po)
      return 1;
    default:
      return 0;
  }
}
|
||||
|
|
|
@ -18,6 +18,39 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
int iswspace(wint_t wc) {
|
||||
return isspace(wc);
|
||||
/**
 * Checks whether c is a whitespace character.
 *
 * Whitespace here means a character that draws nothing yet advances
 * the cursor: the ASCII controls `\t\n\v\f\r`, the space, and the
 * Unicode separators U+2000 through U+200A, U+2028, U+2029, U+205F,
 * and U+3000. The ogham space mark (U+1680) is left out because it
 * has a visible glyph, and the no-break spaces (U+00A0, U+202F) are
 * left out because editors such as emacs render them visibly.
 *
 * @return 1 if c is whitespace, otherwise 0
 */
int iswspace(wint_t c) {
  // SPACE plus the contiguous ASCII run \t \n \v \f \r (0x09..0x0d)
  if (c == ' ' || ('\t' <= c && c <= '\r')) {
    return 1;
  }
  // EN QUAD through HAIR SPACE (all Zs)
  if (0x2000 <= c && c <= 0x200a) {
    return 1;
  }
  return c == 0x2028 ||  // LINE SEPARATOR (Zl)
         c == 0x2029 ||  // PARAGRAPH SEPARATOR (Zp)
         c == 0x205f ||  // MEDIUM MATHEMATICAL SPACE (Zs)
         c == 0x3000;    // IDEOGRAPHIC SPACE (Zs)
}
|
||||
|
|
1774
libc/str/iswupper.c
1774
libc/str/iswupper.c
File diff suppressed because it is too large
Load diff
|
@ -18,6 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
int iswxdigit(wint_t wc) {
|
||||
return isxdigit(wc);
|
||||
/**
 * Checks whether c is an ASCII hexadecimal digit.
 *
 * Only `0-9`, `A-F`, and `a-f` qualify; digits from other scripts
 * (e.g. fullwidth forms) do not.
 *
 * @return 1 if c is a hex digit, otherwise 0
 */
int iswxdigit(wint_t c) {
  if (c >= '0' && c <= '9') {
    return 1;
  }
  // Setting the ASCII case bit maps 'A'..'F' onto 'a'..'f'; no other
  // code point can land in that range, so one comparison covers both.
  c |= 'a' ^ 'A';
  return c >= 'a' && c <= 'f';
}
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
|
|
|
@ -62,6 +62,12 @@ o/$(MODE)/libc/str/getziplfileuncompressedsize.o: \
|
|||
OVERRIDE_CFLAGS += \
|
||||
-Os
|
||||
|
||||
# Compile the switch-based wide character classifiers with jump tables
# disabled. NOTE(review): presumably because their case labels are
# sparse, so jump tables would bloat the binary -- confirm.
o/$(MODE)/libc/str/iswpunct.o \
|
||||
o/$(MODE)/libc/str/iswupper.o \
|
||||
o/$(MODE)/libc/str/iswlower.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-fno-jump-tables
|
||||
|
||||
LIBC_STR_LIBS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)))
|
||||
LIBC_STR_SRCS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_SRCS))
|
||||
LIBC_STR_HDRS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_HDRS))
|
||||
|
|
|
@ -26,4 +26,6 @@
|
|||
* @return address of last c in s, or NULL if not found
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
char *strrchr(const char *s, int c) {
  // The terminating NUL is considered part of the string, so it must
  // be included in the search range. Otherwise strrchr(s, '\0') would
  // return NULL instead of the address of the terminator.
  return memrchr(s, c, strlen(s) + 1);
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Converts character to lower case.
|
||||
* Converts character to ascii lower case.
|
||||
*/
|
||||
int tolower(int c) {
|
||||
return 'A' <= c && c <= 'Z' ? c + ('a' - 'A') : c;
|
||||
|
|
1644
libc/str/towlower.c
1644
libc/str/towlower.c
File diff suppressed because it is too large
Load diff
1645
libc/str/towupper.c
1645
libc/str/towupper.c
File diff suppressed because it is too large
Load diff
59
libc/str/wctype.c
Normal file
59
libc/str/wctype.c
Normal file
|
@ -0,0 +1,59 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
// Character class names in id order: wctype() returns the row index
// plus one. Every row is exactly eight bytes, NUL padded, so that a
// candidate name can be compared against a row as one 64-bit word.
static const char kWcTypeNames[][8] = {
    "alnum", "alpha", "blank", "cntrl",   //
    "digit", "graph", "lower", "print",   //
    "punct", "space", "upper", "xdigit",  //
};
|
||||
|
||||
/**
|
||||
* Returns number representing character class name.
|
||||
*
|
||||
* @param s can be "alnum", "alpha", "blank", "cntrl", "digit", "graph",
|
||||
* "lower", "print", "punct", "space", "upper", "xdigit"
|
||||
* @return nonzero id or 0 if not found
|
||||
*/
|
||||
wctype_t wctype(const char *s) {
|
||||
int i;
|
||||
char b[8];
|
||||
for (i = 0; i < 8; ++i) {
|
||||
b[i] = *s ? *s++ : 0;
|
||||
}
|
||||
if (!*s) {
|
||||
for (i = 0; i < ARRAYLEN(kWcTypeNames); ++i) {
|
||||
if (READ64LE(b) == READ64LE(kWcTypeNames[i])) {
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue