Fold LIBC_UNICODE into LIBC_STR

This commit is contained in:
Justine Tunney 2022-08-13 08:42:32 -07:00
parent 17aea99bb3
commit 367d06d9e4
173 changed files with 170 additions and 355 deletions

281
libc/str/SpecialCasing.txt Normal file
View file

@ -0,0 +1,281 @@
# SpecialCasing-14.0.0.txt
# Date: 2021-03-08, 19:35:55 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Special Casing
#
# This file is a supplement to the UnicodeData.txt file. It does not define any
# properties, but rather provides additional information about the casing of
# Unicode characters, for situations when casing incurs a change in string length
# or is dependent on context or locale. For compatibility, the UnicodeData.txt
# file only contains simple case mappings for characters where they are one-to-one
# and independent of context and language. The data in this file, combined with
# the simple case mappings in UnicodeData.txt, defines the full case mappings
# Lowercase_Mapping (lc), Titlecase_Mapping (tc), and Uppercase_Mapping (uc).
#
# Note that the preferred mechanism for defining tailored casing operations is
# the Unicode Common Locale Data Repository (CLDR). For more information, see the
# discussion of case mappings and case algorithms in the Unicode Standard.
#
# All code points not listed in this file that do not have a simple case mappings
# in UnicodeData.txt map to themselves.
# ================================================================================
# Format
# ================================================================================
# The entries in this file are in the following machine-readable format:
#
# <code>; <lower>; <title>; <upper>; (<condition_list>;)? # <comment>
#
# <code>, <lower>, <title>, and <upper> provide the respective full case mappings
# of <code>, expressed as character values in hex. If there is more than one character,
# they are separated by spaces. Other than as used to separate elements, spaces are
# to be ignored.
#
# The <condition_list> is optional. Where present, it consists of one or more language IDs
# or casing contexts, separated by spaces. In these conditions:
# - A condition list overrides the normal behavior if all of the listed conditions are true.
# - The casing context is always the context of the characters in the original string,
# NOT in the resulting string.
# - Case distinctions in the condition list are not significant.
# - Conditions preceded by "Not_" represent the negation of the condition.
# The condition list is not represented in the UCD as a formal property.
#
# A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
#
# A casing context for a character is defined by Section 3.13 Default Case Algorithms
# of The Unicode Standard.
#
# Parsers of this file must be prepared to deal with future additions to this format:
# * Additional contexts
# * Additional fields
# ================================================================================
# ================================================================================
# Unconditional mappings
# ================================================================================
# The German es-zed is special--the normal mapping is to SS.
# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))
00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
# Preserve canonical equivalence for I with dot. Turkic is handled below.
0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
# Ligatures
FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF
FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI
FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL
FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI
FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL
FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T
FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST
0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN
FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW
FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH
FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI
FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW
FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# No corresponding uppercase precomposed character
0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW
1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS
1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE
1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI
1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
# the result will be incorrect unless the iota-subscript is moved to the end
# of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
# This process can be achieved by first transforming the text to NFC before casing.
# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
# The following cases are already in the UnicodeData.txt file, so are only commented here.
# 0345; 0345; 0399; 0399; # COMBINING GREEK YPOGEGRAMMENI
# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
# have special uppercases.
# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
# Some characters with YPOGEGRAMMENI also have no corresponding titlecases
1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
# ================================================================================
# Conditional Mappings
# The remainder of this file provides conditional casing data used to produce
# full case mappings.
# ================================================================================
# Language-Insensitive Mappings
# These are characters whose full case mappings do not depend on language, but do
# depend on context (which characters come before or after). For more information
# see the header of this file and the Unicode Standard.
# ================================================================================
# Special case for final form of sigma
03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
# Note: the following cases for non-final are already in the UnicodeData.txt file.
# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
# Note: the following cases are not included, since they would case-fold in lowercasing
# 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA
# 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
# ================================================================================
# Language-Sensitive Mappings
# These are characters whose full case mappings depend on language and perhaps also
# context (which characters come before or after). For more information
# see the header of this file and the Unicode Standard.
# ================================================================================
# Lithuanian
# Lithuanian retains the dot in a lowercase i when followed by accents.
# Remove DOT ABOVE after "i" with upper or titlecase
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
# Introduce an explicit dot above when lowercasing capital I's and J's
# whenever there are more accents above.
# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
# ================================================================================
# Turkish and Azeri
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
# The following rules handle those cases.
0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
# This matches the behavior of the canonically equivalent I-dot_above
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
# When uppercasing, i turns into a dotted capital I
0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
# Note: the following case is already in the UnicodeData.txt file.
# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
# EOF

356
libc/str/blocks.txt Normal file
View file

@ -0,0 +1,356 @@
# Blocks-14.0.0.txt
# Date: 2021-01-22, 23:29:00 GMT [KW]
# © 2021 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Format:
# Start Code..End Code; Block Name
# ================================================
# Note: When comparing block names, casing, whitespace, hyphens,
# and underbars are ignored.
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
# For more information on the comparison of property values,
# see UAX #44: http://www.unicode.org/reports/tr44/
#
# All block ranges start with a value where (cp MOD 16) = 0,
# and end with a value where (cp MOD 16) = 15. In other words,
# the last hexadecimal digit of the start of range is ...0
# and the last hexadecimal digit of the end of range is ...F.
# This constraint on block ranges guarantees that allocations
# are done in terms of whole columns, and that code chart display
# never involves splitting columns in the charts.
#
# All code points not explicitly listed for Block
# have the value No_Block.
# Property: Block
#
# @missing: 0000..10FFFF; No_Block
0000..007F; Basic Latin
0080..00FF; Latin-1 Supplement
0100..017F; Latin Extended-A
0180..024F; Latin Extended-B
0250..02AF; IPA Extensions
02B0..02FF; Spacing Modifier Letters
0300..036F; Combining Diacritical Marks
0370..03FF; Greek and Coptic
0400..04FF; Cyrillic
0500..052F; Cyrillic Supplement
0530..058F; Armenian
0590..05FF; Hebrew
0600..06FF; Arabic
0700..074F; Syriac
0750..077F; Arabic Supplement
0780..07BF; Thaana
07C0..07FF; NKo
0800..083F; Samaritan
0840..085F; Mandaic
0860..086F; Syriac Supplement
0870..089F; Arabic Extended-B
08A0..08FF; Arabic Extended-A
0900..097F; Devanagari
0980..09FF; Bengali
0A00..0A7F; Gurmukhi
0A80..0AFF; Gujarati
0B00..0B7F; Oriya
0B80..0BFF; Tamil
0C00..0C7F; Telugu
0C80..0CFF; Kannada
0D00..0D7F; Malayalam
0D80..0DFF; Sinhala
0E00..0E7F; Thai
0E80..0EFF; Lao
0F00..0FFF; Tibetan
1000..109F; Myanmar
10A0..10FF; Georgian
1100..11FF; Hangul Jamo
1200..137F; Ethiopic
1380..139F; Ethiopic Supplement
13A0..13FF; Cherokee
1400..167F; Unified Canadian Aboriginal Syllabics
1680..169F; Ogham
16A0..16FF; Runic
1700..171F; Tagalog
1720..173F; Hanunoo
1740..175F; Buhid
1760..177F; Tagbanwa
1780..17FF; Khmer
1800..18AF; Mongolian
18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
1900..194F; Limbu
1950..197F; Tai Le
1980..19DF; New Tai Lue
19E0..19FF; Khmer Symbols
1A00..1A1F; Buginese
1A20..1AAF; Tai Tham
1AB0..1AFF; Combining Diacritical Marks Extended
1B00..1B7F; Balinese
1B80..1BBF; Sundanese
1BC0..1BFF; Batak
1C00..1C4F; Lepcha
1C50..1C7F; Ol Chiki
1C80..1C8F; Cyrillic Extended-C
1C90..1CBF; Georgian Extended
1CC0..1CCF; Sundanese Supplement
1CD0..1CFF; Vedic Extensions
1D00..1D7F; Phonetic Extensions
1D80..1DBF; Phonetic Extensions Supplement
1DC0..1DFF; Combining Diacritical Marks Supplement
1E00..1EFF; Latin Extended Additional
1F00..1FFF; Greek Extended
2000..206F; General Punctuation
2070..209F; Superscripts and Subscripts
20A0..20CF; Currency Symbols
20D0..20FF; Combining Diacritical Marks for Symbols
2100..214F; Letterlike Symbols
2150..218F; Number Forms
2190..21FF; Arrows
2200..22FF; Mathematical Operators
2300..23FF; Miscellaneous Technical
2400..243F; Control Pictures
2440..245F; Optical Character Recognition
2460..24FF; Enclosed Alphanumerics
2500..257F; Box Drawing
2580..259F; Block Elements
25A0..25FF; Geometric Shapes
2600..26FF; Miscellaneous Symbols
2700..27BF; Dingbats
27C0..27EF; Miscellaneous Mathematical Symbols-A
27F0..27FF; Supplemental Arrows-A
2800..28FF; Braille Patterns
2900..297F; Supplemental Arrows-B
2980..29FF; Miscellaneous Mathematical Symbols-B
2A00..2AFF; Supplemental Mathematical Operators
2B00..2BFF; Miscellaneous Symbols and Arrows
2C00..2C5F; Glagolitic
2C60..2C7F; Latin Extended-C
2C80..2CFF; Coptic
2D00..2D2F; Georgian Supplement
2D30..2D7F; Tifinagh
2D80..2DDF; Ethiopic Extended
2DE0..2DFF; Cyrillic Extended-A
2E00..2E7F; Supplemental Punctuation
2E80..2EFF; CJK Radicals Supplement
2F00..2FDF; Kangxi Radicals
2FF0..2FFF; Ideographic Description Characters
3000..303F; CJK Symbols and Punctuation
3040..309F; Hiragana
30A0..30FF; Katakana
3100..312F; Bopomofo
3130..318F; Hangul Compatibility Jamo
3190..319F; Kanbun
31A0..31BF; Bopomofo Extended
31C0..31EF; CJK Strokes
31F0..31FF; Katakana Phonetic Extensions
3200..32FF; Enclosed CJK Letters and Months
3300..33FF; CJK Compatibility
3400..4DBF; CJK Unified Ideographs Extension A
4DC0..4DFF; Yijing Hexagram Symbols
4E00..9FFF; CJK Unified Ideographs
A000..A48F; Yi Syllables
A490..A4CF; Yi Radicals
A4D0..A4FF; Lisu
A500..A63F; Vai
A640..A69F; Cyrillic Extended-B
A6A0..A6FF; Bamum
A700..A71F; Modifier Tone Letters
A720..A7FF; Latin Extended-D
A800..A82F; Syloti Nagri
A830..A83F; Common Indic Number Forms
A840..A87F; Phags-pa
A880..A8DF; Saurashtra
A8E0..A8FF; Devanagari Extended
A900..A92F; Kayah Li
A930..A95F; Rejang
A960..A97F; Hangul Jamo Extended-A
A980..A9DF; Javanese
A9E0..A9FF; Myanmar Extended-B
AA00..AA5F; Cham
AA60..AA7F; Myanmar Extended-A
AA80..AADF; Tai Viet
AAE0..AAFF; Meetei Mayek Extensions
AB00..AB2F; Ethiopic Extended-A
AB30..AB6F; Latin Extended-E
AB70..ABBF; Cherokee Supplement
ABC0..ABFF; Meetei Mayek
AC00..D7AF; Hangul Syllables
D7B0..D7FF; Hangul Jamo Extended-B
D800..DB7F; High Surrogates
DB80..DBFF; High Private Use Surrogates
DC00..DFFF; Low Surrogates
E000..F8FF; Private Use Area
F900..FAFF; CJK Compatibility Ideographs
FB00..FB4F; Alphabetic Presentation Forms
FB50..FDFF; Arabic Presentation Forms-A
FE00..FE0F; Variation Selectors
FE10..FE1F; Vertical Forms
FE20..FE2F; Combining Half Marks
FE30..FE4F; CJK Compatibility Forms
FE50..FE6F; Small Form Variants
FE70..FEFF; Arabic Presentation Forms-B
FF00..FFEF; Halfwidth and Fullwidth Forms
FFF0..FFFF; Specials
10000..1007F; Linear B Syllabary
10080..100FF; Linear B Ideograms
10100..1013F; Aegean Numbers
10140..1018F; Ancient Greek Numbers
10190..101CF; Ancient Symbols
101D0..101FF; Phaistos Disc
10280..1029F; Lycian
102A0..102DF; Carian
102E0..102FF; Coptic Epact Numbers
10300..1032F; Old Italic
10330..1034F; Gothic
10350..1037F; Old Permic
10380..1039F; Ugaritic
103A0..103DF; Old Persian
10400..1044F; Deseret
10450..1047F; Shavian
10480..104AF; Osmanya
104B0..104FF; Osage
10500..1052F; Elbasan
10530..1056F; Caucasian Albanian
10570..105BF; Vithkuqi
10600..1077F; Linear A
10780..107BF; Latin Extended-F
10800..1083F; Cypriot Syllabary
10840..1085F; Imperial Aramaic
10860..1087F; Palmyrene
10880..108AF; Nabataean
108E0..108FF; Hatran
10900..1091F; Phoenician
10920..1093F; Lydian
10980..1099F; Meroitic Hieroglyphs
109A0..109FF; Meroitic Cursive
10A00..10A5F; Kharoshthi
10A60..10A7F; Old South Arabian
10A80..10A9F; Old North Arabian
10AC0..10AFF; Manichaean
10B00..10B3F; Avestan
10B40..10B5F; Inscriptional Parthian
10B60..10B7F; Inscriptional Pahlavi
10B80..10BAF; Psalter Pahlavi
10C00..10C4F; Old Turkic
10C80..10CFF; Old Hungarian
10D00..10D3F; Hanifi Rohingya
10E60..10E7F; Rumi Numeral Symbols
10E80..10EBF; Yezidi
10F00..10F2F; Old Sogdian
10F30..10F6F; Sogdian
10F70..10FAF; Old Uyghur
10FB0..10FDF; Chorasmian
10FE0..10FFF; Elymaic
11000..1107F; Brahmi
11080..110CF; Kaithi
110D0..110FF; Sora Sompeng
11100..1114F; Chakma
11150..1117F; Mahajani
11180..111DF; Sharada
111E0..111FF; Sinhala Archaic Numbers
11200..1124F; Khojki
11280..112AF; Multani
112B0..112FF; Khudawadi
11300..1137F; Grantha
11400..1147F; Newa
11480..114DF; Tirhuta
11580..115FF; Siddham
11600..1165F; Modi
11660..1167F; Mongolian Supplement
11680..116CF; Takri
11700..1174F; Ahom
11800..1184F; Dogra
118A0..118FF; Warang Citi
11900..1195F; Dives Akuru
119A0..119FF; Nandinagari
11A00..11A4F; Zanabazar Square
11A50..11AAF; Soyombo
11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
11AC0..11AFF; Pau Cin Hau
11C00..11C6F; Bhaiksuki
11C70..11CBF; Marchen
11D00..11D5F; Masaram Gondi
11D60..11DAF; Gunjala Gondi
11EE0..11EFF; Makasar
11FB0..11FBF; Lisu Supplement
11FC0..11FFF; Tamil Supplement
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
12480..1254F; Early Dynastic Cuneiform
12F90..12FFF; Cypro-Minoan
13000..1342F; Egyptian Hieroglyphs
13430..1343F; Egyptian Hieroglyph Format Controls
14400..1467F; Anatolian Hieroglyphs
16800..16A3F; Bamum Supplement
16A40..16A6F; Mro
16A70..16ACF; Tangsa
16AD0..16AFF; Bassa Vah
16B00..16B8F; Pahawh Hmong
16E40..16E9F; Medefaidrin
16F00..16F9F; Miao
16FE0..16FFF; Ideographic Symbols and Punctuation
17000..187FF; Tangut
18800..18AFF; Tangut Components
18B00..18CFF; Khitan Small Script
18D00..18D7F; Tangut Supplement
1AFF0..1AFFF; Kana Extended-B
1B000..1B0FF; Kana Supplement
1B100..1B12F; Kana Extended-A
1B130..1B16F; Small Kana Extension
1B170..1B2FF; Nushu
1BC00..1BC9F; Duployan
1BCA0..1BCAF; Shorthand Format Controls
1CF00..1CFCF; Znamenny Musical Notation
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
1D200..1D24F; Ancient Greek Musical Notation
1D2E0..1D2FF; Mayan Numerals
1D300..1D35F; Tai Xuan Jing Symbols
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1D800..1DAAF; Sutton SignWriting
1DF00..1DFFF; Latin Extended-G
1E000..1E02F; Glagolitic Supplement
1E100..1E14F; Nyiakeng Puachue Hmong
1E290..1E2BF; Toto
1E2C0..1E2FF; Wancho
1E7E0..1E7FF; Ethiopic Extended-B
1E800..1E8DF; Mende Kikakui
1E900..1E95F; Adlam
1EC70..1ECBF; Indic Siyaq Numbers
1ED00..1ED4F; Ottoman Siyaq Numbers
1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
1F0A0..1F0FF; Playing Cards
1F100..1F1FF; Enclosed Alphanumeric Supplement
1F200..1F2FF; Enclosed Ideographic Supplement
1F300..1F5FF; Miscellaneous Symbols and Pictographs
1F600..1F64F; Emoticons
1F650..1F67F; Ornamental Dingbats
1F680..1F6FF; Transport and Map Symbols
1F700..1F77F; Alchemical Symbols
1F780..1F7FF; Geometric Shapes Extended
1F800..1F8FF; Supplemental Arrows-C
1F900..1F9FF; Supplemental Symbols and Pictographs
1FA00..1FA6F; Chess Symbols
1FA70..1FAFF; Symbols and Pictographs Extended-A
1FB00..1FBFF; Symbols for Legacy Computing
20000..2A6DF; CJK Unified Ideographs Extension B
2A700..2B73F; CJK Unified Ideographs Extension C
2B740..2B81F; CJK Unified Ideographs Extension D
2B820..2CEAF; CJK Unified Ideographs Extension E
2CEB0..2EBEF; CJK Unified Ideographs Extension F
2F800..2FA1F; CJK Compatibility Ideographs Supplement
30000..3134F; CJK Unified Ideographs Extension G
E0000..E007F; Tags
E0100..E01EF; Variation Selectors Supplement
F0000..FFFFF; Supplementary Private Use Area-A
100000..10FFFF; Supplementary Private Use Area-B
# EOF

2619
libc/str/eastasianwidth.txt Normal file

File diff suppressed because it is too large Load diff

23
libc/str/freelocale.c Normal file
View file

@ -0,0 +1,23 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/locale.h"
/**
 * Stub for freelocale().
 *
 * The body is intentionally empty for now: the argument is never
 * inspected and no resources are released.
 *
 * @param l is the locale handle; currently unused
 */
void freelocale(locale_t l) {
  // TODO: implement me
  (void)l;
}

24
libc/str/isdigit_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks whether c is a decimal digit, ignoring the locale argument.
 *
 * This is the narrow-character wrapper, so it delegates to isdigit()
 * rather than the wide-character iswdigit(). That keeps it in line with
 * islower_l() and isupper_l(), which forward to islower() and isupper(),
 * and avoids converting an int (possibly EOF) to wint_t on the way in.
 *
 * @param c is the character to classify
 * @param l is unused
 * @return result of isdigit(c)
 */
int isdigit_l(int c, locale_t l) {
  (void)l;
  return isdigit(c);
}

24
libc/str/islower_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks whether c is lowercase, ignoring the locale argument.
 *
 * @param c is the character to classify
 * @param l is unused
 * @return result of islower(c)
 */
int islower_l(int c, locale_t l) {
  int verdict;
  (void)l;
  verdict = islower(c);
  return verdict;
}

24
libc/str/isupper_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for an uppercase letter, with a locale argument in the signature.
 *
 * The locale is not consulted; the answer is whatever isupper() gives.
 *
 * @param c is the character to classify
 * @param l is accepted and ignored
 * @return result of isupper(c)
 */
int isupper_l(int c, locale_t l) {
  int rc;
  (void)l;
  rc = isupper(c);
  return rc;
}

24
libc/str/iswalpha_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for an alphabetic wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswalpha(c)
 */
int iswalpha_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswalpha(c);
  return rc;
}

24
libc/str/iswblank_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for a blank wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswblank(c)
 */
int iswblank_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswblank(c);
  return rc;
}

24
libc/str/iswcntrl_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for a control wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswcntrl(c)
 */
int iswcntrl_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswcntrl(c);
  return rc;
}

24
libc/str/iswdigit_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for a decimal digit wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswdigit(c)
 */
int iswdigit_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswdigit(c);
  return rc;
}

24
libc/str/iswlower_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for a lowercase wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswlower(c)
 */
int iswlower_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswlower(c);
  return rc;
}

24
libc/str/iswprint_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for a printable wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswprint(c)
 */
int iswprint_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswprint(c);
  return rc;
}

24
libc/str/iswpunct_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for a punctuation wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswpunct(c)
 */
int iswpunct_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswpunct(c);
  return rc;
}

24
libc/str/iswspace_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for a whitespace wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswspace(c)
 */
int iswspace_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswspace(c);
  return rc;
}

24
libc/str/iswupper_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for an uppercase wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswupper(c)
 */
int iswupper_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswupper(c);
  return rc;
}

24
libc/str/iswxdigit_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for a hexadecimal digit wide character; the locale is ignored.
 *
 * @param c is the wide character to classify
 * @param l is accepted and ignored
 * @return result of iswxdigit(c)
 */
int iswxdigit_l(wint_t c, locale_t l) {
  int rc;
  (void)l;
  rc = iswxdigit(c);
  return rc;
}

24
libc/str/isxdigit_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Checks for a hexadecimal digit, with a locale argument in the signature.
 *
 * This is the narrow-character entry point, so it delegates to
 * isxdigit() rather than the wide-character iswxdigit(); that keeps the
 * int argument from being converted to wint_t and matches how
 * islower_l() and isupper_l() forward to their narrow counterparts.
 *
 * @param c is the character to classify
 * @param l is accepted and ignored
 * @return result of isxdigit(c)
 */
int isxdigit_l(int c, locale_t l) {
  (void)l;
  return isxdigit(c);
}

203
libc/str/kcombiningchars.S Normal file
View file

@ -0,0 +1,203 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/macros.internal.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/madv.h"
// Read-only 32-bit constant holding the size of the kCombiningChars
// table in bits (114752 bytes times 8).
.rodata.cst4
kCombiningCharsBits:
.long 114752 * 8
.endobj kCombiningCharsBits,globl,hidden
.previous
// Reserves 114752 zero bytes for the table, declared through .initbss
// with priority 400 and the _init_kCombiningChars tag.
// NOTE(review): presumably this buffer is the destination of the lz4cpy
// call in the _init_kCombiningChars init fragment -- confirm against
// the .initbss/.init.start macro definitions.
.initbss 400,_init_kCombiningChars
kCombiningChars:
.zero 114752
.endobj kCombiningChars,globl,hidden
.previous
// Init fragment: calls lz4cpy with 1203 in %edx while preserving %rsi
// across the call, then moves the call's result into %rdi and advances
// %rsi by 1208.
// NOTE(review): presumably %rdi/%rsi enter as running cursors into the
// .initbss and .initro regions, 1203 is the compressed payload length,
// and 1208 is that length rounded up to a multiple of 8 -- confirm
// against the .init.start macro and lz4cpy's contract.
.init.start 400,_init_kCombiningChars
push %rsi
mov $1203,%edx
call lz4cpy
mov %rax,%rdi
pop %rsi
add $1208,%rsi
.init.end 400,_init_kCombiningChars
// o/tool/build/lz4toasm.com
// -o o/libc/str/CombiningChars.s
// -s kCombiningChars
// o/libc/str/CombiningChars.bin.lz4
.initro 400,_init_kCombiningChars
kCombiningCharsLz4:
.byte 0x1f,0x00,0x01,0x00,0x4c,0x19,0xff,0x01 #  Lλ
.byte 0x00,0x0f,0x30,0x00,0x0f,0x2f,0xf8,0x03 # 0 /°
.byte 0x22,0x00,0x0d,0x10,0xfe,0x49,0x00,0x23 #. I .
.byte 0xbf,0xb6,0x0e,0x00,0x42,0x3f,0x00,0xff # B λ
.byte 0x17,0x39,0x00,0x00,0x5e,0x00,0x17,0x01 #9  ^ 
.byte 0x28,0x00,0x92,0xc0,0xbf,0x9f,0x3d,0x00 #( Æƒ= 
.byte 0x00,0x00,0x80,0x02,0x86,0x00,0x17,0x07 #  Çå 
.byte 0x1a,0x00,0x13,0xff,0x28,0x00,0x91,0xf8 # λ( æ°
.byte 0x0f,0x20,0x00,0x00,0xc0,0xfb,0xef,0x3e #   >
.byte 0x0e,0x00,0x1b,0x0e,0x8a,0x00,0x26,0xff # è &λ
.byte 0xff,0x37,0x00,0xa2,0x14,0xfe,0x21,0xfe #λ7 ó!
.byte 0x00,0x0c,0x00,0x00,0x00,0x02,0x10,0x00 #     
.byte 0x40,0x10,0x1e,0x20,0x00,0x10,0x00,0x23 #@   .
.byte 0x40,0x06,0x10,0x00,0x20,0x86,0x39,0x1a #@  å9
.byte 0x00,0x24,0x23,0x00,0x10,0x00,0x21,0xbe # $.  !
.byte 0x21,0x20,0x00,0x13,0xfc,0x30,0x00,0x41 #!  0 A
.byte 0x90,0x1e,0x20,0x40,0x40,0x00,0x13,0x04 #É @@ 
.byte 0x5e,0x00,0x22,0x01,0x20,0x08,0x00,0x13 #^ .  
.byte 0x11,0x93,0x00,0x38,0xc1,0x3d,0x60,0x60 #ô 8=``
.byte 0x00,0x31,0x90,0x40,0x30,0x40,0x00,0x13 # 1É@0@ ‼
.byte 0x00,0x0f,0x01,0x13,0x18,0x70,0x00,0x06 # p 
.byte 0x9f,0x00,0x27,0x04,0x5c,0x0d,0x00,0x48 #ƒ \ H
.byte 0xf2,0x07,0x80,0x7f,0x1d,0x00,0x45,0xf2 #Ç E
.byte 0x1f,0x00,0x3f,0x0d,0x00,0x43,0x03,0x00 #  C 
.byte 0x00,0xa0,0x57,0x00,0x50,0xfe,0x7f,0xdf # áW P
.byte 0xe0,0xff,0x41,0x01,0x28,0x1f,0x40,0x2f #αλA(@/
.byte 0x00,0xff,0x00,0xe0,0xfd,0x66,0x00,0x00 # λ α²f  
.byte 0x00,0xc3,0x01,0x00,0x1e,0x00,0x64,0x20 #   d
.byte 0x00,0x20,0xcc,0x01,0x0b,0x0f,0xd2,0x01 # 
.byte 0x1d,0x06,0x66,0x00,0x1f,0x00,0x01,0x00 #f   
.byte 0x62,0x13,0x1c,0x04,0x00,0x26,0x0c,0x00 #b & 
.byte 0x42,0x01,0x52,0xb0,0x3f,0x40,0xfe,0x0f #BR@
.byte 0xe8,0x00,0x1a,0x78,0x2e,0x00,0x26,0x60 #Φ x. &`
.byte 0x00,0x85,0x01,0x04,0x14,0x00,0x4f,0x87 # à Oç
.byte 0x01,0x04,0x0e,0x60,0x00,0x07,0x22,0x80 #` .Ç
.byte 0x09,0x08,0x00,0x63,0x40,0x7f,0xe5,0x1f # c@σ
.byte 0xf8,0x9f,0x2a,0x01,0x05,0x8e,0x01,0x11 #°ƒ*Ä
.byte 0x0f,0x06,0x00,0x32,0xd0,0x17,0x04,0x70 # 2p
.byte 0x02,0x01,0xd0,0x01,0x23,0x3c,0x3b,0x32 #.<;2
.byte 0x00,0x13,0xa3,0xde,0x01,0x2f,0xf0,0xcf # ú/
.byte 0x58,0x00,0x00,0x6f,0xf7,0xff,0xfd,0x21 #X  oλ²!
.byte 0x10,0x03,0x8c,0x01,0x0c,0x1f,0xfb,0x1f #î
.byte 0x01,0x2e,0x52,0xf8,0x00,0x00,0x00,0x7c #.R°   |
.byte 0x0b,0x00,0x2c,0xdf,0xff,0x62,0x00,0x2f # ,λb /
.byte 0x01,0x00,0x01,0x00,0xff,0x6b,0x1d,0x80 #  λkÇ
.byte 0xff,0x01,0x1c,0x80,0xa2,0x01,0x0f,0x68 #λÇóh
.byte 0x00,0x32,0x19,0x3c,0x0e,0x00,0x2f,0x06 # 2< /
.byte 0x00,0x01,0x00,0xff,0xff,0xff,0xff,0xff #  λλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xb3,0x20,0x80,0xf7,0xf6,0x12,0x28 #λ Ç÷(
.byte 0xc0,0x00,0xfb,0x12,0x0f,0x33,0x00,0x0c # 3 
.byte 0x23,0x44,0x08,0xf4,0x11,0x0b,0x18,0x00 #.D 
.byte 0x11,0x30,0x60,0x0f,0x11,0x03,0x70,0x0f #0`p
.byte 0x62,0xc0,0x3f,0x00,0x00,0x80,0xff,0x4c #b  ÇλL
.byte 0x00,0x02,0x10,0x14,0x20,0xc8,0x33,0x06 #  3
.byte 0x00,0x05,0x29,0x13,0x52,0x7e,0x66,0x00 # )R~f 
.byte 0x08,0x10,0xf8,0x13,0x02,0x11,0x00,0x21 #° !
.byte 0x9d,0xc1,0x43,0x12,0x2f,0x30,0x40,0x7c #¥C/0@|
.byte 0x00,0x0a,0x2f,0x20,0x21,0x96,0x0a,0xff # / !ûλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xdb,0x1f,0x40,0x60,0x1f,0x4b,0x2f,0x00 #@`K/ 
.byte 0x00,0x06,0x1a,0x08,0x1f,0x80,0x3f,0x00 # Ç 
.byte 0x0c,0x1f,0x0e,0x7f,0x00,0x2c,0x1f,0x20 # ,
.byte 0x1d,0x00,0x09,0x0e,0x3e,0x1c,0x2f,0xc0 # >/
.byte 0x07,0xdd,0x01,0xbd,0x22,0x6e,0xf0,0x23 #.n.
.byte 0x1e,0x0f,0x1c,0x00,0x01,0x1f,0x60,0x64 # `d
.byte 0x00,0x34,0x1f,0xf0,0x44,0x00,0x30,0x05 # 4D 0
.byte 0xf4,0x20,0x0b,0x18,0x00,0x1a,0x02,0xb1 #  
.byte 0x1e,0x03,0x72,0x1c,0x24,0x78,0x26,0xda #r$x&
.byte 0x01,0x00,0xf0,0x0c,0x33,0x80,0xef,0x1f # 3Ç
.byte 0x2e,0x00,0x13,0x08,0x52,0x0d,0x48,0xc0 #. RH
.byte 0x7f,0x00,0x1e,0x66,0x02,0x1f,0xd3,0xe4 # fΣ
.byte 0x02,0x01,0x36,0x80,0xf8,0x07,0xc0,0x20 #6ǰ
.byte 0x00,0x7e,0x00,0x3f,0xc0,0x1f,0x1f,0xc7 # ~ 
.byte 0x02,0x06,0x19,0x5c,0x28,0x03,0x3f,0xf8 #\(°
.byte 0x85,0x0d,0xb1,0x1c,0x0b,0x22,0xb0,0x01 #à.
.byte 0xa3,0x0d,0x04,0x30,0x00,0x19,0xa7,0xde #ú0 º
.byte 0x00,0x29,0x28,0xbf,0x78,0x20,0x2f,0xbc # )(x /
.byte 0x0f,0x38,0x0e,0x0d,0x1f,0xff,0xf4,0x1c #8λ
.byte 0x20,0x20,0xf0,0x0c,0x74,0x00,0x11,0xfe # t 
.byte 0xd2,0x02,0x52,0xf8,0x79,0x80,0x00,0x7e #R°yÇ ~
.byte 0x4c,0x03,0x3f,0xfc,0x7f,0x03,0x4c,0x00 #LL 
.byte 0x1f,0x17,0x7f,0xb1,0x00,0x5b,0xfc,0xff # [λ
.byte 0xff,0xfc,0x6d,0x20,0x00,0x26,0x7e,0xb4 #λm  &~
.byte 0x21,0x00,0x1f,0xa3,0x58,0x00,0x18,0x1f #! úX 
.byte 0x18,0x23,0x07,0xff,0xff,0x96,0x2f,0xff #.λλû/λ
.byte 0x01,0xfb,0x0d,0xff,0xff,0xff,0xff,0xff #λλλλλ
.byte 0xff,0xc9,0x04,0xf0,0x0a,0x1f,0x7f,0x1c #λ
.byte 0x19,0x70,0x04,0x08,0x00,0x1f,0x07,0x30 #p 0
.byte 0x18,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλ
.byte 0xff,0xff,0x96,0x2f,0x60,0x0f,0x5f,0x25 #λλû/`_%
.byte 0xff,0xff,0x87,0x5d,0x03,0xf8,0xff,0xe7 #λλç]°λτ
.byte 0x0f,0x30,0x34,0x05,0x66,0x37,0x0f,0xba #04f7
.byte 0x14,0xe2,0x01,0x01,0x00,0x12,0x7f,0x2d #Γ -
.byte 0x3a,0x20,0x1f,0x20,0x01,0x26,0x3f,0xf8 #: &°
.byte 0xfe,0xff,0xc0,0x00,0x97,0x5f,0x7f,0xff #λ ù_λ
.byte 0xff,0xf9,0xdb,0x13,0x0e,0x0e,0x1f,0x7f #λ
.byte 0xb9,0x1a,0x24,0x0f,0xda,0x01,0xa9,0x0a #$
.byte 0xf4,0x00,0x3f,0xf0,0x07,0x00,0x01,0x00 #   
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0x46,0x57,0x02,0x00 #λλλλFW 
.byte 0x00,0x00,0xff,0x01,0x00,0x0c,0x20,0x00 #  λ   
.byte 0x1f,0xff,0x01,0x00,0x07,0x50,0xff,0xff #λ Pλλ
.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.endobj kCombiningCharsLz4,globl,hidden
.previous

View file

@ -0,0 +1,92 @@
// o/$(MODE)/tool/build/lz4toasm.com -o o/$(MODE)/libc/str/EastAsianWidth.s -s kEastAsianWidth o/$(MODE)/libc/str/EastAsianWidth.bin.lz4
.include "libc/macros.internal.inc"
// Read-only, 4-byte aligned 32-bit constant holding the size of the
// kEastAsianWidth table in bits (32768 bytes times 8).
.rodata
.align 4
kEastAsianWidthBits:
.long 32768 * 8
.endobj kEastAsianWidthBits,globl,hidden
.previous
// Reserves 32768 zero bytes for the table, declared through .initbss
// with priority 500 and the _init_kEastAsianWidth tag.
// NOTE(review): presumably this buffer is the destination of the lz4cpy
// call in the _init_kEastAsianWidth init fragment -- confirm against
// the .initbss/.init.start macro definitions.
.initbss 500,_init_kEastAsianWidth
kEastAsianWidth:
.zero 32768
.endobj kEastAsianWidth,globl,hidden
.previous
// Init fragment: calls lz4cpy with 500 in %edx while preserving %rsi
// across the call, then moves the call's result into %rdi and advances
// %rsi by 504.
// NOTE(review): presumably %rdi/%rsi enter as running cursors into the
// .initbss and .initro regions, 500 is the compressed payload length,
// and 504 is that length rounded up to a multiple of 8 -- confirm
// against the .init.start macro and lz4cpy's contract.
.init.start 500,_init_kEastAsianWidth
push %rsi
mov $500,%edx
call lz4cpy
mov %rax,%rdi
pop %rsi
add $504,%rsi
.init.end 500,_init_kEastAsianWidth
.initro 500,_init_kEastAsianWidth
kEastAsianWidthLz4:
.byte 0x1f,0x00,0x01,0x00,0xff,0xff,0x0e,0x17 #  λλ
.byte 0xff,0x01,0x00,0x0f,0x30,0x01,0xff,0x12 #λ 0λ
.byte 0xaf,0x88,0x99,0xf0,0xad,0xae,0xfb,0x2b #»êÖ¡«+
.byte 0x00,0x81,0xfb,0x13,0x01,0xf6,0x3f,0x0c # ü÷?
.byte 0x00,0x06,0x1a,0x00,0x04,0x2f,0x1e,0x09 #  /
.byte 0x5c,0x00,0x2d,0x43,0x60,0x00,0x00,0x30 #\ -C`  0
.byte 0xa9,0x02,0x10,0x0f,0x06,0x00,0xf3,0x0d # 
.byte 0x80,0x00,0x00,0x08,0x00,0x02,0x0c,0x00 #Ç    
.byte 0x60,0x30,0x40,0x10,0x00,0x00,0x04,0x2c #`0@►  ♦,
.byte 0x24,0x20,0x0c,0x00,0x00,0x00,0x01,0x00 #$     
.byte 0x00,0x00,0x50,0xb8,0x33,0x00,0x10,0xe0 #  P3 α
.byte 0x11,0x00,0x1f,0x80,0x00,0x01,0x58,0x12 # Ç X
.byte 0x18,0x07,0x00,0x1f,0x21,0xb0,0x03,0x55 # !U
.byte 0x16,0xfb,0xb2,0x03,0x3f,0x0f,0x00,0xff #? λ
.byte 0x01,0x00,0x06,0x11,0x3f,0x35,0x01,0x03 # ?5
.byte 0x0d,0x00,0x4e,0x7f,0xfe,0xff,0xff,0x0b # Nλλ
.byte 0x00,0x41,0xff,0xff,0xff,0xe0,0x06,0x00 # Aλλλα 
.byte 0x07,0x13,0x00,0x11,0x7f,0x06,0x00,0x17 #  
.byte 0x07,0x5e,0x00,0x02,0x12,0x00,0x2f,0x00 #^  / 
.byte 0xff,0x01,0x00,0xff,0xff,0xff,0x5d,0x0e #λ λλλ]
.byte 0xa0,0x07,0x0f,0x01,0x00,0xff,0xff,0xff #á λλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xbe #λλλλλλλ
.byte 0x13,0x1f,0x67,0x0e,0x0f,0x0c,0x13,0x83 #gâ
.byte 0x1f,0x1f,0x60,0x13,0x3d,0x0f,0x63,0x06 #`=c
.byte 0xff,0xff,0xff,0xff,0xff,0x66,0x2f,0x0f #λλλλλf/
.byte 0x00,0x01,0x00,0xff,0xff,0xf9,0x04,0xb0 #  λλ
.byte 0x1a,0x2f,0xd1,0xe0,0x00,0x1d,0xff,0x10 #/α λ
.byte 0x0f,0xa8,0x15,0x21,0x0f,0xa2,0x00,0x50 #¿§!ó P
.byte 0x12,0x03,0xa6,0x00,0x3f,0xf7,0xff,0x7f #ª ?λ
.byte 0xd9,0x04,0x00,0x08,0xcd,0x19,0x1b,0x01 # 
.byte 0x1c,0x00,0x2f,0x7f,0x00,0x01,0x00,0xff # /  λ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xf8,0x1f,0x0f,0x80 #λλλλ°Ç
.byte 0x18,0xff,0xff,0xf1,0x0f,0x00,0x03,0x4c #λλ± L
.byte 0x1f,0x07,0x04,0x08,0xff,0xff,0xff,0xff #λλλλ
.byte 0x92,0x0f,0xc5,0x04,0x10,0x03,0x27,0x16 #Æ'
.byte 0x4f,0x07,0x00,0xf0,0x00,0x6b,0x1b,0x1f #O  k
.byte 0x0f,0x04,0x10,0xff,0xff,0xff,0xff,0xff #λλλλλ
.byte 0xff,0xff,0x94,0x1f,0x10,0x19,0x00,0x05 #λλö 
.byte 0x0f,0x22,0x39,0x05,0x29,0x40,0xfe,0xd5 #.9)@■╒
.byte 0x0c,0x12,0x07,0x14,0x08,0x6f,0x0f,0xff #oλ
.byte 0x01,0x03,0x00,0x3f,0x60,0x08,0x04,0x34 # ?`4
.byte 0x01,0xe0,0xbf,0x10,0x08,0x15,0xdf,0x36 #α§6
.byte 0x38,0x10,0x87,0x08,0x00,0x15,0x11,0x64 #8ç §d
.byte 0x08,0x1f,0xfd,0x40,0x08,0x03,0x15,0x9f #²@§ƒ
.byte 0xad,0x38,0x11,0x78,0x4d,0x1f,0x10,0x04 #¡8xM
.byte 0xf3,0x39,0x07,0xb4,0x00,0x1c,0xf8,0x6a #9 °j
.byte 0x1f,0x05,0xde,0x38,0x6f,0x10,0x27,0x00 #8o' 
.byte 0x00,0x18,0xf0,0x81,0x0d,0x0a,0x0f,0x9e # ü
.byte 0x08,0x12,0x18,0xe0,0x8f,0x00,0x20,0x7b #αÅ  {
.byte 0xfc,0x06,0x00,0x20,0xe7,0xc7,0x05,0x00 #  τ 
.byte 0x0f,0xe0,0x1f,0x01,0x5f,0x0f,0x07,0x07 #α_
.byte 0x00,0x3f,0xe0,0x20,0x9b,0x0f,0x01,0x00 # ?α ¢ 
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0x0b,0x1f,0x3f,0xfc,0x1f,0xff,0xff,0xff #?λλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0x08,0x50,0xff #λλλλλPλ
.byte 0xff,0xff,0xff,0x3f,0x00,0x00,0x00,0x00
.endobj kEastAsianWidthLz4,globl,hidden
.previous

105
libc/str/langinfo.c Normal file
View file

@ -0,0 +1,105 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/nexgen32e/gettls.h"
#include "libc/thread/thread.h"
#include "libc/str/langinfo.h"
#include "libc/str/locale.h"
#include "libc/str/nltypes.h"
// Emits the Musl attribution text through an assembler .ident directive
// and includes libc/disclaimer.inc at the assembler level.
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
// Built-in strings for LC_TIME items, packed back to back and separated
// by NUL bytes. nl_langinfo_l() reaches entry idx by skipping idx
// terminators, so the order must line up with the low 16 bits of the
// LC_TIME item constants in langinfo.h (noted per group below).
static const char c_time[] =
// 0x00-0x06: ABDAY_1..ABDAY_7
"Sun\0" "Mon\0" "Tue\0" "Wed\0" "Thu\0" "Fri\0" "Sat\0"
// 0x07-0x0D: DAY_1..DAY_7
"Sunday\0" "Monday\0" "Tuesday\0" "Wednesday\0"
"Thursday\0" "Friday\0" "Saturday\0"
// 0x0E-0x19: ABMON_1..ABMON_12
"Jan\0" "Feb\0" "Mar\0" "Apr\0" "May\0" "Jun\0"
"Jul\0" "Aug\0" "Sep\0" "Oct\0" "Nov\0" "Dec\0"
// 0x1A-0x25: MON_1..MON_12
"January\0" "February\0" "March\0" "April\0"
"May\0" "June\0" "July\0" "August\0"
"September\0" "October\0" "November\0" "December\0"
// 0x26-0x27: AM_STR, PM_STR
"AM\0" "PM\0"
// 0x28: D_T_FMT
"%a %b %e %T %Y\0"
// 0x29: D_FMT
"%m/%d/%y\0"
// 0x2A: T_FMT
"%H:%M:%S\0"
// 0x2B: T_FMT_AMPM
"%I:%M:%S %p\0"
// 0x2C: ERA (empty string)
"\0"
// 0x2D: no item constant uses this index (empty string)
"\0"
// 0x2E: ERA_D_FMT
"%m/%d/%y\0"
// 0x2F: ALT_DIGITS
"0123456789\0"
// 0x30: ERA_D_T_FMT
"%a %b %e %T %Y\0"
// 0x31: ERA_T_FMT
"%H:%M:%S";
// Built-in LC_MESSAGES strings, NUL-separated, in index order 0..3:
// YESEXPR, NOEXPR, YESSTR, NOSTR (0x50000..0x50003 in langinfo.h).
static const char c_messages[] = "^[yY]\0" "^[nN]\0" "yes\0" "no";
// Built-in LC_NUMERIC strings, NUL-separated, in index order 0..1:
// RADIXCHAR is "." and THOUSEP is the empty string.
static const char c_numeric[] = ".\0" "";
/**
 * Returns the locale information string for an item.
 *
 * The item is split into a category (item >> 16) and an index within
 * that category (low 16 bits). CODESET and the _NL_LOCALE_NAME
 * extension are answered from the locale object; everything else comes
 * from the built-in NUL-separated string tables in this file.
 *
 * @param item is an nl_item constant, e.g. CODESET, D_T_FMT, RADIXCHAR
 * @param loc is the locale; it is dereferenced for CODESET and
 *     _NL_LOCALE_NAME queries
 * @return "UTF-8" or "ASCII" for CODESET depending on whether
 *     loc->cat[LC_CTYPE] is set; the category's locale name (or "C")
 *     for _NL_LOCALE_NAME; the table entry for a known item; otherwise
 *     the empty string
 */
char *nl_langinfo_l(nl_item item, locale_t loc)
{
	int cat = item >> 16;
	int idx = item & 65535;
	const char *str;

	if (item == CODESET)
		return loc->cat[LC_CTYPE] ? "UTF-8" : "ASCII";

	/*
	 * _NL_LOCALE_NAME extension. A negative item yields a negative
	 * category with idx == 65535, so the lower bound must be checked
	 * too or loc->cat[] would be read out of bounds.
	 */
	if (idx == 65535 && cat >= 0 && cat < LC_ALL)
		return loc->cat[cat] ? (char *)loc->cat[cat]->name : "C";

	/* pick the table and reject indexes past its last entry */
	switch (cat) {
	case LC_NUMERIC:
		if (idx > 1) return "";
		str = c_numeric;
		break;
	case LC_TIME:
		if (idx > 0x31) return "";
		str = c_time;
		break;
	case LC_MONETARY:
		if (idx > 0) return "";
		str = "";
		break;
	case LC_MESSAGES:
		if (idx > 3) return "";
		str = c_messages;
		break;
	default:
		return "";
	}

	/* skip idx NUL-terminated entries to land on the requested one */
	for (; idx; idx--, str++) {
		for (; *str; str++) {
		}
	}

	// translation of the result is disabled:
	// if (cat != LC_NUMERIC && *str) str = LCTRANS(str, cat, loc);
	return (char *)str;
}
/**
 * Returns the locale information string for an item, using the locale
 * stored in the cthread_t structure obtained from __get_tls().
 *
 * @param item is an nl_item constant
 * @return whatever nl_langinfo_l() returns for that item and locale
 */
char *nl_langinfo(nl_item item)
{
	cthread_t self = (cthread_t)__get_tls();
	return nl_langinfo_l(item, self->locale);
}

86
libc/str/langinfo.h Normal file
View file

@ -0,0 +1,86 @@
#ifndef COSMOPOLITAN_LIBC_STR_LANGINFO_H_
#define COSMOPOLITAN_LIBC_STR_LANGINFO_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* Item constants in category 2 (upper 16 bits); the low 16 bits are the
   entry's position within that category's string table. */
/* abbreviated weekday names */
#define ABDAY_1 0x20000
#define ABDAY_2 0x20001
#define ABDAY_3 0x20002
#define ABDAY_4 0x20003
#define ABDAY_5 0x20004
#define ABDAY_6 0x20005
#define ABDAY_7 0x20006
/* full weekday names */
#define DAY_1 0x20007
#define DAY_2 0x20008
#define DAY_3 0x20009
#define DAY_4 0x2000A
#define DAY_5 0x2000B
#define DAY_6 0x2000C
#define DAY_7 0x2000D
/* abbreviated month names */
#define ABMON_1 0x2000E
#define ABMON_2 0x2000F
#define ABMON_3 0x20010
#define ABMON_4 0x20011
#define ABMON_5 0x20012
#define ABMON_6 0x20013
#define ABMON_7 0x20014
#define ABMON_8 0x20015
#define ABMON_9 0x20016
#define ABMON_10 0x20017
#define ABMON_11 0x20018
#define ABMON_12 0x20019
/* full month names */
#define MON_1 0x2001A
#define MON_2 0x2001B
#define MON_3 0x2001C
#define MON_4 0x2001D
#define MON_5 0x2001E
#define MON_6 0x2001F
#define MON_7 0x20020
#define MON_8 0x20021
#define MON_9 0x20022
#define MON_10 0x20023
#define MON_11 0x20024
#define MON_12 0x20025
/* AM/PM strings and date/time format strings */
#define AM_STR 0x20026
#define PM_STR 0x20027
#define D_T_FMT 0x20028
#define D_FMT 0x20029
#define T_FMT 0x2002A
#define T_FMT_AMPM 0x2002B
/* era-related items; note 0x2002D is not assigned to any name */
#define ERA 0x2002C
#define ERA_D_FMT 0x2002E
#define ALT_DIGITS 0x2002F
#define ERA_D_T_FMT 0x20030
#define ERA_T_FMT 0x20031
/* Items outside category 2. As above, the upper 16 bits select the
   category and the low 16 bits the index within it. */
#define CODESET 14
#define CRNCYSTR 0x4000F
#define RADIXCHAR 0x10000
#define THOUSEP 0x10001
#define YESEXPR 0x50000
#define NOEXPR 0x50001
/* Builds the item that asks for the locale name of category `cat`:
   the category in the upper bits with index 0xffff. */
#define _NL_LOCALE_NAME(cat) (((cat) << 16) | 0xffff)
/* public spelling, exposed only under _GNU_SOURCE */
#if defined(_GNU_SOURCE)
#define NL_LOCALE_NAME(cat) _NL_LOCALE_NAME(cat)
#endif
/* plain yes/no strings, exposed only under _GNU_SOURCE or _BSD_SOURCE */
#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
#define YESSTR 0x50002
#define NOSTR 0x50003
#endif
/* Returns the string for one of the item constants defined above. */
char *nl_langinfo(int);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STR_LANGINFO_H_ */

82
libc/str/locale.h Normal file
View file

@ -0,0 +1,82 @@
#ifndef COSMOPOLITAN_LIBC_STR_LOCALE_H_
#define COSMOPOLITAN_LIBC_STR_LOCALE_H_
#include "libc/fmt/conv.h"
#include "libc/time/struct/tm.h"
/* Locale category numbers (LC_CTYPE..LC_MESSAGES are 0..5, LC_ALL is 6)
   interleaved with their bit-flag forms, listed in order of numeric
   value. Each LC_x_MASK defined here equals 1 << LC_x. */
#define LC_CTYPE 0
#define LC_NUMERIC 1
#define LC_CTYPE_MASK 1
#define LC_TIME 2
#define LC_NUMERIC_MASK 2
#define LC_COLLATE 3
#define LC_MONETARY 4
#define LC_TIME_MASK 4
#define LC_MESSAGES 5
#define LC_ALL 6
#define LC_COLLATE_MASK 8
#define LC_MONETARY_MASK 16
#define LC_MESSAGES_MASK 32
/* NOTE(review): 0x1fbf has bits set beyond the six masks above, which
   OR together to 0x3f -- confirm this value is intended. */
#define LC_ALL_MASK 0x1fbf
/* longest locale name, not counting the terminating NUL */
#define LOCALE_NAME_MAX 23
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* Sentinel locale_t value (all bits set); not a dereferenceable pointer. */
#define LC_GLOBAL_LOCALE ((locale_t)-1)
/* One node describing a locale's data for a single category. */
struct __locale_map {
const void *map;       /* opaque data pointer */
size_t map_size;       /* NOTE(review): presumably byte size of map */
char name[LOCALE_NAME_MAX + 1]; /* locale name plus NUL terminator */
const struct __locale_map *next; /* link to another map node */
};
/* A locale object: one map pointer per category, indexed by the LC_*
   category numbers 0..5; a null entry has no map for that category. */
struct __locale_struct {
const struct __locale_map *cat[6];
};
/* Handle type taken by the *_l functions. */
typedef struct __locale_struct *locale_t;
/* Locale-related declarations: setlocale(), the locale object
   management functions (newlocale, duplocale, uselocale, freelocale),
   and the *_l variants, which take an explicit locale_t as their final
   argument. Listed sorted by return type, then name. */
char *nl_langinfo_l(int, locale_t);
char *setlocale(int, const char *);
double strtod_l(const char *, char **, locale_t);
double wcstod_l(const wchar_t *, wchar_t **, locale_t);
float strtof_l(const char *, char **, locale_t);
float wcstof_l(const wchar_t *, wchar_t **, locale_t);
int isdigit_l(int, locale_t);
int islower_l(int, locale_t);
int isupper_l(int, locale_t);
int iswalpha_l(wint_t, locale_t);
int iswblank_l(wint_t, locale_t);
int iswcntrl_l(wint_t, locale_t);
int iswdigit_l(wint_t, locale_t);
int iswlower_l(wint_t, locale_t);
int iswprint_l(wint_t, locale_t);
int iswpunct_l(wint_t, locale_t);
int iswspace_l(wint_t, locale_t);
int iswupper_l(wint_t, locale_t);
int iswxdigit_l(wint_t, locale_t);
int isxdigit_l(int, locale_t);
int strcoll_l(const char *, const char *, locale_t);
int tolower_l(int, locale_t);
int toupper_l(int, locale_t);
int wcscoll_l(const wchar_t *, const wchar_t *, locale_t);
locale_t duplocale(locale_t);
locale_t newlocale(int, const char *, locale_t);
locale_t uselocale(locale_t);
long double strtold_l(const char *, char **, locale_t);
long double wcstold_l(const wchar_t *, wchar_t **, locale_t);
long long strtoll_l(const char *, char **, int, locale_t);
long long wcstoll_l(const wchar_t *, wchar_t **, int, locale_t);
size_t strftime_l(char *, size_t, char const *, struct tm const *, locale_t);
size_t strxfrm_l(char *, const char *, size_t, locale_t);
size_t wcsxfrm_l(wchar_t *, const wchar_t *, size_t, locale_t);
unsigned long long strtoull_l(const char *, char **, int, locale_t);
unsigned long long wcstoull_l(const wchar_t *, wchar_t **, int, locale_t);
void freelocale(locale_t);
wint_t towlower_l(wint_t, locale_t);
wint_t towupper_l(wint_t, locale_t);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STR_LOCALE_H_ */

51
libc/str/localeconv.c Normal file
View file

@ -0,0 +1,51 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/limits.h"
#include "libc/str/unicode.h"
/* Formatting conventions of the only supported locale. String members
   other than the decimal point are empty and every numeric member is
   CHAR_MAX. */
static const struct lconv kLocaleConv = {
    /* non-monetary numeric formatting */
    .decimal_point = ".",
    .thousands_sep = "",
    .grouping = "",
    /* monetary strings */
    .int_curr_symbol = "",
    .currency_symbol = "",
    .mon_decimal_point = "",
    .mon_thousands_sep = "",
    .mon_grouping = "",
    .positive_sign = "",
    .negative_sign = "",
    /* local monetary layout */
    .int_frac_digits = CHAR_MAX,
    .frac_digits = CHAR_MAX,
    .p_cs_precedes = CHAR_MAX,
    .p_sep_by_space = CHAR_MAX,
    .n_cs_precedes = CHAR_MAX,
    .n_sep_by_space = CHAR_MAX,
    .p_sign_posn = CHAR_MAX,
    .n_sign_posn = CHAR_MAX,
    /* international monetary layout */
    .int_p_cs_precedes = CHAR_MAX,
    .int_n_cs_precedes = CHAR_MAX,
    .int_p_sep_by_space = CHAR_MAX,
    .int_n_sep_by_space = CHAR_MAX,
    .int_p_sign_posn = CHAR_MAX,
    .int_n_sign_posn = CHAR_MAX,
};

/**
 * Returns numeric and monetary formatting conventions.
 *
 * @return pointer to a statically allocated table which is defined
 *     const, so it must not be written through
 */
struct lconv *localeconv(void) {
  const struct lconv *table = &kLocaleConv;
  return (/* unconst */ struct lconv *)table;
}

25
libc/str/newlocale.c Normal file
View file

@ -0,0 +1,25 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/sysv/errfuns.h"
#include "libc/str/locale.h"
/**
 * Creates or modifies locale object.
 *
 * This is currently a stub: the arguments are not inspected and the
 * null locale is always returned.
 *
 * @return always 0
 */
locale_t newlocale(int catmask, const char *locale, locale_t base) {
  // TODO: implement me
  (void)catmask;
  (void)locale;
  (void)base;
  return (locale_t)0;
}

19
libc/str/nltypes.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef COSMOPOLITAN_LIBC_STR_NLTYPES_H_
#define COSMOPOLITAN_LIBC_STR_NLTYPES_H_
/* Message catalog constants. */
/* NOTE(review): per POSIX <nl_types.h>, NL_SETD is the default set id and */
/* NL_CAT_LOCALE is the catopen() flag - confirm against the implementation. */
#define NL_SETD 1
#define NL_CAT_LOCALE 1
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
typedef int nl_item;
typedef void *nl_catd;
nl_catd catopen(const char *, int);
char *catgets(nl_catd, int, int, const char *);
int catclose(nl_catd);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STR_NLTYPES_H_ */

35
libc/str/setlocale.c Normal file
View file

@ -0,0 +1,35 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/safemacros.internal.h"
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Sets program locale.
 *
 * Cosmopolitan only supports the C or POSIX locale.
 *
 * @param category is accepted but not inspected
 * @param locale may be NULL to query the current locale, "" to select
 *     the environment's default locale, or the name "C" or "POSIX"
 * @return locale name in static storage that must not be modified, or
 *     NULL if `locale` names an unsupported locale
 */
char *setlocale(int category, const char *locale) {
  (void)category;
  // NULL is a query and "" asks for the native environment; since only
  // one locale exists, both resolve to the C locale
  if (!locale || !*locale) return "C";
  // hand back our own static strings rather than the caller's pointer,
  // whose storage may be const-qualified or short-lived
  if (!strcmp(locale, "C")) return "C";
  if (!strcmp(locale, "POSIX")) return "POSIX";
  return NULL;
}

View file

@ -94,6 +94,32 @@ o/$(MODE)/libc/fmt/strsignal.greg.o: private \
-ffreestanding \
$(NO_MAGIC)
# East Asian Width table pipeline: text table -> .bin -> .bin.lz4 -> .s
# Step 1: mkwides.com turns the checked-in text table into a binary table.
o/$(MODE)/libc/str/eastasianwidth.bin: \
libc/str/eastasianwidth.txt \
o/$(MODE)/tool/decode/mkwides.com
@$(COMPILE) -AMKWIDES -T$@ o/$(MODE)/tool/decode/mkwides.com -o $@ $<
# Step 2: compress the binary table with lz4cli at level 9.
o/$(MODE)/libc/str/eastasianwidth.bin.lz4: \
o/$(MODE)/libc/str/eastasianwidth.bin \
o/$(MODE)/third_party/lz4cli/lz4cli.com
@$(COMPILE) -ALZ4 -T$@ o/$(MODE)/third_party/lz4cli/lz4cli.com -q -f -9 --content-size $< $@
# Step 3: lz4toasm.com emits assembly; -s presumably names the symbol
# (kEastAsianWidth is what wcwidth.c declares extern).
o/$(MODE)/libc/str/eastasianwidth.s: \
o/$(MODE)/libc/str/eastasianwidth.bin.lz4 \
o/$(MODE)/tool/build/lz4toasm.com
@$(COMPILE) -ABIN2ASM -T$@ o/$(MODE)/tool/build/lz4toasm.com -s kEastAsianWidth -o $@ $<
# Combining characters table, step 1: mkcombos.com turns the checked-in
# UnicodeData table into a binary table. The input is the first
# prerequisite ($<), so it must name the file that actually exists under
# libc/str/, which is unicodedata.txt (the one update.sh refreshes).
o/$(MODE)/libc/str/combiningchars.bin:				\
		libc/str/unicodedata.txt			\
		o/$(MODE)/tool/decode/mkcombos.com
	@$(COMPILE) -AMKCOMBOS -T$@ o/$(MODE)/tool/decode/mkcombos.com -o $@ $<
# Combining characters table, step 2: compress the binary table with
# lz4cli at level 9.
o/$(MODE)/libc/str/combiningchars.bin.lz4: \
o/$(MODE)/libc/str/combiningchars.bin \
o/$(MODE)/third_party/lz4cli/lz4cli.com
@$(COMPILE) -ALZ4 -T$@ o/$(MODE)/third_party/lz4cli/lz4cli.com -q -f -9 --content-size $< $@
# Step 3: lz4toasm.com emits assembly; -s presumably names the symbol
# (kCombiningChars is what wcwidth.c declares extern).
o/$(MODE)/libc/str/combiningchars.s: \
o/$(MODE)/libc/str/combiningchars.bin.lz4 \
o/$(MODE)/tool/build/lz4toasm.com
@$(COMPILE) -ABIN2ASM -T$@ o/$(MODE)/tool/build/lz4toasm.com -s kCombiningChars -o $@ $<
LIBC_STR_LIBS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)))
LIBC_STR_SRCS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_SRCS))
LIBC_STR_HDRS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_HDRS))

24
libc/str/strcoll_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Compares strings for collation under an explicit locale.
 *
 * The locale argument is not consulted; ordering is whatever strcmp()
 * reports for the two strings.
 *
 * @return strcmp(p, q)
 */
int strcoll_l(const char *p, const char *q, locale_t l) {
  int order;
  (void)l;
  order = strcmp(p, q);
  return order;
}

93
libc/str/strnwidth.c Normal file
View file

@ -0,0 +1,93 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pcmpgtb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/intrin/psubb.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/bsf.h"
#include "libc/str/str.h"
#include "libc/str/thompike.h"
#include "libc/str/unicode.h"
/**
 * Returns monospace display width of UTF-8 string.
 *
 * - Control codes are discounted
 * - ANSI escape sequences are discounted
 * - East asian glyphs, emoji, etc. count as two
 * - Tabs advance to the next multiple-of-eight column, where the
 *   column is `o` plus the cells counted so far
 *
 * @param s is NUL-terminated string
 * @param n is max bytes to consider
 * @param o is offset for doing tabs
 * @return monospace display width
 */
int strnwidth(const char *s, size_t n, size_t o) {
  size_t i;
  wint_t c, w;
  unsigned l, r;
  enum { kAscii, kUtf8, kEsc, kCsi } t;
  for (w = r = t = l = i = 0; i < n;) {
    if ((c = s[i++] & 0xff)) {
      switch (t) {
        case kAscii:
          if (0x20 <= c && c <= 0x7E) {
            ++l;
          } else if (c == '\t') {
            // tab stops depend on the display column, not the byte
            // index: escape sequences and multibyte glyphs consume
            // bytes without advancing the cursor by the same amount
            l += 8 - ((o + l) & 7);
          } else if (c == 033) {
            t = kEsc;
          } else if (c >= 0300) {
            // lead byte of a multibyte sequence
            t = kUtf8;
            w = ThomPikeByte(c);
            r = ThomPikeLen(c) - 1;
          }
          break;
        case kUtf8:
          if (ThomPikeCont(c)) {
            w = ThomPikeMerge(w, c);
            if (--r) break;  // more continuation bytes expected
          }
          // sequence finished (or was cut short); negative widths from
          // wcwidth() count as zero cells
          l += MAX(0, wcwidth(w));
          t = kAscii;
          break;
        case kEsc:
          if (c == '[') {
            t = kCsi;
          } else if (!(040 <= c && c < 060)) {
            // bytes in [040,060) keep the escape open; anything else
            // ends it
            t = kAscii;
          }
          break;
        case kCsi:
          // bytes in [060,0100) keep the sequence open; anything else
          // ends it
          if (!(060 <= c && c < 0100)) {
            t = kAscii;
          }
          break;
        default:
          unreachable;
      }
    } else {
      break;  // NUL terminator
    }
  }
  return l;
}

38
libc/str/strnwidth16.c Normal file
View file

@ -0,0 +1,38 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/safemacros.internal.h"
#include "libc/str/oldutf16.internal.h"
#include "libc/str/str.h"
#include "libc/str/unicode.h"
/**
 * Returns monospace display width of UTF-16 or UCS-2 string.
 *
 * @param p is NUL-terminated string
 * @param n is max number of 16-bit code units to consider
 * @param o is currently unused
 * @return sum of wcwidth() over the decoded characters, where negative
 *     widths count as zero
 */
int strnwidth16(const char16_t *p, size_t n, size_t o) {
  size_t i, l;
  wint_t wc;
  (void)o;
  l = 0;
  for (i = 0; i < n && p[i];) {
    // a high surrogate in the last permitted slot would presumably make
    // the decoder read its partner beyond the bound, so stop before it
    if ((p[i] & 0xFC00) == 0xD800 && n - i < 2) break;
    // getutf16() is used as returning the number of units it consumed
    i += getutf16(p + i, &wc);
    l += max(0, wcwidth(wc));
  }
  return l;
}

34
libc/str/strwidth.c Normal file
View file

@ -0,0 +1,34 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/unicode.h"
/**
 * Returns monospace display width of UTF-8 string.
 *
 * - Control codes are discounted
 * - ANSI escape sequences are discounted
 * - East asian glyphs, emoji, etc. count as two
 *
 * @param s is NUL-terminated string
 * @param o is string offset for computing tab widths
 * @return monospace display width
 */
int strwidth(const char *s, size_t o) {
  // -1 converts to the largest size_t, i.e. no byte limit; the offset
  // is forwarded so tabs expand relative to the caller's column
  return strnwidth(s, -1, o);
}

28
libc/str/strwidth16.c Normal file
View file

@ -0,0 +1,28 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/safemacros.internal.h"
#include "libc/limits.h"
#include "libc/str/unicode.h"
/**
 * Returns monospace display width of UTF-16 or UCS-2 string.
 *
 * Delegates to strnwidth16() with the largest possible length bound.
 *
 * @param s is NUL-terminated string
 * @param o is forwarded to strnwidth16() unchanged
 * @return whatever strnwidth16() computes
 */
int strwidth16(const char16_t *s, size_t o) {
  const size_t limit = SIZE_MAX;
  return strnwidth16(s, limit, o);
}

24
libc/str/strxfrm_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Transforms string for collation under an explicit locale.
 *
 * The locale argument is not consulted; the work is delegated to
 * strxfrm() with the same destination, source, and count.
 *
 * @return strxfrm(dest, src, count)
 */
size_t strxfrm_l(char *dest, const char *src, size_t count, locale_t l) {
  size_t needed;
  (void)l;
  needed = strxfrm(dest, src, count);
  return needed;
}

24
libc/str/tolower_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Converts character to lowercase under an explicit locale.
 *
 * The locale argument is not consulted.
 *
 * @return tolower(c)
 */
int tolower_l(int c, locale_t l) {
  int lowered;
  (void)l;
  lowered = tolower(c);
  return lowered;
}

24
libc/str/toupper_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Converts character to uppercase under an explicit locale.
 *
 * The locale argument is not consulted.
 *
 * @return toupper(c)
 */
int toupper_l(int c, locale_t l) {
  int raised;
  (void)l;
  raised = toupper(c);
  return raised;
}

24
libc/str/towlower_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Converts wide character to lowercase under an explicit locale.
 *
 * The locale argument is not consulted.
 *
 * @return towlower(c)
 */
wint_t towlower_l(wint_t c, locale_t l) {
  wint_t lowered;
  (void)l;
  lowered = towlower(c);
  return lowered;
}

24
libc/str/towupper_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Converts wide character to uppercase under an explicit locale.
 *
 * The locale argument is not consulted.
 *
 * @return towupper(c)
 */
wint_t towupper_l(wint_t c, locale_t l) {
  wint_t raised;
  (void)l;
  raised = towupper(c);
  return raised;
}

View file

@ -0,0 +1,37 @@
Lu = Letter, uppercase
Ll = Letter, lowercase
Lt = Letter, titlecase
Lm = Letter, modifier
Lo = Letter, other
Mn = Mark, nonspacing
Mc = Mark, spacing combining
Me = Mark, enclosing
Nd = Number, decimal digit
Nl = Number, letter
No = Number, other
Pc = Punctuation, connector
Pd = Punctuation, dash
Ps = Punctuation, open
Pe = Punctuation, close
Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
Po = Punctuation, other
Sm = Symbol, math
Sc = Symbol, currency
Sk = Symbol, modifier
So = Symbol, other
Zs = Separator, space
Zl = Separator, line
Zp = Separator, paragraph
Cc = Other, control
Cf = Other, format
Cs = Other, surrogate
Co = Other, private use
Cn = Other, not assigned (including noncharacters)
W Wide Naturally wide character, e.g. Hiragana.
Na Narrow Naturally narrow character, e.g. ISO Basic Latin alphabet.
F Fullwidth Wide variant with compatibility normalisation to naturally narrow character, e.g. fullwidth Latin script.
H Halfwidth Narrow variant with compatibility normalisation to naturally wide character, e.g. half-width kana. Includes U+20A9 (₩) as an exception.
A Ambiguous Characters included in East Asian DBCS codes but also in European SBCS codes, e.g. Greek alphabet. Duospaced behaviour can consequently vary.
N Neutral Characters which do not appear in East Asian DBCS codes, e.g. Devanagari.

44
libc/str/unicode.h Normal file
View file

@ -0,0 +1,44 @@
#ifndef COSMOPOLITAN_LIBC_STR_UNICODE_H_
#define COSMOPOLITAN_LIBC_STR_UNICODE_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* Numeric and monetary formatting conventions; this is the type that */
/* localeconv() returns a pointer to. */
struct lconv {
/* non-monetary number formatting */
char *decimal_point;
char *thousands_sep;
char *grouping;
/* monetary strings */
char *int_curr_symbol;
char *currency_symbol;
char *mon_decimal_point;
char *mon_thousands_sep;
char *mon_grouping;
char *positive_sign;
char *negative_sign;
/* local monetary layout */
char int_frac_digits;
char frac_digits;
char p_cs_precedes;
char p_sep_by_space;
char n_cs_precedes;
char n_sep_by_space;
char p_sign_posn;
char n_sign_posn;
/* international monetary layout */
char int_p_cs_precedes;
char int_n_cs_precedes;
char int_p_sep_by_space;
char int_n_sep_by_space;
char int_p_sign_posn;
char int_n_sign_posn;
};
int wcwidth(wchar_t) pureconst;
int wcswidth(const wchar_t *, size_t) strlenesque;
int wcsnwidth(const wchar_t *, size_t, size_t) strlenesque;
int strwidth(const char *, size_t) strlenesque;
int strnwidth(const char *, size_t, size_t) strlenesque;
int strwidth16(const char16_t *, size_t) strlenesque;
int strnwidth16(const char16_t *, size_t, size_t) strlenesque;
struct lconv *localeconv(void);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STR_UNICODE_H_ */

34924
libc/str/unicodedata.txt Normal file

File diff suppressed because it is too large Load diff

15
libc/str/update.sh Executable file
View file

@ -0,0 +1,15 @@
#!/bin/sh
# Refreshes the Unicode Character Database tables stored in libc/str/.
#
# Must be run from the repository root. Builds the bundled curl first
# if it isn't already present.

# The tables live in libc/str now (formerly libc/unicode), so that is
# the directory to probe when checking we're at the repository root.
[ -d libc/str ] || exit
[ -x o//examples/curl.com ] || make -j8 o//examples/curl.com || exit
mkdir -p o/tmp/ || exit

# Downloads URL $2 to a temporary file, then moves it over path $1, so
# a failed download never clobbers the existing table.
shineget() {
  echo "$2"
  o//examples/curl.com "$2" >"o/tmp/$$" || exit
  mv "o/tmp/$$" "$1" || exit
}

shineget libc/str/blocks.txt https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt
shineget libc/str/unicodedata.txt https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
shineget libc/str/eastasianwidth.txt https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
shineget libc/str/SpecialCasing.txt https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt

25
libc/str/uselocale.c Normal file
View file

@ -0,0 +1,25 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/sysv/errfuns.h"
#include "libc/str/locale.h"
/**
 * Sets or queries the calling thread's locale.
 *
 * This is currently a stub: the argument is not inspected and the null
 * locale is always returned.
 *
 * @return always 0
 */
locale_t uselocale(locale_t l) {
  // TODO: implement me!
  (void)l;
  return (locale_t)0;
}

24
libc/str/wcscoll_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Compares wide strings for collation under an explicit locale.
 *
 * The locale argument is not consulted; ordering is whatever wcscmp()
 * reports for the two strings.
 *
 * @return wcscmp(p, q)
 */
int wcscoll_l(const wchar_t *p, const wchar_t *q, locale_t l) {
  int order;
  (void)l;
  order = wcscmp(p, q);
  return order;
}

34
libc/str/wcsnwidth.c Normal file
View file

@ -0,0 +1,34 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/unicode.h"
/**
 * Returns monospace display width of wide character string.
 *
 * @param pwcs is wide string; scanning stops at its NUL terminator
 * @param n is max number of wide characters to consider
 * @param o is currently unused
 * @return sum of the wcwidth() of each character considered, or -1 if
 *     wcwidth() reports a negative width for any of them
 */
int wcsnwidth(const wchar_t *pwcs, size_t n, size_t o) {
  size_t i;
  int w, width = 0;
  (void)o;
  // check the bound before dereferencing, so that pwcs[n] is never
  // read when the buffer holds exactly n characters and no terminator
  for (i = 0; i < n && pwcs[i]; ++i) {
    if ((w = wcwidth(pwcs[i])) < 0) {
      return -1;
    }
    width += w;
  }
  return width;
}

27
libc/str/wcswidth.c Normal file
View file

@ -0,0 +1,27 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/limits.h"
#include "libc/str/unicode.h"
/**
 * Returns monospace display width of wide character string.
 *
 * Delegates to wcsnwidth() with the largest possible length bound.
 *
 * NOTE(review): POSIX wcswidth() takes a maximum character count as its
 * second argument, whereas here that argument is forwarded as the
 * offset `o` and the scan is unbounded - confirm callers expect this.
 *
 * @param pwcs is NUL-terminated wide string
 * @param o is forwarded to wcsnwidth() unchanged
 * @return whatever wcsnwidth() computes
 */
int wcswidth(const wchar_t *pwcs, size_t o) {
  const size_t limit = SIZE_MAX;
  return wcsnwidth(pwcs, limit, o);
}

24
libc/str/wcsxfrm_l.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/locale.h"
/**
 * Transforms wide string for collation under an explicit locale.
 *
 * The locale argument is not consulted; the work is delegated to
 * wcsxfrm() with the same destination, source, and count.
 *
 * @return wcsxfrm(dest, src, count)
 */
size_t wcsxfrm_l(wchar_t *dest, const wchar_t *src, size_t count, locale_t l) {
  size_t needed;
  (void)l;
  needed = wcsxfrm(dest, src, count);
  return needed;
}

45
libc/str/wcwidth.c Normal file
View file

@ -0,0 +1,45 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/str/unicode.h"
extern const uint8_t kEastAsianWidth[];
extern const uint32_t kEastAsianWidthBits;
extern const uint8_t kCombiningChars[];
extern const uint32_t kCombiningCharsBits;
/**
 * Returns cell width of monospace character.
 *
 * @param c is the character to measure
 * @return 0 for NUL and for characters flagged in the kCombiningChars
 *     bitset, -1 for codes in [1,32) and [0x7f,0xA0), 2 for characters
 *     flagged in the kEastAsianWidth bitset, otherwise 1
 */
int wcwidth(wchar_t c) {
  int bit;
  // fast path for printable ASCII
  if (LIKELY(32 <= c && c < 127)) return 1;
  if (!c) return 0;
  if ((0 < c && c < 32) || (0x7f <= c && c < 0xA0)) return -1;
  // negative values can't index the tables below
  if (c < 0) return 1;
  // both tables are bitsets holding one bit per code point
  bit = 1 << (c & 7);
  if (c < kCombiningCharsBits && (kCombiningChars[c >> 3] & bit)) return 0;
  if (c < kEastAsianWidthBits) return (kEastAsianWidth[c >> 3] & bit) ? 2 : 1;
  return 1;
}