mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-18 00:20:30 +00:00
Initial import
This commit is contained in:
commit
c91b3c5006
14915 changed files with 590219 additions and 0 deletions
2473
libc/unicode/eastasianwidth.txt
Normal file
2473
libc/unicode/eastasianwidth.txt
Normal file
File diff suppressed because it is too large
Load diff
236
libc/unicode/kcombiningchars.S
Normal file
236
libc/unicode/kcombiningchars.S
Normal file
|
@ -0,0 +1,236 @@
|
|||
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/sysv/consts/nr.h"
|
||||
#include "libc/sysv/consts/madv.h"
|
||||
|
||||
/* kCombiningCharsBits: 32-bit constant holding the number of bits in
   the kCombiningChars table, i.e. its 114752-byte size times eight.
   Exported as a hidden global via the project's .endobj macro. */
.rodata.cst4
|
||||
kCombiningCharsBits:
|
||||
.long 114752 * 8
|
||||
.endobj kCombiningCharsBits,globl,hidden
|
||||
.previous
|
||||
|
||||
/* kCombiningChars: reserves 114752 zero bytes for the combining
   characters table.
   NOTE(review): .initbss is a project macro not shown here; presumably
   it places this object where the matching .init.start 400 code can
   fill it at startup -- confirm against libc/macros.h. */
.initbss 400,_init_kCombiningChars
|
||||
kCombiningChars:
|
||||
.zero 114752
|
||||
.endobj kCombiningChars,globl,hidden
|
||||
.previous
|
||||
|
||||
/* Startup fragment paired with the .initbss/.initro objects of the same
   name: calls lz4cpy with 1257 in %edx, the length of the compressed
   payload in kCombiningCharsLz4.
   NOTE(review): presumably %rdi and %rsi arrive as running destination
   and source cursors maintained by the .init.start/.init.end macros,
   and lz4cpy returns the end of the decompressed output in %rax --
   confirm against the macro and lz4cpy definitions. */
.init.start 400,_init_kCombiningChars
|
||||
/* %rsi is saved because it is needed again after the call */
push %rsi
|
||||
mov $1257,%edx
|
||||
call lz4cpy
|
||||
/* the call's result becomes the new %rdi */
mov %rax,%rdi
|
||||
pop %rsi
|
||||
/* step %rsi over the table: 1264 is the payload size padded to the
   158 rows of eight bytes emitted in kCombiningCharsLz4 */
add $1264,%rsi
|
||||
.init.end 400,_init_kCombiningChars
|
||||
|
||||
/ The data below is sparse, as evidenced by:
|
||||
/ o/tool/viz/bing.com <o/libc/str/CombiningChars.bin |
|
||||
/ o/tool/viz/fold.com
|
||||
/ Thus ask the kernel to shrink its physical memory requirements
|
||||
/ which might sadly need mmap intermediate step on linux to work
|
||||
/ how one would have hoped.
|
||||
/* kCombiningCharsInit: calls mergepages with the address of
   kCombiningChars in %rdi and its size, 114752 bytes, in %esi.
   Referenced from kCombiningCharsCtor in .init_array.
   NOTE(review): ezlea and .profilable are project macros not shown
   here; ezlea presumably loads the symbol address into %rdi. */
.text.startup
|
||||
kCombiningCharsInit:
|
||||
/* conventional frame-pointer prologue */
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
ezlea kCombiningChars,di
|
||||
mov $114752,%esi
|
||||
call mergepages
|
||||
pop %rbp
|
||||
ret
|
||||
.endfn kCombiningCharsInit
|
||||
.previous
|
||||
/* kCombiningCharsCtor: 8-byte aligned pointer to kCombiningCharsInit
   emitted into .init_array, the section whose entries are invoked as
   constructors during program startup. */
.init_array
|
||||
.align 8
|
||||
kCombiningCharsCtor:
|
||||
.quad kCombiningCharsInit
|
||||
.endobj kCombiningCharsCtor
|
||||
.previous
|
||||
|
||||
/ o/tool/build/lz4toasm.com \
|
||||
/ -o o/libc/str/CombiningChars.s \
|
||||
/ -s kCombiningChars \
|
||||
/ o/libc/str/CombiningChars.bin.lz4
|
||||
.initro 400,_init_kCombiningChars
|
||||
kCombiningCharsLz4:
|
||||
.byte 0x16,0x00,0x01,0x00,0x24,0x40,0x01,0x0a #▬ ☺ $@☺◙
|
||||
.byte 0x00,0x4f,0x81,0x10,0x01,0x00,0x01,0x00 # Oü►☺ ☺
|
||||
.byte 0x2c,0x7b,0x3c,0x00,0xfc,0xff,0xe0,0xaf #,{< ⁿλα»
|
||||
.byte 0xff,0x01,0x00,0x3f,0x20,0x00,0x30,0x38 #λ☺ ? 08
|
||||
.byte 0x00,0x0c,0x2f,0xf8,0x03,0x5a,0x00,0x0d # ♀/°♥Z ♪
|
||||
.byte 0x10,0xfe,0x49,0x00,0x23,0xbf,0xb6,0x0e #►■I .┐╢♫
|
||||
.byte 0x00,0x42,0x3f,0x00,0xff,0x17,0x39,0x00 # B? λ↨9
|
||||
.byte 0x48,0xff,0xff,0x00,0x00,0xb7,0x00,0xf7 #Hλλ ╖ ≈
|
||||
.byte 0x01,0xc0,0xbf,0x9f,0x3d,0x00,0x00,0x00 #☺└┐ƒ=
|
||||
.byte 0x80,0x02,0x00,0x00,0x00,0xff,0xff,0xff #Ç☻ λλλ
|
||||
.byte 0x07,0x1a,0x00,0x13,0xff,0x28,0x00,0x91 #•→ ‼λ( æ
|
||||
.byte 0xf8,0x0f,0x20,0x00,0x00,0xc0,0xfb,0xef #°☼ └√∩
|
||||
.byte 0x3e,0x0e,0x00,0x1b,0x0e,0x8a,0x00,0x26 #>♫ ←♫è &
|
||||
.byte 0xff,0xff,0x37,0x00,0xa2,0x14,0xfe,0x21 #λλ7 ó¶■!
|
||||
.byte 0xfe,0x00,0x0c,0x00,0x00,0x00,0x02,0x10 #■ ♀ ☻►
|
||||
.byte 0x00,0x40,0x10,0x1e,0x20,0x00,0x10,0x00 # @►▲ ►
|
||||
.byte 0x23,0x40,0x06,0x10,0x00,0x20,0x86,0x39 #.@♠► å9
|
||||
.byte 0x1a,0x00,0x24,0x23,0x00,0x10,0x00,0x21 #→ $. ► !
|
||||
.byte 0xbe,0x21,0x20,0x00,0x13,0xfc,0x30,0x00 #╛! ‼ⁿ0
|
||||
.byte 0x41,0x90,0x1e,0x20,0x40,0x40,0x00,0x13 #AÉ▲ @@ ‼
|
||||
.byte 0x04,0x5e,0x00,0x22,0x01,0x20,0x08,0x00 #♦^ .☺ ◘
|
||||
.byte 0x13,0x11,0x93,0x00,0x38,0xc1,0x3d,0x60 #‼◄ô 8┴=`
|
||||
.byte 0x60,0x00,0x31,0x90,0x40,0x30,0x40,0x00 #` 1É@0@
|
||||
.byte 0x13,0x00,0x0f,0x01,0x13,0x18,0x70,0x00 #‼ ☼☺‼↑p
|
||||
.byte 0x06,0x9f,0x00,0x27,0x04,0x5c,0x0d,0x00 #♠ƒ '♦\♪
|
||||
.byte 0x48,0xf2,0x07,0x80,0x7f,0x1d,0x00,0x45 #H≥•Ç⌂↔ E
|
||||
.byte 0xf2,0x1f,0x00,0x3f,0x0d,0x00,0x43,0x03 #≥▼ ?♪ C♥
|
||||
.byte 0x00,0x00,0xa0,0x57,0x00,0x50,0xfe,0x7f # áW P■⌂
|
||||
.byte 0xdf,0xe0,0xff,0x41,0x01,0x28,0x1f,0x40 #▀αλA☺(▼@
|
||||
.byte 0x2f,0x00,0xff,0x00,0xe0,0xfd,0x66,0x00 #/ λ α²f
|
||||
.byte 0x00,0x00,0xc3,0x01,0x00,0x1e,0x00,0x64 # ├☺ ▲ d
|
||||
.byte 0x20,0x00,0x20,0x7a,0x01,0x05,0x1f,0xff # z☺♣▼λ
|
||||
.byte 0x01,0x00,0x00,0x0f,0x13,0x02,0x18,0x2f #☺ ☼‼☻↑/
|
||||
.byte 0xe0,0x00,0x01,0x00,0x62,0x13,0x1c,0x04 #α ☺ b‼∟♦
|
||||
.byte 0x00,0x26,0x0c,0x00,0x42,0x01,0x52,0xb0 # &♀ B☺R░
|
||||
.byte 0x3f,0x40,0xfe,0x0f,0xe8,0x00,0x1a,0x78 #?@■☼Φ →x
|
||||
.byte 0x2e,0x00,0x26,0x60,0x00,0x85,0x01,0x04 #. &` à☺♦
|
||||
.byte 0x14,0x00,0x4f,0x87,0x01,0x04,0x0e,0x60 #¶ Oç☺♦♫`
|
||||
.byte 0x00,0x07,0x23,0x80,0x09,0x3f,0x03,0x53 # •.Ç○?♥S
|
||||
.byte 0x7f,0xe5,0x1f,0xf8,0x9f,0x2a,0x01,0x05 #⌂σ▼°ƒ*☺♣
|
||||
.byte 0x8e,0x01,0x11,0x0f,0x06,0x00,0x32,0xd0 #Ä☺◄☼♠ 2╨
|
||||
.byte 0x17,0x04,0x70,0x02,0x01,0xd0,0x01,0x23 #↨♦p☻☺╨☺.
|
||||
.byte 0x3c,0x3b,0x32,0x00,0x13,0xa3,0xde,0x01 #<;2 ‼ú▐☺
|
||||
.byte 0x2f,0xf0,0xcf,0x58,0x00,0x00,0x6f,0xf7 #/≡╧X o≈
|
||||
.byte 0xff,0xfd,0x21,0x10,0x03,0x8c,0x01,0x0c #λ²!►♥î☺♀
|
||||
.byte 0x1f,0xfb,0x15,0x01,0x24,0x40,0xa0,0x03 #▼√§☺$@á♥
|
||||
.byte 0xe0,0x00,0x02,0x00,0x72,0x60,0x00,0xf8 #α ☻ r` °
|
||||
.byte 0x00,0x00,0x00,0x7c,0x15,0x00,0x2c,0xdf # |§ ,▀
|
||||
.byte 0xff,0x62,0x00,0x2f,0x01,0x00,0x01,0x00 #λb /☺ ☺
|
||||
.byte 0xff,0x6b,0x1d,0x80,0xff,0x01,0x1c,0x80 #λk↔Çλ☺∟Ç
|
||||
.byte 0xa2,0x01,0x0f,0x68,0x00,0x32,0x19,0x3c #ó☺☼h 2↓<
|
||||
.byte 0x0e,0x00,0x2f,0x1e,0x00,0x01,0x00,0xff #♫ /▲ ☺ λ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xb3,0x20,0x80 #λλλλλ│ Ç
|
||||
.byte 0xf7,0xf6,0x12,0x27,0xc0,0x00,0xfb,0x12 #≈÷↕'└ √↕
|
||||
.byte 0x58,0xff,0xff,0x7f,0x00,0x03,0x24,0x00 #Xλλ⌂ ♥$
|
||||
.byte 0x1a,0x06,0x33,0x00,0x23,0x44,0x08,0xf4 #→♠3 .D◘⌠
|
||||
.byte 0x11,0x0b,0x4b,0x00,0x11,0x30,0x60,0x0f #◄♂K ◄0`☼
|
||||
.byte 0x11,0x03,0x70,0x0f,0x62,0xc0,0x3f,0x00 #◄♥p☼b└?
|
||||
.byte 0x00,0x80,0xff,0x46,0x00,0x02,0x10,0x14 # ÇλF ☻►¶
|
||||
.byte 0x20,0xc8,0x33,0x06,0x00,0x05,0x29,0x13 # ╚3♠ ♣)‼
|
||||
.byte 0x52,0x7e,0x66,0x00,0x08,0x10,0xf8,0x13 #R~f ◘►°‼
|
||||
.byte 0x02,0x11,0x00,0x21,0x9d,0xc1,0x43,0x12 #☻◄ !¥┴C↕
|
||||
.byte 0x19,0x30,0x66,0x13,0x1c,0x08,0x64,0x00 #↓0f‼∟◘d
|
||||
.byte 0x2f,0x20,0x21,0x96,0x0a,0xff,0xff,0xff #/ !û◙λλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xdb,0x1e #λλλλλλ█▲
|
||||
.byte 0x40,0x13,0x00,0x3f,0xfc,0xff,0x03,0x5d #@‼ ?ⁿλ♥]
|
||||
.byte 0x00,0x34,0x00,0x02,0x1a,0x0f,0x06,0x1a # 4 ☻→☼♠→
|
||||
.byte 0x08,0x1d,0x80,0xdc,0x1f,0x0a,0x91,0x0a #◘↔Ç▄▼◙æ◙
|
||||
.byte 0x1f,0x0e,0x7f,0x00,0x2c,0x1f,0x20,0x1d #▼♫⌂ ,▼ ↔
|
||||
.byte 0x00,0x09,0x0e,0x74,0x00,0x2f,0xc0,0x07 # ○♫t /└•
|
||||
.byte 0xdd,0x01,0xbd,0x22,0x6e,0xf0,0x23,0x1e #▌☺╜.n≡.▲
|
||||
.byte 0x0f,0x1c,0x00,0x01,0x1f,0x60,0x64,0x00 #☼∟ ☺▼`d
|
||||
.byte 0x34,0x1f,0xf0,0x44,0x00,0x30,0x05,0xf4 #4▼≡D 0♣⌠
|
||||
.byte 0x20,0x0b,0x18,0x00,0x1a,0x02,0xb1,0x1e # ♂↑ →☻▒▲
|
||||
.byte 0x03,0x72,0x1c,0x24,0x78,0x26,0xda,0x01 #♥r∟$x&┌☺
|
||||
.byte 0x00,0xf0,0x0c,0x35,0x80,0xef,0x1f,0x32 # ≡♀5Ç∩▼2
|
||||
.byte 0x02,0x02,0x20,0x00,0x29,0xc0,0x7f,0x26 #☻☻ )└⌂&
|
||||
.byte 0x1c,0x3f,0x80,0xd3,0x40,0x7c,0x02,0x01 #∟?Ç╙@|☻☺
|
||||
.byte 0x26,0xf8,0x07,0xc0,0x20,0x00,0x7e,0x00 #&°•└ ~
|
||||
.byte 0x3f,0xc0,0x1f,0x1f,0xc7,0x02,0x06,0x19 #?└▼▼╟☻♠↓
|
||||
.byte 0x5c,0x28,0x03,0x3f,0xf8,0x85,0x0d,0xb1 #\(♥?°à♪▒
|
||||
.byte 0x1c,0x0b,0x22,0xb0,0x01,0xa3,0x0d,0x04 #∟♂.░☺ú♪♦
|
||||
.byte 0x30,0x00,0x19,0xa7,0xde,0x00,0x29,0x28 #0 ↓º▐ )(
|
||||
.byte 0xbf,0x78,0x20,0x2f,0xbc,0x0f,0x38,0x0e #┐x /╝☼8♫
|
||||
.byte 0x0d,0x2f,0xff,0x06,0x96,0x01,0x20,0x10 #♪/λ♠û☺ ►
|
||||
.byte 0x0c,0x74,0x00,0x11,0xfe,0xd2,0x02,0x52 #♀t ◄■╥☻R
|
||||
.byte 0xf8,0x79,0x80,0x00,0x7e,0x4c,0x03,0x2f #°yÇ ~L♥/
|
||||
.byte 0xfc,0x7f,0xdb,0x03,0x20,0x28,0x7f,0xbf #ⁿ⌂█♥ (⌂┐
|
||||
.byte 0x1c,0x04,0x3b,0xff,0xfc,0x6d,0x20,0x00 #∟♦;λⁿm
|
||||
.byte 0x26,0x7e,0xb4,0x21,0x00,0x1f,0xa3,0x58 #&~┤! ▼úX
|
||||
.byte 0x00,0x18,0x1f,0x18,0x23,0x07,0xff,0xff # ↑▼↑.•λλ
|
||||
.byte 0x96,0x2f,0xff,0x01,0xfb,0x0d,0xff,0xff #û/λ☺√♪λλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xc9,0x04,0xf0,0x0a #λλλλ╔♦≡◙
|
||||
.byte 0x1f,0x7f,0x1c,0x19,0x70,0x04,0x08,0x00 #▼⌂∟↓p♦◘
|
||||
.byte 0x1f,0x07,0x30,0x18,0xff,0xff,0xff,0xff #▼•0↑λλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0x96,0x2f,0x60 #λλλλλû/`
|
||||
.byte 0x0f,0x5f,0x25,0xff,0xff,0x87,0x5d,0x03 #☼_%λλç]♥
|
||||
.byte 0xf8,0xff,0xe7,0x0f,0x30,0x34,0x05,0x66 #°λτ☼04♣f
|
||||
.byte 0x37,0x0f,0xba,0x14,0xe2,0x01,0x01,0x00 #7☼║¶Γ☺☺
|
||||
.byte 0x12,0x7f,0x2d,0x3a,0x20,0x1f,0x20,0x01 #↕⌂-: ▼ ☺
|
||||
.byte 0x26,0x3f,0xf8,0xfe,0xff,0xc0,0x00,0x97 #&?°■λ└ ù
|
||||
.byte 0x5f,0x7f,0xff,0xff,0xf9,0xdb,0x13,0x0e #_⌂λλ∙█‼♫
|
||||
.byte 0x0e,0x1f,0x7f,0xb9,0x1a,0x24,0x0f,0xda #♫▼⌂╣→$☼┌
|
||||
.byte 0x01,0xa9,0x0a,0xf4,0x00,0x1f,0xf0,0x37 #☺⌐◙⌠ ▼≡7
|
||||
.byte 0x0f,0xff,0x44,0x2f,0xf8,0x00,0x01,0x00 #☼λD/° ☺
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xee,0x57,0x02,0x00,0x00,0x00 #λλεW☻
|
||||
.byte 0xff,0x01,0x00,0x0c,0x20,0x00,0x1f,0xff #λ☺ ♀ ▼λ
|
||||
.byte 0x01,0x00,0x07,0x50,0xff,0xff,0xff,0x00 #☺ •Pλλλ
|
||||
.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
||||
.endobj kCombiningCharsLz4,globl,hidden
|
||||
.previous
|
92
libc/unicode/keastasianwidth.s
Normal file
92
libc/unicode/keastasianwidth.s
Normal file
|
@ -0,0 +1,92 @@
|
|||
/ o/$(MODE)/tool/build/lz4toasm.com -o o/$(MODE)/libc/str/EastAsianWidth.s -s kEastAsianWidth o/$(MODE)/libc/str/EastAsianWidth.bin.lz4
|
||||
.include "libc/macros.inc"
|
||||
|
||||
/* kEastAsianWidthBits: 4-byte aligned 32-bit constant holding the
   number of bits in the kEastAsianWidth table, i.e. its 32768-byte
   size times eight.  Exported as a hidden global via .endobj. */
.rodata
|
||||
.align 4
|
||||
kEastAsianWidthBits:
|
||||
.long 32768 * 8
|
||||
.endobj kEastAsianWidthBits,globl,hidden
|
||||
.previous
|
||||
|
||||
/* kEastAsianWidth: reserves 32768 zero bytes for the East Asian width
   table.
   NOTE(review): .initbss is a project macro not shown here; presumably
   it places this object where the matching .init.start 500 code can
   fill it at startup -- confirm against libc/macros.inc. */
.initbss 500,_init_kEastAsianWidth
|
||||
kEastAsianWidth:
|
||||
.zero 32768
|
||||
.endobj kEastAsianWidth,globl,hidden
|
||||
.previous
|
||||
|
||||
/* Startup fragment paired with the .initbss/.initro objects of the same
   name: calls lz4cpy with 500 in %edx, the length of the compressed
   payload in kEastAsianWidthLz4.
   NOTE(review): presumably %rdi and %rsi arrive as running destination
   and source cursors maintained by the .init.start/.init.end macros,
   and lz4cpy returns the end of the decompressed output in %rax --
   confirm against the macro and lz4cpy definitions. */
.init.start 500,_init_kEastAsianWidth
|
||||
/* %rsi is saved because it is needed again after the call */
push %rsi
|
||||
mov $500,%edx
|
||||
call lz4cpy
|
||||
/* the call's result becomes the new %rdi */
mov %rax,%rdi
|
||||
pop %rsi
|
||||
/* step %rsi over the table: 504 is the payload size padded to the
   63 rows of eight bytes emitted in kEastAsianWidthLz4 */
add $504,%rsi
|
||||
.init.end 500,_init_kEastAsianWidth
|
||||
|
||||
.initro 500,_init_kEastAsianWidth
|
||||
kEastAsianWidthLz4:
|
||||
.byte 0x1f,0x00,0x01,0x00,0xff,0xff,0x0e,0x17 #▼ ☺ λλ♫↨
|
||||
.byte 0xff,0x01,0x00,0x0f,0x30,0x01,0xff,0x12 #λ☺ ☼0☺λ↕
|
||||
.byte 0xaf,0x88,0x99,0xf0,0xad,0xae,0xfb,0x2b #»êÖ≡¡«√+
|
||||
.byte 0x00,0x81,0xfb,0x13,0x01,0xf6,0x3f,0x0c # ü√‼☺÷?♀
|
||||
.byte 0x00,0x06,0x1a,0x00,0x04,0x2f,0x1e,0x09 # ♠→ ♦/▲○
|
||||
.byte 0x5c,0x00,0x2d,0x43,0x60,0x00,0x00,0x30 #\ -C` 0
|
||||
.byte 0xa9,0x02,0x10,0x0f,0x06,0x00,0xf3,0x0d #⌐☻►☼♠ ≤♪
|
||||
.byte 0x80,0x00,0x00,0x08,0x00,0x02,0x0c,0x00 #Ç ◘ ☻♀
|
||||
.byte 0x60,0x30,0x40,0x10,0x00,0x00,0x04,0x2c #`0@► ♦,
|
||||
.byte 0x24,0x20,0x0c,0x00,0x00,0x00,0x01,0x00 #$ ♀ ☺
|
||||
.byte 0x00,0x00,0x50,0xb8,0x33,0x00,0x10,0xe0 # P╕3 ►α
|
||||
.byte 0x11,0x00,0x1f,0x80,0x00,0x01,0x58,0x12 #◄ ▼Ç ☺X↕
|
||||
.byte 0x18,0x07,0x00,0x1f,0x21,0xb0,0x03,0x55 #↑• ▼!░♥U
|
||||
.byte 0x16,0xfb,0xb2,0x03,0x3f,0x0f,0x00,0xff #▬√▓♥?☼ λ
|
||||
.byte 0x01,0x00,0x06,0x11,0x3f,0x35,0x01,0x03 #☺ ♠◄?5☺♥
|
||||
.byte 0x0d,0x00,0x4e,0x7f,0xfe,0xff,0xff,0x0b #♪ N⌂■λλ♂
|
||||
.byte 0x00,0x41,0xff,0xff,0xff,0xe0,0x06,0x00 # Aλλλα♠
|
||||
.byte 0x07,0x13,0x00,0x11,0x7f,0x06,0x00,0x17 #•‼ ◄⌂♠ ↨
|
||||
.byte 0x07,0x5e,0x00,0x02,0x12,0x00,0x2f,0x00 #•^ ☻↕ /
|
||||
.byte 0xff,0x01,0x00,0xff,0xff,0xff,0x5d,0x0e #λ☺ λλλ]♫
|
||||
.byte 0xa0,0x07,0x0f,0x01,0x00,0xff,0xff,0xff #á•☼☺ λλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xbe #λλλλλλλ╛
|
||||
.byte 0x13,0x1f,0x67,0x0e,0x0f,0x0c,0x13,0x83 #‼▼g♫☼♀‼â
|
||||
.byte 0x1f,0x1f,0x60,0x13,0x3d,0x0f,0x63,0x06 #▼▼`‼=☼c♠
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0x66,0x2f,0x0f #λλλλλf/☼
|
||||
.byte 0x00,0x01,0x00,0xff,0xff,0xf9,0x04,0xb0 # ☺ λλ∙♦░
|
||||
.byte 0x1a,0x2f,0xd1,0xe0,0x00,0x1d,0xff,0x10 #→/╤α ↔λ►
|
||||
.byte 0x0f,0xa8,0x15,0x21,0x0f,0xa2,0x00,0x50 #☼¿§!☼ó P
|
||||
.byte 0x12,0x03,0xa6,0x00,0x3f,0xf7,0xff,0x7f #↕♥ª ?≈λ⌂
|
||||
.byte 0xd9,0x04,0x00,0x08,0xcd,0x19,0x1b,0x01 #┘♦ ◘═↓←☺
|
||||
.byte 0x1c,0x00,0x2f,0x7f,0x00,0x01,0x00,0xff #∟ /⌂ ☺ λ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xf8,0x1f,0x0f,0x80 #λλλλ°▼☼Ç
|
||||
.byte 0x18,0xff,0xff,0xf1,0x0f,0x00,0x03,0x4c #↑λλ±☼ ♥L
|
||||
.byte 0x1f,0x07,0x04,0x08,0xff,0xff,0xff,0xff #▼•♦◘λλλλ
|
||||
.byte 0x92,0x0f,0xc5,0x04,0x10,0x03,0x27,0x16 #Æ☼┼♦►♥'▬
|
||||
.byte 0x4f,0x07,0x00,0xf0,0x00,0x6b,0x1b,0x1f #O• ≡ k←▼
|
||||
.byte 0x0f,0x04,0x10,0xff,0xff,0xff,0xff,0xff #☼♦►λλλλλ
|
||||
.byte 0xff,0xff,0x94,0x1f,0x10,0x19,0x00,0x05 #λλö▼►↓ ♣
|
||||
.byte 0x0f,0x22,0x39,0x05,0x29,0x40,0xfe,0xd5 #☼.9♣)@■╒
|
||||
.byte 0x0c,0x12,0x07,0x14,0x08,0x6f,0x0f,0xff #♀↕•¶◘o☼λ
|
||||
.byte 0x01,0x03,0x00,0x3f,0x60,0x08,0x04,0x34 #☺♥ ?`◘♦4
|
||||
.byte 0x01,0xe0,0xbf,0x10,0x08,0x15,0xdf,0x36 #☺α┐►◘§▀6
|
||||
.byte 0x38,0x10,0x87,0x08,0x00,0x15,0x11,0x64 #8►ç◘ §◄d
|
||||
.byte 0x08,0x1f,0xfd,0x40,0x08,0x03,0x15,0x9f #◘▼²@◘♥§ƒ
|
||||
.byte 0xad,0x38,0x11,0x78,0x4d,0x1f,0x10,0x04 #¡8◄xM▼►♦
|
||||
.byte 0xf3,0x39,0x07,0xb4,0x00,0x1c,0xf8,0x6a #≤9•┤ ∟°j
|
||||
.byte 0x1f,0x05,0xde,0x38,0x6f,0x10,0x27,0x00 #▼♣▐8o►'
|
||||
.byte 0x00,0x18,0xf0,0x81,0x0d,0x0a,0x0f,0x9e # ↑≡ü♪◙☼€
|
||||
.byte 0x08,0x12,0x18,0xe0,0x8f,0x00,0x20,0x7b #◘↕↑αÅ {
|
||||
.byte 0xfc,0x06,0x00,0x20,0xe7,0xc7,0x05,0x00 #ⁿ♠ τ╟♣
|
||||
.byte 0x0f,0xe0,0x1f,0x01,0x5f,0x0f,0x07,0x07 #☼α▼☺_☼••
|
||||
.byte 0x00,0x3f,0xe0,0x20,0x9b,0x0f,0x01,0x00 # ?α ¢☼☺
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0x0b,0x1f,0x3f,0xfc,0x1f,0xff,0xff,0xff #♂▼?ⁿ▼λλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
|
||||
.byte 0xff,0xff,0xff,0xff,0xff,0x08,0x50,0xff #λλλλλ◘Pλ
|
||||
.byte 0xff,0xff,0xff,0x3f,0x00,0x00,0x00,0x00
|
||||
.endobj kEastAsianWidthLz4,globl,hidden
|
||||
.previous
|
38
libc/unicode/strnwidth16.c
Normal file
38
libc/unicode/strnwidth16.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/safemacros.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/unicode/unicode.h"
|
||||
|
||||
/**
 * Returns monospace display width of UTF-16 or UCS-2 string.
 *
 * Scanning stops at the NUL terminator or once n code units have been
 * consumed, whichever comes first. A character whose encoding would
 * extend past the n'th code unit is not counted. Characters for which
 * wcwidth() reports a negative width contribute zero columns.
 *
 * @param p is the string to measure
 * @param n is the maximum number of char16_t code units to examine
 * @return number of display columns
 */
int strnwidth16(const char16_t *p, size_t n) {
  size_t l; /* columns accumulated so far */
  size_t k; /* code units consumed by the latest decode */
  wint_t wc;
  l = 0;
  while (n && *p) {
    /* NOTE(review): assumes getutf16() returns how many char16_t units
       it consumed -- confirm against its definition */
    k = getutf16(p, &wc);
    /* stop rather than run past the caller's limit or spin in place */
    if (!k || k > n) break;
    p += k;
    n -= k;
    l += max(0, wcwidth(wc));
  }
  return l;
}
|
71
libc/unicode/strwidth.c
Normal file
71
libc/unicode/strwidth.c
Normal file
|
@ -0,0 +1,71 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/safemacros.h"
|
||||
#include "libc/conv/conv.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/str/internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/unicode/unicode.h"
|
||||
|
||||
#define kOneTrueTabWidth 8
|
||||
|
||||
/**
 * Returns monospace display width of UTF-8 string.
 *
 * Measures the whole NUL-terminated string by calling strnwidth() with
 * SIZE_MAX as the byte limit.
 */
int(strwidth)(const char *s) {
  int cols;
  cols = strnwidth(s, SIZE_MAX);
  return cols;
}
|
||||
|
||||
int(strnwidth)(const char *s, size_t n) {
|
||||
/* TODO(jart): Fix this function. */
|
||||
size_t l;
|
||||
wint_t wc;
|
||||
const unsigned char *p, *pe;
|
||||
l = 0;
|
||||
if (n) {
|
||||
p = (const unsigned char *)s;
|
||||
pe = (const unsigned char *)(n == SIZE_MAX ? INTPTR_MAX : (intptr_t)s + n);
|
||||
for (;;) {
|
||||
while (p < pe && iscont(*p)) p++;
|
||||
if (p == pe || !*p) break;
|
||||
if (*p == L'\t') {
|
||||
if (l & (kOneTrueTabWidth - 1)) {
|
||||
l += kOneTrueTabWidth - (l & (kOneTrueTabWidth - 1));
|
||||
} else {
|
||||
l += kOneTrueTabWidth;
|
||||
}
|
||||
++p;
|
||||
} else if (*p == L'\e') {
|
||||
while (++p < pe && *p) {
|
||||
if (*p == '[' || *p == ';' || isdigit(*p)) {
|
||||
continue;
|
||||
} else {
|
||||
++p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
p += abs(tpdecode((const char *)p, &wc));
|
||||
l += max(0, wcwidth(wc));
|
||||
}
|
||||
}
|
||||
}
|
||||
return l;
|
||||
}
|
29
libc/unicode/strwidth16.c
Normal file
29
libc/unicode/strwidth16.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/safemacros.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/unicode/unicode.h"
|
||||
|
||||
/**
 * Returns monospace display width of UTF-16 or UCS-2 string.
 *
 * Measures the whole NUL-terminated string by calling strnwidth16()
 * with SIZE_MAX as the length limit.
 */
int strwidth16(const char16_t *s) {
  int cols;
  cols = strnwidth16(s, SIZE_MAX);
  return cols;
}
|
49
libc/unicode/unicode.h
Normal file
49
libc/unicode/unicode.h
Normal file
|
@ -0,0 +1,49 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_UNICODE_UNICODE_H_
|
||||
#define COSMOPOLITAN_LIBC_UNICODE_UNICODE_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § characters » unicode ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
extern const uint64_t kEastAsianWidth[];
|
||||
extern const uint32_t kEastAsianWidthBits;
|
||||
extern const uint64_t kCombiningChars[];
|
||||
extern const uint32_t kCombiningCharsBits;
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § strings » multibyte » unicode ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
int wcwidth(wchar_t) pureconst;
|
||||
int wcswidth(const wchar_t *) strlenesque;
|
||||
int wcsnwidth(const wchar_t *, size_t) strlenesque;
|
||||
int strwidth(const char *) strlenesque;
|
||||
int strnwidth(const char *, size_t) strlenesque;
|
||||
int strwidth16(const char16_t *) strlenesque;
|
||||
int strnwidth16(const char16_t *, size_t) strlenesque;
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § unicode » generic typing ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#if __STDC_VERSION__ + 0 >= 201112
|
||||
|
||||
/* Type-generic strwidth(): selects the implementation from the pointee
   type of s -- wcswidth for wchar_t, strwidth16 for char16_t, and the
   UTF-8 strwidth function for everything else. */
#define strwidth(s)          \
  _Generic(*(s),             \
      wchar_t: wcswidth,     \
      char16_t: strwidth16,  \
      default: strwidth)(s)
|
||||
|
||||
/* Type-generic strnwidth(): selects the implementation from the pointee
   type of s.  The selected function is called as f(s, n), so the
   wchar_t case must name the two-argument wcsnwidth; the one-argument
   wcswidth cannot be called with a length. */
#define strnwidth(s, n)       \
  _Generic(*(s),              \
      wchar_t: wcsnwidth,     \
      char16_t: strnwidth16,  \
      default: strnwidth)(s, n)
|
||||
|
||||
#endif /* C11 */
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_UNICODE_UNICODE_H_ */
|
91
libc/unicode/unicode.mk
Normal file
91
libc/unicode/unicode.mk
Normal file
|
@ -0,0 +1,91 @@
|
|||
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
|
||||
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
|
||||
|
||||
PKGS += LIBC_UNICODE
|
||||
|
||||
LIBC_UNICODE_ARTIFACTS += LIBC_UNICODE_A
|
||||
LIBC_UNICODE = $(LIBC_UNICODE_A_DEPS) $(LIBC_UNICODE_A)
|
||||
LIBC_UNICODE_A = o/$(MODE)/libc/unicode/unicode.a
|
||||
LIBC_UNICODE_A_FILES := $(wildcard libc/unicode/*)
|
||||
LIBC_UNICODE_A_HDRS = $(filter %.h,$(LIBC_UNICODE_A_FILES))
|
||||
LIBC_UNICODE_A_SRCS_A = $(filter %.s,$(LIBC_UNICODE_A_FILES))
|
||||
LIBC_UNICODE_A_SRCS_S = $(filter %.S,$(LIBC_UNICODE_A_FILES))
|
||||
LIBC_UNICODE_A_SRCS_C = $(filter %.c,$(LIBC_UNICODE_A_FILES))
|
||||
|
||||
LIBC_UNICODE_A_SRCS = \
|
||||
$(LIBC_UNICODE_A_SRCS_A) \
|
||||
$(LIBC_UNICODE_A_SRCS_S) \
|
||||
$(LIBC_UNICODE_A_SRCS_C)
|
||||
|
||||
LIBC_UNICODE_A_OBJS = \
|
||||
$(LIBC_UNICODE_A_SRCS:%=o/$(MODE)/%.zip.o) \
|
||||
$(LIBC_UNICODE_A_SRCS_A:%.s=o/$(MODE)/%.o) \
|
||||
$(LIBC_UNICODE_A_SRCS_S:%.S=o/$(MODE)/%.o) \
|
||||
$(LIBC_UNICODE_A_SRCS_C:%.c=o/$(MODE)/%.o)
|
||||
|
||||
LIBC_UNICODE_A_CHECKS = \
|
||||
$(LIBC_UNICODE_A).pkg \
|
||||
$(LIBC_UNICODE_A_HDRS:%=o/$(MODE)/%.ok)
|
||||
|
||||
LIBC_UNICODE_A_DIRECTDEPS = \
|
||||
LIBC_STUBS \
|
||||
LIBC_NEXGEN32E \
|
||||
LIBC_RUNTIME \
|
||||
LIBC_STR \
|
||||
LIBC_SYSV
|
||||
|
||||
LIBC_UNICODE_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(LIBC_UNICODE_A_DIRECTDEPS),$($(x))))
|
||||
|
||||
$(LIBC_UNICODE_A): \
|
||||
libc/unicode/ \
|
||||
$(LIBC_UNICODE_A).pkg \
|
||||
$(LIBC_UNICODE_A_OBJS)
|
||||
|
||||
$(LIBC_UNICODE_A).pkg: \
|
||||
$(LIBC_UNICODE_A_OBJS) \
|
||||
$(foreach x,$(LIBC_UNICODE_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
LIBC_UNICODE_LIBS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)))
|
||||
LIBC_UNICODE_SRCS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_SRCS))
|
||||
LIBC_UNICODE_HDRS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_HDRS))
|
||||
LIBC_UNICODE_BINS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_BINS))
|
||||
LIBC_UNICODE_CHECKS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_CHECKS))
|
||||
LIBC_UNICODE_OBJS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_OBJS))
|
||||
LIBC_UNICODE_TESTS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_TESTS))
|
||||
$(LIBC_UNICODE_OBJS): $(BUILD_FILES) libc/unicode/unicode.mk
|
||||
|
||||
.PHONY: o/$(MODE)/libc/unicode
|
||||
o/$(MODE)/libc/unicode: $(LIBC_UNICODE) $(LIBC_UNICODE_CHECKS)
|
||||
|
||||
o/$(MODE)/libc/unicode/eastasianwidth.bin: \
|
||||
libc/unicode/eastasianwidth.txt \
|
||||
o/$(MODE)/tool/decode/mkwides.com
|
||||
@TARGET=$@ ACTION=MKWIDES build/do \
|
||||
o/$(MODE)/tool/decode/mkwides.com -o $@ $<
|
||||
o/$(MODE)/libc/unicode/eastasianwidth.bin.lz4: \
|
||||
o/$(MODE)/libc/unicode/eastasianwidth.bin \
|
||||
o/$(MODE)/third_party/lz4cli/lz4cli.com
|
||||
@TARGET=$@ ACTION=LZ4 build/do \
|
||||
o/$(MODE)/third_party/lz4cli/lz4cli.com -q -f -9 --content-size $< $@
|
||||
o/$(MODE)/libc/unicode/eastasianwidth.s: \
|
||||
o/$(MODE)/libc/unicode/eastasianwidth.bin.lz4 \
|
||||
o/$(MODE)/tool/build/lz4toasm.com
|
||||
@TARGET=$@ ACTION=BIN2ASM build/do \
|
||||
o/$(MODE)/tool/build/lz4toasm.com -s kEastAsianWidth -o $@ $<
|
||||
|
||||
o/$(MODE)/libc/unicode/combiningchars.bin: \
|
||||
libc/unicode/unicodedata.txt \
|
||||
o/$(MODE)/tool/decode/mkcombos.com
|
||||
@TARGET=$@ ACTION=MKCOMBOS build/do \
|
||||
o/$(MODE)/tool/decode/mkcombos.com -o $@ $<
|
||||
o/$(MODE)/libc/unicode/combiningchars.bin.lz4: \
|
||||
o/$(MODE)/libc/unicode/combiningchars.bin \
|
||||
o/$(MODE)/third_party/lz4cli/lz4cli.com
|
||||
@TARGET=$@ ACTION=LZ4 build/do \
|
||||
o/$(MODE)/third_party/lz4cli/lz4cli.com -q -f -9 --content-size $< $@
|
||||
o/$(MODE)/libc/unicode/combiningchars.s: \
|
||||
o/$(MODE)/libc/unicode/combiningchars.bin.lz4 \
|
||||
o/$(MODE)/tool/build/lz4toasm.com
|
||||
@TARGET=$@ ACTION=BIN2ASM build/do \
|
||||
o/$(MODE)/tool/build/lz4toasm.com -s kCombiningChars -o $@ $<
|
32841
libc/unicode/unicodedata.txt
Normal file
32841
libc/unicode/unicodedata.txt
Normal file
File diff suppressed because it is too large
Load diff
35
libc/unicode/wcsnwidth.c
Normal file
35
libc/unicode/wcsnwidth.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/unicode/unicode.h"
|
||||
|
||||
/**
 * Returns monospace display width of wide character string.
 *
 * Examines characters until the NUL terminator or until n characters
 * have been measured, whichever comes first.
 *
 * @param pwcs is the wide string to measure
 * @param n is the maximum number of wide characters to examine
 * @return total cell width, or -1 if wcwidth() reports a negative
 *     width for any examined character
 */
int wcsnwidth(const wchar_t *pwcs, size_t n) {
  int w, width = 0;
  /* test the remaining budget before dereferencing, so that pwcs[n]
     is never read when the buffer holds exactly n characters */
  for (; n-- > 0 && *pwcs; pwcs++) {
    if ((w = wcwidth(*pwcs)) < 0) {
      return -1;
    }
    width += w;
  }
  return width;
}
|
28
libc/unicode/wcswidth.c
Normal file
28
libc/unicode/wcswidth.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/limits.h"
|
||||
#include "libc/unicode/unicode.h"
|
||||
|
||||
/**
 * Returns monospace display width of wide character string.
 *
 * Measures the whole NUL-terminated string by calling wcsnwidth() with
 * SIZE_MAX as the length limit, and passes its result through.
 */
int wcswidth(const wchar_t *pwcs) {
  int width;
  width = wcsnwidth(pwcs, SIZE_MAX);
  return width;
}
|
38
libc/unicode/wcwidth.c
Normal file
38
libc/unicode/wcwidth.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/unicode/unicode.h"
|
||||
|
||||
/**
|
||||
* Returns cell width of monospace character.
|
||||
*/
|
||||
int wcwidth(wchar_t ucs) {
|
||||
if (ucs == 0) return 0;
|
||||
if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) {
|
||||
return -1;
|
||||
} else if (0 <= ucs && ucs < kCombiningCharsBits &&
|
||||
bt(kCombiningChars, ucs)) {
|
||||
return 0;
|
||||
} else if (0 <= ucs && ucs < kEastAsianWidthBits) {
|
||||
return 1 + bt(kEastAsianWidth, ucs);
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue