Initial import

This commit is contained in:
Justine Tunney 2020-06-15 07:18:57 -07:00
commit c91b3c5006
14915 changed files with 590219 additions and 0 deletions

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,236 @@
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify │
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License. │
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of │
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software │
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/dce.h"
#include "libc/macros.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/madv.h"
/ Capacity of the kCombiningChars bitset expressed in bits, i.e. the
/ 114752 byte buffer size times 8, stored as one 32-bit constant.
/ wcwidth() compares code points against this before testing a bit.
.rodata.cst4
kCombiningCharsBits:
.long 114752 * 8
.endobj kCombiningCharsBits,globl,hidden
.previous
/ Zero-filled 114752 byte buffer that receives the decompressed table.
/ NOTE(review): .initbss is a project macro; the (400,_init_kCombiningChars)
/ arguments are repeated on the .init.start and .initro pieces of this
/ file, presumably to order and pair them. Confirm in libc/macros.h.
.initbss 400,_init_kCombiningChars
kCombiningChars:
.zero 114752
.endobj kCombiningChars,globl,hidden
.previous
/ Startup fragment that runs lz4cpy for the kCombiningChars table.
/ NOTE(review): register roles are inferred, not shown here. It looks
/ like %rdi is the output cursor, %rsi the input cursor and %edx the
/ compressed byte count. Confirm against lz4cpy and .init.start.
.init.start 400,_init_kCombiningChars
push %rsi
mov $1257,%edx
call lz4cpy
/ Whatever lz4cpy returned in %rax becomes the new %rdi.
mov %rax,%rdi
pop %rsi
/ 1264 equals the size of the byte table emitted in this file
/ (158 rows of 8 bytes), i.e. 1257 rounded up to a multiple of 8.
add $1264,%rsi
.init.end 400,_init_kCombiningChars
/ The data below is sparse, as evidenced by:
/ o/tool/viz/bing.com <o/libc/str/CombiningChars.bin |
/ o/tool/viz/fold.com
/ Thus ask the kernel to shrink its physical memory requirements,
/ which on Linux may sadly need an intermediate mmap step to work
/ the way one would have hoped.
/ Constructor body placed in .text.startup. It sets up a standard
/ frame, then calls mergepages with the kCombiningChars buffer and its
/ size of 114752 bytes, and returns.
/ NOTE(review): ezlea and .profilable are project macros. ezlea
/ presumably loads the address of kCombiningChars into %rdi; confirm.
.text.startup
kCombiningCharsInit:
push %rbp
mov %rsp,%rbp
.profilable
ezlea kCombiningChars,di
mov $114752,%esi
call mergepages
pop %rbp
ret
.endfn kCombiningCharsInit
.previous
/ Adds an 8-byte aligned pointer to kCombiningCharsInit to the
/ .init_array section, presumably so the runtime invokes it at startup.
.init_array
.align 8
kCombiningCharsCtor:
.quad kCombiningCharsInit
.endobj kCombiningCharsCtor
.previous
/ o/tool/build/lz4toasm.com \
/ -o o/libc/str/CombiningChars.s \
/ -s kCombiningChars \
/ o/libc/str/CombiningChars.bin.lz4
.initro 400,_init_kCombiningChars
kCombiningCharsLz4:
.byte 0x16,0x00,0x01,0x00,0x24,0x40,0x01,0x0a #  $@
.byte 0x00,0x4f,0x81,0x10,0x01,0x00,0x01,0x00 # Oü  
.byte 0x2c,0x7b,0x3c,0x00,0xfc,0xff,0xe0,0xaf #,{< λα»
.byte 0xff,0x01,0x00,0x3f,0x20,0x00,0x30,0x38 #λ ?  08
.byte 0x00,0x0c,0x2f,0xf8,0x03,0x5a,0x00,0x0d # /°Z 
.byte 0x10,0xfe,0x49,0x00,0x23,0xbf,0xb6,0x0e #I .
.byte 0x00,0x42,0x3f,0x00,0xff,0x17,0x39,0x00 # B? λ9 
.byte 0x48,0xff,0xff,0x00,0x00,0xb7,0x00,0xf7 #Hλλ   
.byte 0x01,0xc0,0xbf,0x9f,0x3d,0x00,0x00,0x00 #ƒ=   
.byte 0x80,0x02,0x00,0x00,0x00,0xff,0xff,0xff #Ç   λλλ
.byte 0x07,0x1a,0x00,0x13,0xff,0x28,0x00,0x91 # λ( æ
.byte 0xf8,0x0f,0x20,0x00,0x00,0xc0,0xfb,0xef #°   
.byte 0x3e,0x0e,0x00,0x1b,0x0e,0x8a,0x00,0x26 #> è &
.byte 0xff,0xff,0x37,0x00,0xa2,0x14,0xfe,0x21 #λλ7 ó!
.byte 0xfe,0x00,0x0c,0x00,0x00,0x00,0x02,0x10 #    
.byte 0x00,0x40,0x10,0x1e,0x20,0x00,0x10,0x00 # @   
.byte 0x23,0x40,0x06,0x10,0x00,0x20,0x86,0x39 #.@  å9
.byte 0x1a,0x00,0x24,0x23,0x00,0x10,0x00,0x21 # $.  !
.byte 0xbe,0x21,0x20,0x00,0x13,0xfc,0x30,0x00 #!  0 
.byte 0x41,0x90,0x1e,0x20,0x40,0x40,0x00,0x13 #AÉ @@ 
.byte 0x04,0x5e,0x00,0x22,0x01,0x20,0x08,0x00 #^ .  
.byte 0x13,0x11,0x93,0x00,0x38,0xc1,0x3d,0x60 #ô 8=`
.byte 0x60,0x00,0x31,0x90,0x40,0x30,0x40,0x00 #` 1É@0@ 
.byte 0x13,0x00,0x0f,0x01,0x13,0x18,0x70,0x00 # p 
.byte 0x06,0x9f,0x00,0x27,0x04,0x5c,0x0d,0x00 #ƒ '\ 
.byte 0x48,0xf2,0x07,0x80,0x7f,0x1d,0x00,0x45 #HÇ E
.byte 0xf2,0x1f,0x00,0x3f,0x0d,0x00,0x43,0x03 # ? C
.byte 0x00,0x00,0xa0,0x57,0x00,0x50,0xfe,0x7f #  áW P
.byte 0xdf,0xe0,0xff,0x41,0x01,0x28,0x1f,0x40 #αλA(@
.byte 0x2f,0x00,0xff,0x00,0xe0,0xfd,0x66,0x00 #/ λ α²f 
.byte 0x00,0x00,0xc3,0x01,0x00,0x1e,0x00,0x64 #    d
.byte 0x20,0x00,0x20,0x7a,0x01,0x05,0x1f,0xff #   zλ
.byte 0x01,0x00,0x00,0x0f,0x13,0x02,0x18,0x2f #  /
.byte 0xe0,0x00,0x01,0x00,0x62,0x13,0x1c,0x04 #α  b
.byte 0x00,0x26,0x0c,0x00,0x42,0x01,0x52,0xb0 # & BR
.byte 0x3f,0x40,0xfe,0x0f,0xe8,0x00,0x1a,0x78 #?@Φ x
.byte 0x2e,0x00,0x26,0x60,0x00,0x85,0x01,0x04 #. &` à
.byte 0x14,0x00,0x4f,0x87,0x01,0x04,0x0e,0x60 # Oç`
.byte 0x00,0x07,0x23,0x80,0x09,0x3f,0x03,0x53 # .Ç?S
.byte 0x7f,0xe5,0x1f,0xf8,0x9f,0x2a,0x01,0x05 #σ°ƒ*
.byte 0x8e,0x01,0x11,0x0f,0x06,0x00,0x32,0xd0 #Ä 2
.byte 0x17,0x04,0x70,0x02,0x01,0xd0,0x01,0x23 #p.
.byte 0x3c,0x3b,0x32,0x00,0x13,0xa3,0xde,0x01 #<;2 ú
.byte 0x2f,0xf0,0xcf,0x58,0x00,0x00,0x6f,0xf7 #/X  o
.byte 0xff,0xfd,0x21,0x10,0x03,0x8c,0x01,0x0c #λ²!î
.byte 0x1f,0xfb,0x15,0x01,0x24,0x40,0xa0,0x03 #§$@á
.byte 0xe0,0x00,0x02,0x00,0x72,0x60,0x00,0xf8 #α  r` °
.byte 0x00,0x00,0x00,0x7c,0x15,0x00,0x2c,0xdf #   |§ ,
.byte 0xff,0x62,0x00,0x2f,0x01,0x00,0x01,0x00 #λb /  
.byte 0xff,0x6b,0x1d,0x80,0xff,0x01,0x1c,0x80 #λkÇλÇ
.byte 0xa2,0x01,0x0f,0x68,0x00,0x32,0x19,0x3c #óh 2<
.byte 0x0e,0x00,0x2f,0x1e,0x00,0x01,0x00,0xff # /  λ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xb3,0x20,0x80 #λλλλλ Ç
.byte 0xf7,0xf6,0x12,0x27,0xc0,0x00,0xfb,0x12 #÷' 
.byte 0x58,0xff,0xff,0x7f,0x00,0x03,0x24,0x00 #Xλλ $ 
.byte 0x1a,0x06,0x33,0x00,0x23,0x44,0x08,0xf4 #3 .D
.byte 0x11,0x0b,0x4b,0x00,0x11,0x30,0x60,0x0f #K 0`
.byte 0x11,0x03,0x70,0x0f,0x62,0xc0,0x3f,0x00 #pb? 
.byte 0x00,0x80,0xff,0x46,0x00,0x02,0x10,0x14 # ÇλF 
.byte 0x20,0xc8,0x33,0x06,0x00,0x05,0x29,0x13 # 3 )
.byte 0x52,0x7e,0x66,0x00,0x08,0x10,0xf8,0x13 #R~f °
.byte 0x02,0x11,0x00,0x21,0x9d,0xc1,0x43,0x12 # !¥C
.byte 0x19,0x30,0x66,0x13,0x1c,0x08,0x64,0x00 #0fd 
.byte 0x2f,0x20,0x21,0x96,0x0a,0xff,0xff,0xff #/ !ûλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xdb,0x1e #λλλλλλ
.byte 0x40,0x13,0x00,0x3f,0xfc,0xff,0x03,0x5d #@ ?λ]
.byte 0x00,0x34,0x00,0x02,0x1a,0x0f,0x06,0x1a # 4 
.byte 0x08,0x1d,0x80,0xdc,0x1f,0x0a,0x91,0x0a #Çæ
.byte 0x1f,0x0e,0x7f,0x00,0x2c,0x1f,0x20,0x1d # ,
.byte 0x00,0x09,0x0e,0x74,0x00,0x2f,0xc0,0x07 # t /
.byte 0xdd,0x01,0xbd,0x22,0x6e,0xf0,0x23,0x1e #.n.
.byte 0x0f,0x1c,0x00,0x01,0x1f,0x60,0x64,0x00 # `d 
.byte 0x34,0x1f,0xf0,0x44,0x00,0x30,0x05,0xf4 #4D 0
.byte 0x20,0x0b,0x18,0x00,0x1a,0x02,0xb1,0x1e #  
.byte 0x03,0x72,0x1c,0x24,0x78,0x26,0xda,0x01 #r$x&
.byte 0x00,0xf0,0x0c,0x35,0x80,0xef,0x1f,0x32 # 5Ç2
.byte 0x02,0x02,0x20,0x00,0x29,0xc0,0x7f,0x26 #  )&
.byte 0x1c,0x3f,0x80,0xd3,0x40,0x7c,0x02,0x01 #?Ç@|
.byte 0x26,0xf8,0x07,0xc0,0x20,0x00,0x7e,0x00 #&°  ~ 
.byte 0x3f,0xc0,0x1f,0x1f,0xc7,0x02,0x06,0x19 #?
.byte 0x5c,0x28,0x03,0x3f,0xf8,0x85,0x0d,0xb1 #\(?°à
.byte 0x1c,0x0b,0x22,0xb0,0x01,0xa3,0x0d,0x04 #.ú
.byte 0x30,0x00,0x19,0xa7,0xde,0x00,0x29,0x28 #0 º )(
.byte 0xbf,0x78,0x20,0x2f,0xbc,0x0f,0x38,0x0e #x /8
.byte 0x0d,0x2f,0xff,0x06,0x96,0x01,0x20,0x10 #/λû
.byte 0x0c,0x74,0x00,0x11,0xfe,0xd2,0x02,0x52 #t R
.byte 0xf8,0x79,0x80,0x00,0x7e,0x4c,0x03,0x2f #°yÇ ~L/
.byte 0xfc,0x7f,0xdb,0x03,0x20,0x28,0x7f,0xbf # (
.byte 0x1c,0x04,0x3b,0xff,0xfc,0x6d,0x20,0x00 #;λm  
.byte 0x26,0x7e,0xb4,0x21,0x00,0x1f,0xa3,0x58 #&~! úX
.byte 0x00,0x18,0x1f,0x18,0x23,0x07,0xff,0xff # .λλ
.byte 0x96,0x2f,0xff,0x01,0xfb,0x0d,0xff,0xff #û/λλλ
.byte 0xff,0xff,0xff,0xff,0xc9,0x04,0xf0,0x0a #λλλλ
.byte 0x1f,0x7f,0x1c,0x19,0x70,0x04,0x08,0x00 #p 
.byte 0x1f,0x07,0x30,0x18,0xff,0xff,0xff,0xff #0λλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0x96,0x2f,0x60 #λλλλλû/`
.byte 0x0f,0x5f,0x25,0xff,0xff,0x87,0x5d,0x03 #_%λλç]
.byte 0xf8,0xff,0xe7,0x0f,0x30,0x34,0x05,0x66 #°λτ04f
.byte 0x37,0x0f,0xba,0x14,0xe2,0x01,0x01,0x00 #7Γ 
.byte 0x12,0x7f,0x2d,0x3a,0x20,0x1f,0x20,0x01 #-:
.byte 0x26,0x3f,0xf8,0xfe,0xff,0xc0,0x00,0x97 #&?°λ ù
.byte 0x5f,0x7f,0xff,0xff,0xf9,0xdb,0x13,0x0e #_λλ
.byte 0x0e,0x1f,0x7f,0xb9,0x1a,0x24,0x0f,0xda #$
.byte 0x01,0xa9,0x0a,0xf4,0x00,0x1f,0xf0,0x37 # 7
.byte 0x0f,0xff,0x44,0x2f,0xf8,0x00,0x01,0x00 #λD/°  
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xee,0x57,0x02,0x00,0x00,0x00 #λλεW   
.byte 0xff,0x01,0x00,0x0c,0x20,0x00,0x1f,0xff #λ   λ
.byte 0x01,0x00,0x07,0x50,0xff,0xff,0xff,0x00 # Pλλλ 
.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.endobj kCombiningCharsLz4,globl,hidden
.previous

View file

@ -0,0 +1,92 @@
/ o/$(MODE)/tool/build/lz4toasm.com -o o/$(MODE)/libc/str/EastAsianWidth.s -s kEastAsianWidth o/$(MODE)/libc/str/EastAsianWidth.bin.lz4
.include "libc/macros.inc"
/ Capacity of the kEastAsianWidth bitset expressed in bits, i.e. the
/ 32768 byte buffer size times 8, stored as a 4-byte aligned long.
.rodata
.align 4
kEastAsianWidthBits:
.long 32768 * 8
.endobj kEastAsianWidthBits,globl,hidden
.previous
/ Zero-filled 32768 byte buffer that receives the decompressed table.
/ NOTE(review): .initbss is a project macro; the (500,_init_kEastAsianWidth)
/ arguments are repeated on the .init.start and .initro pieces of this
/ file, presumably to order and pair them. Confirm in libc/macros.inc.
.initbss 500,_init_kEastAsianWidth
kEastAsianWidth:
.zero 32768
.endobj kEastAsianWidth,globl,hidden
.previous
/ Startup fragment that runs lz4cpy for the kEastAsianWidth table.
/ NOTE(review): register roles are inferred, not shown here. It looks
/ like %rdi is the output cursor, %rsi the input cursor and %edx the
/ compressed byte count. Confirm against lz4cpy and .init.start.
.init.start 500,_init_kEastAsianWidth
push %rsi
mov $500,%edx
call lz4cpy
/ Whatever lz4cpy returned in %rax becomes the new %rdi.
mov %rax,%rdi
pop %rsi
/ 504 equals the size of the byte table emitted in this file
/ (63 rows of 8 bytes), i.e. 500 rounded up to a multiple of 8.
add $504,%rsi
.init.end 500,_init_kEastAsianWidth
.initro 500,_init_kEastAsianWidth
kEastAsianWidthLz4:
.byte 0x1f,0x00,0x01,0x00,0xff,0xff,0x0e,0x17 #  λλ
.byte 0xff,0x01,0x00,0x0f,0x30,0x01,0xff,0x12 #λ 0λ
.byte 0xaf,0x88,0x99,0xf0,0xad,0xae,0xfb,0x2b #»êÖ¡«+
.byte 0x00,0x81,0xfb,0x13,0x01,0xf6,0x3f,0x0c # ü÷?
.byte 0x00,0x06,0x1a,0x00,0x04,0x2f,0x1e,0x09 #  /
.byte 0x5c,0x00,0x2d,0x43,0x60,0x00,0x00,0x30 #\ -C`  0
.byte 0xa9,0x02,0x10,0x0f,0x06,0x00,0xf3,0x0d # 
.byte 0x80,0x00,0x00,0x08,0x00,0x02,0x0c,0x00 #Ç    
.byte 0x60,0x30,0x40,0x10,0x00,0x00,0x04,0x2c #`0@►  ♦,
.byte 0x24,0x20,0x0c,0x00,0x00,0x00,0x01,0x00 #$     
.byte 0x00,0x00,0x50,0xb8,0x33,0x00,0x10,0xe0 #  P3 α
.byte 0x11,0x00,0x1f,0x80,0x00,0x01,0x58,0x12 # Ç X
.byte 0x18,0x07,0x00,0x1f,0x21,0xb0,0x03,0x55 # !U
.byte 0x16,0xfb,0xb2,0x03,0x3f,0x0f,0x00,0xff #? λ
.byte 0x01,0x00,0x06,0x11,0x3f,0x35,0x01,0x03 # ?5
.byte 0x0d,0x00,0x4e,0x7f,0xfe,0xff,0xff,0x0b # Nλλ
.byte 0x00,0x41,0xff,0xff,0xff,0xe0,0x06,0x00 # Aλλλα 
.byte 0x07,0x13,0x00,0x11,0x7f,0x06,0x00,0x17 #  
.byte 0x07,0x5e,0x00,0x02,0x12,0x00,0x2f,0x00 #^  / 
.byte 0xff,0x01,0x00,0xff,0xff,0xff,0x5d,0x0e #λ λλλ]
.byte 0xa0,0x07,0x0f,0x01,0x00,0xff,0xff,0xff #á λλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xbe #λλλλλλλ
.byte 0x13,0x1f,0x67,0x0e,0x0f,0x0c,0x13,0x83 #gâ
.byte 0x1f,0x1f,0x60,0x13,0x3d,0x0f,0x63,0x06 #`=c
.byte 0xff,0xff,0xff,0xff,0xff,0x66,0x2f,0x0f #λλλλλf/
.byte 0x00,0x01,0x00,0xff,0xff,0xf9,0x04,0xb0 #  λλ
.byte 0x1a,0x2f,0xd1,0xe0,0x00,0x1d,0xff,0x10 #/α λ
.byte 0x0f,0xa8,0x15,0x21,0x0f,0xa2,0x00,0x50 #¿§!ó P
.byte 0x12,0x03,0xa6,0x00,0x3f,0xf7,0xff,0x7f #ª ?λ
.byte 0xd9,0x04,0x00,0x08,0xcd,0x19,0x1b,0x01 # 
.byte 0x1c,0x00,0x2f,0x7f,0x00,0x01,0x00,0xff # /  λ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xf8,0x1f,0x0f,0x80 #λλλλ°Ç
.byte 0x18,0xff,0xff,0xf1,0x0f,0x00,0x03,0x4c #λλ± L
.byte 0x1f,0x07,0x04,0x08,0xff,0xff,0xff,0xff #λλλλ
.byte 0x92,0x0f,0xc5,0x04,0x10,0x03,0x27,0x16 #Æ'
.byte 0x4f,0x07,0x00,0xf0,0x00,0x6b,0x1b,0x1f #O  k
.byte 0x0f,0x04,0x10,0xff,0xff,0xff,0xff,0xff #λλλλλ
.byte 0xff,0xff,0x94,0x1f,0x10,0x19,0x00,0x05 #λλö 
.byte 0x0f,0x22,0x39,0x05,0x29,0x40,0xfe,0xd5 #.9)@■╒
.byte 0x0c,0x12,0x07,0x14,0x08,0x6f,0x0f,0xff #oλ
.byte 0x01,0x03,0x00,0x3f,0x60,0x08,0x04,0x34 # ?`4
.byte 0x01,0xe0,0xbf,0x10,0x08,0x15,0xdf,0x36 #α§6
.byte 0x38,0x10,0x87,0x08,0x00,0x15,0x11,0x64 #8ç §d
.byte 0x08,0x1f,0xfd,0x40,0x08,0x03,0x15,0x9f #²@§ƒ
.byte 0xad,0x38,0x11,0x78,0x4d,0x1f,0x10,0x04 #¡8xM
.byte 0xf3,0x39,0x07,0xb4,0x00,0x1c,0xf8,0x6a #9 °j
.byte 0x1f,0x05,0xde,0x38,0x6f,0x10,0x27,0x00 #8o' 
.byte 0x00,0x18,0xf0,0x81,0x0d,0x0a,0x0f,0x9e # ü
.byte 0x08,0x12,0x18,0xe0,0x8f,0x00,0x20,0x7b #αÅ  {
.byte 0xfc,0x06,0x00,0x20,0xe7,0xc7,0x05,0x00 #  τ 
.byte 0x0f,0xe0,0x1f,0x01,0x5f,0x0f,0x07,0x07 #α_
.byte 0x00,0x3f,0xe0,0x20,0x9b,0x0f,0x01,0x00 # ?α ¢ 
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0x0b,0x1f,0x3f,0xfc,0x1f,0xff,0xff,0xff #?λλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #λλλλλλλλ
.byte 0xff,0xff,0xff,0xff,0xff,0x08,0x50,0xff #λλλλλPλ
.byte 0xff,0xff,0xff,0x3f,0x00,0x00,0x00,0x00
.endobj kEastAsianWidthLz4,globl,hidden
.previous

View file

@ -0,0 +1,38 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/safemacros.h"
#include "libc/str/str.h"
#include "libc/unicode/unicode.h"
/**
 * Returns monospace display width of UTF-16 or UCS-2 string.
 *
 * Scanning stops at the first NUL code unit or once `n` code units
 * have been consumed, whichever comes first. Characters for which
 * wcwidth() reports a negative width contribute zero columns.
 *
 * @param p is the string, which needn't be NUL-terminated if `n`
 *     bounds it
 * @param n is the maximum number of char16_t units to examine
 * @return total column count
 */
int strnwidth16(const char16_t *p, size_t n) {
  int k;
  size_t l;
  wint_t wc;
  l = 0;
  while (n && *p) {
    k = getutf16(p, &wc); /* code units used by this character */
    l += max(0, wcwidth(wc));
    p += k;
    /* a surrogate pair may straddle the limit, so saturate at zero */
    n -= (size_t)k < n ? (size_t)k : n;
  }
  return l;
}

71
libc/unicode/strwidth.c Normal file
View file

@ -0,0 +1,71 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/safemacros.h"
#include "libc/conv/conv.h"
#include "libc/limits.h"
#include "libc/str/internal.h"
#include "libc/str/str.h"
#include "libc/unicode/unicode.h"
#define kOneTrueTabWidth 8
/**
 * Measures how many monospace columns a NUL-terminated UTF-8 string
 * occupies, by delegating to strnwidth() with no byte limit.
 *
 * @param s is the NUL-terminated UTF-8 string
 * @return column count as computed by strnwidth()
 */
int(strwidth)(const char *s) {
  const size_t unbounded = SIZE_MAX;
  return strnwidth(s, unbounded);
}
/**
 * Returns monospace display width of UTF-8 string, bounded by `n` bytes.
 *
 * Tabs advance to the next multiple of kOneTrueTabWidth. An escape
 * byte starts a sequence that is skipped without adding width: any run
 * of `[`, `;` and digits is consumed, plus the one byte that follows.
 * Everything else is decoded with tpdecode() and measured with
 * wcwidth(), where negative widths count as zero.
 *
 * @param s is the UTF-8 string
 * @param n is the byte limit, or SIZE_MAX to rely on the NUL terminator
 * @return total column count
 */
int(strnwidth)(const char *s, size_t n) {
  /* TODO(jart): Fix this function. */
  size_t l;
  wint_t wc;
  const unsigned char *p, *pe;
  l = 0;
  if (n) {
    p = (const unsigned char *)s;
    pe = (const unsigned char *)(n == SIZE_MAX ? INTPTR_MAX : (intptr_t)s + n);
    for (;;) {
      /* skip stray continuation bytes so decoding starts on a lead byte */
      while (p < pe && iscont(*p)) p++;
      /* >= rather than ==, since decoding a multibyte character that
         straddles the limit can carry p past pe */
      if (p >= pe || !*p) break;
      if (*p == L'\t') {
        /* yields a full tab width when l already sits on a tab stop */
        l += kOneTrueTabWidth - (l & (kOneTrueTabWidth - 1));
        ++p;
      } else if (*p == L'\e') {
        while (++p < pe && *p) {
          if (*p == '[' || *p == ';' || isdigit(*p)) continue;
          ++p; /* consume the byte that ends the sequence */
          break;
        }
      } else {
        p += abs(tpdecode((const char *)p, &wc));
        l += max(0, wcwidth(wc));
      }
    }
  }
  return l;
}

29
libc/unicode/strwidth16.c Normal file
View file

@ -0,0 +1,29 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/safemacros.h"
#include "libc/limits.h"
#include "libc/unicode/unicode.h"
/**
 * Measures how many monospace columns a NUL-terminated UTF-16 or UCS-2
 * string occupies, by delegating to strnwidth16() with no unit limit.
 *
 * @param s is the NUL-terminated char16_t string
 * @return column count as computed by strnwidth16()
 */
int strwidth16(const char16_t *s) {
  const size_t unbounded = SIZE_MAX;
  return strnwidth16(s, unbounded);
}

49
libc/unicode/unicode.h Normal file
View file

@ -0,0 +1,49 @@
#ifndef COSMOPOLITAN_LIBC_UNICODE_UNICODE_H_
#define COSMOPOLITAN_LIBC_UNICODE_UNICODE_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § characters » unicode
*/
/* Bit tables indexed by code point and probed with bt() in wcwidth().
   Each *Bits constant holds the number of bits its table covers, so
   code points at or beyond it must not be looked up. */
extern const uint64_t kEastAsianWidth[];
extern const uint32_t kEastAsianWidthBits;
extern const uint64_t kCombiningChars[];
extern const uint32_t kCombiningCharsBits;
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § strings » multibyte » unicode
*/
/* Column width of one character. */
int wcwidth(wchar_t) pureconst;
/* NOTE: unlike POSIX wcswidth(), this takes no length argument; the
   bounded form is wcsnwidth(). */
int wcswidth(const wchar_t *) strlenesque;
int wcsnwidth(const wchar_t *, size_t) strlenesque;
/* UTF-8 variants; the size_t argument bounds how much is scanned. */
int strwidth(const char *) strlenesque;
int strnwidth(const char *, size_t) strlenesque;
/* UTF-16 / UCS-2 variants. */
int strwidth16(const char16_t *) strlenesque;
int strnwidth16(const char16_t *, size_t) strlenesque;
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § unicode » generic typing
*/
#if __STDC_VERSION__ + 0 >= 201112
/*
 * Type-generic front ends that pick the implementation from the element
 * type of `s`: wchar_t, char16_t, or anything else (treated as UTF-8).
 * The bounded form must select the two-argument wcsnwidth() for wide
 * strings, because wcswidth() here accepts only the string.
 */
#define strwidth(s)             \
  _Generic(*(s),                \
      wchar_t: wcswidth,        \
      char16_t: strwidth16,     \
      default: strwidth)(s)
#define strnwidth(s, n)         \
  _Generic(*(s),                \
      wchar_t: wcsnwidth,       \
      char16_t: strnwidth16,    \
      default: strnwidth)(s, n)
#endif /* C11 */
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_UNICODE_UNICODE_H_ */

91
libc/unicode/unicode.mk Normal file
View file

@ -0,0 +1,91 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
# Package definition for the libc/unicode static archive.
PKGS += LIBC_UNICODE
LIBC_UNICODE_ARTIFACTS += LIBC_UNICODE_A
LIBC_UNICODE = $(LIBC_UNICODE_A_DEPS) $(LIBC_UNICODE_A)
LIBC_UNICODE_A = o/$(MODE)/libc/unicode/unicode.a
# Every file in libc/unicode/, split by extension below.
LIBC_UNICODE_A_FILES := $(wildcard libc/unicode/*)
LIBC_UNICODE_A_HDRS = $(filter %.h,$(LIBC_UNICODE_A_FILES))
LIBC_UNICODE_A_SRCS_A = $(filter %.s,$(LIBC_UNICODE_A_FILES))
LIBC_UNICODE_A_SRCS_S = $(filter %.S,$(LIBC_UNICODE_A_FILES))
LIBC_UNICODE_A_SRCS_C = $(filter %.c,$(LIBC_UNICODE_A_FILES))
LIBC_UNICODE_A_SRCS = \
$(LIBC_UNICODE_A_SRCS_A) \
$(LIBC_UNICODE_A_SRCS_S) \
$(LIBC_UNICODE_A_SRCS_C)
# One .zip.o per source file, plus one regular .o per source file.
LIBC_UNICODE_A_OBJS = \
$(LIBC_UNICODE_A_SRCS:%=o/$(MODE)/%.zip.o) \
$(LIBC_UNICODE_A_SRCS_A:%.s=o/$(MODE)/%.o) \
$(LIBC_UNICODE_A_SRCS_S:%.S=o/$(MODE)/%.o) \
$(LIBC_UNICODE_A_SRCS_C:%.c=o/$(MODE)/%.o)
LIBC_UNICODE_A_CHECKS = \
$(LIBC_UNICODE_A).pkg \
$(LIBC_UNICODE_A_HDRS:%=o/$(MODE)/%.ok)
# Packages this archive depends on directly.
LIBC_UNICODE_A_DIRECTDEPS = \
LIBC_STUBS \
LIBC_NEXGEN32E \
LIBC_RUNTIME \
LIBC_STR \
LIBC_SYSV
# Expands each direct dependency name to its value, deduplicated.
LIBC_UNICODE_A_DEPS := \
$(call uniq,$(foreach x,$(LIBC_UNICODE_A_DIRECTDEPS),$($(x))))
$(LIBC_UNICODE_A): \
libc/unicode/ \
$(LIBC_UNICODE_A).pkg \
$(LIBC_UNICODE_A_OBJS)
$(LIBC_UNICODE_A).pkg: \
$(LIBC_UNICODE_A_OBJS) \
$(foreach x,$(LIBC_UNICODE_A_DIRECTDEPS),$($(x)_A).pkg)
# Aggregates collected over every artifact in LIBC_UNICODE_ARTIFACTS.
LIBC_UNICODE_LIBS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)))
LIBC_UNICODE_SRCS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_SRCS))
LIBC_UNICODE_HDRS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_HDRS))
LIBC_UNICODE_BINS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_BINS))
LIBC_UNICODE_CHECKS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_CHECKS))
LIBC_UNICODE_OBJS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_OBJS))
LIBC_UNICODE_TESTS = $(foreach x,$(LIBC_UNICODE_ARTIFACTS),$($(x)_TESTS))
# Objects are rebuilt whenever the build files or this makefile change.
$(LIBC_UNICODE_OBJS): $(BUILD_FILES) libc/unicode/unicode.mk
.PHONY: o/$(MODE)/libc/unicode
o/$(MODE)/libc/unicode: $(LIBC_UNICODE) $(LIBC_UNICODE_CHECKS)
# Table pipeline, step 1: mkwides.com turns eastasianwidth.txt into a
# binary table.
o/$(MODE)/libc/unicode/eastasianwidth.bin: \
libc/unicode/eastasianwidth.txt \
o/$(MODE)/tool/decode/mkwides.com
@TARGET=$@ ACTION=MKWIDES build/do \
o/$(MODE)/tool/decode/mkwides.com -o $@ $<
# Step 2: compress the table with lz4cli at level 9, recording the
# content size in the output.
o/$(MODE)/libc/unicode/eastasianwidth.bin.lz4: \
o/$(MODE)/libc/unicode/eastasianwidth.bin \
o/$(MODE)/third_party/lz4cli/lz4cli.com
@TARGET=$@ ACTION=LZ4 build/do \
o/$(MODE)/third_party/lz4cli/lz4cli.com -q -f -9 --content-size $< $@
# Step 3: lz4toasm.com emits assembly for the compressed table under
# the symbol name kEastAsianWidth.
o/$(MODE)/libc/unicode/eastasianwidth.s: \
o/$(MODE)/libc/unicode/eastasianwidth.bin.lz4 \
o/$(MODE)/tool/build/lz4toasm.com
@TARGET=$@ ACTION=BIN2ASM build/do \
o/$(MODE)/tool/build/lz4toasm.com -s kEastAsianWidth -o $@ $<
# Same three steps for the combining characters table, which
# mkcombos.com derives from unicodedata.txt.
o/$(MODE)/libc/unicode/combiningchars.bin: \
libc/unicode/unicodedata.txt \
o/$(MODE)/tool/decode/mkcombos.com
@TARGET=$@ ACTION=MKCOMBOS build/do \
o/$(MODE)/tool/decode/mkcombos.com -o $@ $<
o/$(MODE)/libc/unicode/combiningchars.bin.lz4: \
o/$(MODE)/libc/unicode/combiningchars.bin \
o/$(MODE)/third_party/lz4cli/lz4cli.com
@TARGET=$@ ACTION=LZ4 build/do \
o/$(MODE)/third_party/lz4cli/lz4cli.com -q -f -9 --content-size $< $@
o/$(MODE)/libc/unicode/combiningchars.s: \
o/$(MODE)/libc/unicode/combiningchars.bin.lz4 \
o/$(MODE)/tool/build/lz4toasm.com
@TARGET=$@ ACTION=BIN2ASM build/do \
o/$(MODE)/tool/build/lz4toasm.com -s kCombiningChars -o $@ $<

32841
libc/unicode/unicodedata.txt Normal file

File diff suppressed because it is too large Load diff

35
libc/unicode/wcsnwidth.c Normal file
View file

@ -0,0 +1,35 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/unicode/unicode.h"
/**
 * Returns monospace display width of wide character string.
 *
 * At most `n` characters are examined, and scanning also stops at the
 * first NUL. The limit is tested before each character is read, so a
 * buffer of exactly `n` characters needs no terminator.
 *
 * @param pwcs is the wide string
 * @param n is the maximum number of wchar_t elements to examine
 * @return total column count, or -1 if wcwidth() reports a negative
 *     width for any examined character
 */
int wcsnwidth(const wchar_t *pwcs, size_t n) {
  int w, width = 0;
  for (; n && *pwcs; --n, ++pwcs) {
    if ((w = wcwidth(*pwcs)) < 0) return -1;
    width += w;
  }
  return width;
}

28
libc/unicode/wcswidth.c Normal file
View file

@ -0,0 +1,28 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/limits.h"
#include "libc/unicode/unicode.h"
/**
 * Measures how many monospace columns a NUL-terminated wide character
 * string occupies, by delegating to wcsnwidth() with no length limit.
 *
 * @param pwcs is the NUL-terminated wide string
 * @return whatever wcsnwidth() returns for the whole string
 */
int wcswidth(const wchar_t *pwcs) {
  const size_t unbounded = SIZE_MAX;
  return wcsnwidth(pwcs, unbounded);
}

38
libc/unicode/wcwidth.c Normal file
View file

@ -0,0 +1,38 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/bits.h"
#include "libc/unicode/unicode.h"
/**
 * Returns cell width of monospace character.
 *
 * @param ucs is the code point
 * @return 0 for NUL and for code points whose bit is set in
 *     kCombiningChars, -1 for other values below 32 and for
 *     0x7f through 0x9f, 1 plus the kEastAsianWidth bit for code
 *     points inside that table, and 1 for everything else
 */
int wcwidth(wchar_t ucs) {
  if (!ucs) return 0;
  if (ucs < 32) return -1;
  if (0x7f <= ucs && ucs < 0xa0) return -1;
  if (0 <= ucs) {
    if (ucs < kCombiningCharsBits && bt(kCombiningChars, ucs)) return 0;
    if (ucs < kEastAsianWidthBits) return 1 + bt(kEastAsianWidth, ucs);
  }
  return 1;
}