cosmopolitan/third_party/python/Tools/unicode/gencjkcodecs.py
Justine Tunney 39bf41f4eb Make numerous improvements
- Python static hello world now 1.8mb
- Python static fully loaded now 10mb
- Python HTTPS client now uses MbedTLS
- Python REPL now completes import stmts
- Increase stack size for Python for now
- Begin synthesizing posixpath and ntpath
- Restore Python \N{UNICODE NAME} support
- Restore Python NFKD symbol normalization
- Add optimized code path for Intel SHA-NI
- Get more Python unit tests passing faster
- Get Python help() pagination working on NT
- Python hashlib now supports MbedTLS PBKDF2
- Make memcpy/memmove/memcmp/bcmp/etc. faster
- Add Mersenne Twister and Vigna to LIBC_RAND
- Provide privileged __printf() for error code
- Fix zipos opendir() so that it reports ENOTDIR
- Add basic chmod() implementation for Windows NT
- Add Cosmo's best functions to Python cosmo module
- Pin function trace indent depth to that of caller
- Show memory diagram on invalid access in MODE=dbg
- Differentiate stack overflow on crash in MODE=dbg
- Add stb_truetype and tools for analyzing font files
- Upgrade to UNICODE 13 and reduce its binary footprint
- COMPILE.COM now logs resource usage of build commands
- Start implementing basic poll() support on bare metal
- Set getauxval(AT_EXECFN) to GetModuleFileName() on NT
- Add descriptions to strerror() in non-TINY build modes
- Add COUNTBRANCH() macro to help with micro-optimizations
- Make error / backtrace / asan / memory code more unbreakable
- Add fast perfect C implementation of μ-Law and a-Law audio codecs
- Make strtol() functions consistent with other libc implementations
- Improve Linenoise implementation (see also github.com/jart/bestline)
- COMPILE.COM now suppresses stdout/stderr of successful build commands
2021-09-28 01:52:34 -07:00

70 lines
2 KiB
Python

import os, string
codecs = {
'cn': ('gb2312', 'gbk', 'gb18030', 'hz'),
'tw': ('big5', 'cp950'),
'hk': ('big5hkscs',),
'jp': ('cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213',
'euc_jis_2004', 'shift_jis_2004'),
'kr': ('cp949', 'euc_kr', 'johab'),
'iso2022': ('iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext',
'iso2022_kr'),
}
TEMPLATE = string.Template("""\
#
# $encoding.py: Python Unicode Codec for $ENCODING
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_$owner, codecs
import _multibytecodec as mbc
codec = _codecs_$owner.getcodec('$encoding')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='$encoding',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
""")
def gencodecs(prefix):
for loc, encodings in codecs.items():
for enc in encodings:
code = TEMPLATE.substitute(ENCODING=enc.upper(),
encoding=enc.lower(),
owner=loc)
codecpath = os.path.join(prefix, enc + '.py')
open(codecpath, 'w').write(code)
if __name__ == '__main__':
import sys
gencodecs(sys.argv[1]
if len(sys.argv) > 1 else
"third_party/python/Lib/encodings")