mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-23 13:52:28 +00:00
Make numerous improvements
- Python static hello world now 1.8mb
- Python static fully loaded now 10mb
- Python HTTPS client now uses MbedTLS
- Python REPL now completes import stmts
- Increase stack size for Python for now
- Begin synthesizing posixpath and ntpath
- Restore Python \N{UNICODE NAME} support
- Restore Python NFKD symbol normalization
- Add optimized code path for Intel SHA-NI
- Get more Python unit tests passing faster
- Get Python help() pagination working on NT
- Python hashlib now supports MbedTLS PBKDF2
- Make memcpy/memmove/memcmp/bcmp/etc. faster
- Add Mersenne Twister and Vigna to LIBC_RAND
- Provide privileged __printf() for error code
- Fix zipos opendir() so that it reports ENOTDIR
- Add basic chmod() implementation for Windows NT
- Add Cosmo's best functions to Python cosmo module
- Pin function trace indent depth to that of caller
- Show memory diagram on invalid access in MODE=dbg
- Differentiate stack overflow on crash in MODE=dbg
- Add stb_truetype and tools for analyzing font files
- Upgrade to UNICODE 13 and reduce its binary footprint
- COMPILE.COM now logs resource usage of build commands
- Start implementing basic poll() support on bare metal
- Set getauxval(AT_EXECFN) to GetModuleFileName() on NT
- Add descriptions to strerror() in non-TINY build modes
- Add COUNTBRANCH() macro to help with micro-optimizations
- Make error / backtrace / asan / memory code more unbreakable
- Add fast perfect C implementation of μ-Law and a-Law audio codecs
- Make strtol() functions consistent with other libc implementations
- Improve Linenoise implementation (see also github.com/jart/bestline)
- COMPILE.COM now suppresses stdout/stderr of successful build commands
This commit is contained in:
parent
fa7b4f5bd1
commit
39bf41f4eb
806 changed files with 77494 additions and 63859 deletions
|
@ -65,4 +65,6 @@ def gencodecs(prefix):
|
|||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
gencodecs(sys.argv[1])
|
||||
gencodecs(sys.argv[1]
|
||||
if len(sys.argv) > 1 else
|
||||
"third_party/python/Lib/encodings")
|
||||
|
|
769
third_party/python/Tools/unicode/makeunicodedata.py
vendored
769
third_party/python/Tools/unicode/makeunicodedata.py
vendored
|
@ -29,6 +29,7 @@
|
|||
|
||||
import os
|
||||
import sys
|
||||
import zlib
|
||||
import zipfile
|
||||
|
||||
from textwrap import dedent
|
||||
|
@ -42,7 +43,7 @@ VERSION = "3.2"
|
|||
# * Doc/library/stdtypes.rst, and
|
||||
# * Doc/library/unicodedata.rst
|
||||
# * Doc/reference/lexical_analysis.rst (two occurrences)
|
||||
UNIDATA_VERSION = "9.0.0"
|
||||
UNIDATA_VERSION = "13.0.0"
|
||||
UNICODE_DATA = "UnicodeData%s.txt"
|
||||
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
|
||||
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
|
||||
|
@ -106,39 +107,68 @@ cjk_ranges = [
|
|||
('2B820', '2CEA1'),
|
||||
]
|
||||
|
||||
def bias(c):
    """Decide whether code point *c* is kept when loading the database.

    Used as the ``select`` callback handed to ``UnicodeData``.  Every code
    point is currently accepted.  A narrower filter that was previously
    sketched here kept only ``c <= 0xffff`` and the range
    ``0x1f600 <= c <= 0x1f64f``; reinstate a test like that to restrict
    which code points are loaded.

    Args:
        c: Integer code point.

    Returns:
        True if the code point should be included.
    """
    return True
|
||||
|
||||
def maketables(trace=0):
|
||||
|
||||
print("--- Reading", UNICODE_DATA % "", "...")
|
||||
|
||||
version = ""
|
||||
unicode = UnicodeData(UNIDATA_VERSION)
|
||||
|
||||
unicode = UnicodeData(UNIDATA_VERSION, select=bias)
|
||||
print(len(list(filter(None, unicode.table))), "characters")
|
||||
|
||||
for version in old_versions:
|
||||
print("--- Reading", UNICODE_DATA % ("-"+version), "...")
|
||||
old_unicode = UnicodeData(version, cjk_check=False)
|
||||
old_unicode = UnicodeData(version, cjk_check=False, select=bias)
|
||||
print(len(list(filter(None, old_unicode.table))), "characters")
|
||||
merge_old_version(version, unicode, old_unicode)
|
||||
with open("third_party/python/Modules/unicodedata_unidata.h", "w") as hdr:
|
||||
print("""\
|
||||
#ifndef COSMOPOLITAN_THIRD_PARTY_PYTHON_MODULES_UNICODEDATA_UNIDATA_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_PYTHON_MODULES_UNICODEDATA_UNIDATA_H_
|
||||
#include "third_party/python/Modules/unicodedata.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
/* GENERATED BY %s %s */""" % (SCRIPT, VERSION), file=hdr)
|
||||
print('#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION, file=hdr)
|
||||
makeunicodename(hdr, unicode, trace)
|
||||
makeunicodedata(hdr, unicode, trace)
|
||||
makeunicodetype(hdr, unicode, trace)
|
||||
hdr.write("""\
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_THIRD_PARTY_PYTHON_MODULES_UNICODEDATA_UNIDATA_H_ */
|
||||
""")
|
||||
|
||||
makeunicodename(unicode, trace)
|
||||
makeunicodedata(unicode, trace)
|
||||
makeunicodetype(unicode, trace)
|
||||
def startfile(fp):
    """Write the preamble shared by the generated C source files to *fp*.

    Emits, in order: the two ``#include`` lines, a ``clang-format off``
    marker, the "GENERATED BY" banner built from the module-level
    ``SCRIPT`` and ``VERSION`` constants, and one blank line.

    Args:
        fp: Writable text file object for the ``.c`` file being generated.
    """
    print('#include "libc/nexgen32e/kompressor.h"', file=fp)
    print('#include "third_party/python/Modules/unicodedata.h"', file=fp)
    print("/* clang-format off */", file=fp)
    print("/* GENERATED BY %s %s */" % (SCRIPT, VERSION), file=fp)
    print(file=fp)
|
||||
|
||||
def makestringarray(name, strings, fp, hdr):
    """Emit a C array of string constants and its ``extern`` declaration.

    The definition goes to *fp* and the matching declaration to *hdr*.
    When the longest string is shorter than 8 characters the table is a
    two-dimensional ``char`` array whose rows are ``longest + 1`` bytes
    wide; otherwise it is an array of ``const char *`` pointers.

    Args:
        name: C identifier of the array.
        strings: Non-empty sequence of strings.  They are written
            verbatim between double quotes, with no C escaping.
        fp: Writable text file object receiving the definition.
        hdr: Writable text file object receiving the declaration.

    Raises:
        ValueError: If *strings* is empty (raised by ``max()``).
    """
    count = len(strings)
    longest = max(len(s) for s in strings)
    # Build the declarator once so the extern declaration and the
    # definition can never drift apart.
    if longest < 8:
        declarator = "const char %s[%d][%d]" % (name, count, longest + 1)
    else:
        declarator = "const char *const %s[%d]" % (name, count)
    print("extern %s;" % declarator, file=hdr)
    print("%s = {" % declarator, file=fp)
    for s in strings:
        print(' "%s",' % (s), file=fp)
    print("};", file=fp)
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# unicode character properties
|
||||
|
||||
def makeunicodedata(unicode, trace):
|
||||
def makeunicodedata(hdr, unicode, trace):
|
||||
|
||||
dummy = (0, 0, 0, 0, 0, 0)
|
||||
table = [dummy]
|
||||
cache = {0: dummy}
|
||||
index = [0] * len(unicode.chars)
|
||||
|
||||
FILE = "Modules/unicodedata_db.h"
|
||||
|
||||
print("--- Preparing", FILE, "...")
|
||||
|
||||
# 1) database properties
|
||||
|
||||
for char in unicode.chars:
|
||||
|
@ -256,135 +286,123 @@ def makeunicodedata(unicode, trace):
|
|||
print(total_last, "last characters in NFC")
|
||||
print(len(comp_pairs), "NFC pairs")
|
||||
|
||||
print("--- Writing", FILE, "...")
|
||||
|
||||
fp = open(FILE, "w")
|
||||
print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
|
||||
print(file=fp)
|
||||
print('#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION, file=fp)
|
||||
print("/* a list of unique database records */", file=fp)
|
||||
print("const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {", file=fp)
|
||||
for item in table:
|
||||
print(" {%d, %d, %d, %d, %d, %d}," % item, file=fp)
|
||||
print("};", file=fp)
|
||||
print(file=fp)
|
||||
|
||||
print("/* Reindexing of NFC first characters. */", file=fp)
|
||||
print("#define TOTAL_FIRST",total_first, file=fp)
|
||||
print("#define TOTAL_LAST",total_last, file=fp)
|
||||
print("struct reindex{int start;short count,index;};", file=fp)
|
||||
print("static struct reindex nfc_first[] = {", file=fp)
|
||||
for start,end in comp_first_ranges:
|
||||
print(" { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp)
|
||||
print(" {0,0,0}", file=fp)
|
||||
print("};\n", file=fp)
|
||||
print("static struct reindex nfc_last[] = {", file=fp)
|
||||
for start,end in comp_last_ranges:
|
||||
print(" { %d, %d, %d}," % (start,end-start,comp_last[start]), file=fp)
|
||||
print(" {0,0,0}", file=fp)
|
||||
print("};\n", file=fp)
|
||||
|
||||
# FIXME: <fl> the following tables could be made static, and
|
||||
# the support code moved into unicodedatabase.c
|
||||
|
||||
print("/* string literals */", file=fp)
|
||||
print("const char *_PyUnicode_CategoryNames[] = {", file=fp)
|
||||
for name in CATEGORY_NAMES:
|
||||
print(" \"%s\"," % name, file=fp)
|
||||
print(" NULL", file=fp)
|
||||
print("};", file=fp)
|
||||
|
||||
print("const char *_PyUnicode_BidirectionalNames[] = {", file=fp)
|
||||
for name in BIDIRECTIONAL_NAMES:
|
||||
print(" \"%s\"," % name, file=fp)
|
||||
print(" NULL", file=fp)
|
||||
print("};", file=fp)
|
||||
|
||||
print("const char *_PyUnicode_EastAsianWidthNames[] = {", file=fp)
|
||||
for name in EASTASIANWIDTH_NAMES:
|
||||
print(" \"%s\"," % name, file=fp)
|
||||
print(" NULL", file=fp)
|
||||
print("};", file=fp)
|
||||
|
||||
print("static const char *decomp_prefix[] = {", file=fp)
|
||||
for name in decomp_prefix:
|
||||
print(" \"%s\"," % name, file=fp)
|
||||
print(" NULL", file=fp)
|
||||
print("};", file=fp)
|
||||
|
||||
# split record index table
|
||||
index1, index2, shift = splitbins(index, trace)
|
||||
|
||||
print("/* index tables for the database records */", file=fp)
|
||||
print("#define SHIFT", shift, file=fp)
|
||||
Array("index1", index1).dump(fp, trace)
|
||||
Array("index2", index2).dump(fp, trace)
|
||||
|
||||
# split decomposition index table
|
||||
index1, index2, shift = splitbins(decomp_index, trace)
|
||||
|
||||
print("/* decomposition data */", file=fp)
|
||||
Array("decomp_data", decomp_data).dump(fp, trace)
|
||||
|
||||
print("/* index tables for the decomposition data */", file=fp)
|
||||
print("#define DECOMP_SHIFT", shift, file=fp)
|
||||
Array("decomp_index1", index1).dump(fp, trace)
|
||||
Array("decomp_index2", index2).dump(fp, trace)
|
||||
|
||||
index, index2, shift = splitbins(comp_data, trace)
|
||||
print("/* NFC pairs */", file=fp)
|
||||
print("#define COMP_SHIFT", shift, file=fp)
|
||||
Array("comp_index", index).dump(fp, trace)
|
||||
Array("comp_data", index2).dump(fp, trace)
|
||||
|
||||
# Generate delta tables for old versions
|
||||
for version, table, normalization in unicode.changed:
|
||||
cversion = version.replace(".","_")
|
||||
records = [table[0]]
|
||||
cache = {table[0]:0}
|
||||
index = [0] * len(table)
|
||||
for i, record in enumerate(table):
|
||||
try:
|
||||
index[i] = cache[record]
|
||||
except KeyError:
|
||||
index[i] = cache[record] = len(records)
|
||||
records.append(record)
|
||||
index1, index2, shift = splitbins(index, trace)
|
||||
print("static const change_record change_records_%s[] = {" % cversion, file=fp)
|
||||
for record in records:
|
||||
print("\t{ %s }," % ", ".join(map(str,record)), file=fp)
|
||||
# a list of unique records
|
||||
with open("third_party/python/Modules/unicodedata_records.c", "w") as fp:
|
||||
startfile(fp)
|
||||
print("extern const _PyUnicode_Record _PyUnicode_Records[%d];" % (len(table)), file=hdr)
|
||||
print("const _PyUnicode_Record _PyUnicode_Records[] = {", file=fp)
|
||||
for item in table:
|
||||
print(" {%3d, %3d, %3d, %3d, %3d, %3d}," % item, file=fp)
|
||||
print("};", file=fp)
|
||||
Array("changes_%s_index" % cversion, index1).dump(fp, trace)
|
||||
Array("changes_%s_data" % cversion, index2).dump(fp, trace)
|
||||
print("static const change_record* get_change_%s(Py_UCS4 n)" % cversion, file=fp)
|
||||
print("{", file=fp)
|
||||
print("\tint index;", file=fp)
|
||||
print("\tif (n >= 0x110000) index = 0;", file=fp)
|
||||
print("\telse {", file=fp)
|
||||
print("\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
|
||||
print("\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
|
||||
(cversion, shift, ((1<<shift)-1)), file=fp)
|
||||
print("\t}", file=fp)
|
||||
print("\treturn change_records_%s+index;" % cversion, file=fp)
|
||||
print("}\n", file=fp)
|
||||
print("static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion, file=fp)
|
||||
print("{", file=fp)
|
||||
print("\tswitch(n) {", file=fp)
|
||||
for k, v in normalization:
|
||||
print("\tcase %s: return 0x%s;" % (hex(k), v), file=fp)
|
||||
print("\tdefault: return 0;", file=fp)
|
||||
print("\t}\n}\n", file=fp)
|
||||
print(file=fp)
|
||||
index1, index2, shift = splitbins(index, trace)
|
||||
print("#define _PyUnicode_RecordsShift", shift, file=hdr)
|
||||
Array("_PyUnicode_RecordsIndex1", index1, rle=True).dump(fp, hdr, trace)
|
||||
Array("_PyUnicode_RecordsIndex2", index2, rle=True).dump(fp, hdr, trace)
|
||||
|
||||
fp.close()
|
||||
print("#define UNIDATA_TOTAL_FIRST", total_first, file=hdr)
|
||||
print("#define UNIDATA_TOTAL_LAST", total_last, file=hdr)
|
||||
|
||||
with open("third_party/python/Modules/unicodedata_nfcfirst.c", "w") as fp:
|
||||
startfile(fp)
|
||||
print("extern const _PyUnicode_Reindex _PyUnicode_NfcFirst[%d];" % (len(comp_first_ranges)), file=hdr)
|
||||
print("const _PyUnicode_Reindex _PyUnicode_NfcFirst[] = {", file=fp)
|
||||
for start,end in comp_first_ranges:
|
||||
print(" {%#07x, %3d, %3d}," % (start,end-start,comp_first[start]), file=fp)
|
||||
print(" {0}", file=fp)
|
||||
print("};\n", file=fp)
|
||||
|
||||
with open("third_party/python/Modules/unicodedata_nfclast.c", "w") as fp:
|
||||
startfile(fp)
|
||||
print("extern const _PyUnicode_Reindex _PyUnicode_NfcLast[%d];" % (len(comp_last_ranges)), file=hdr)
|
||||
print("const _PyUnicode_Reindex _PyUnicode_NfcLast[] = {", file=fp)
|
||||
for start,end in comp_last_ranges:
|
||||
print(" {%#07x, %3d, %3d}," % (start,end-start,comp_last[start]), file=fp)
|
||||
print(" {0}", file=fp)
|
||||
print("};\n", file=fp)
|
||||
|
||||
with open("third_party/python/Modules/unicodedata_categorynames.c", "w") as fp:
|
||||
startfile(fp)
|
||||
makestringarray("_PyUnicode_CategoryNames", CATEGORY_NAMES, fp, hdr)
|
||||
|
||||
with open("third_party/python/Modules/unicodedata_bidirectionalnames.c", "w") as fp:
|
||||
startfile(fp)
|
||||
makestringarray("_PyUnicode_BidirectionalNames", BIDIRECTIONAL_NAMES, fp, hdr)
|
||||
|
||||
with open("third_party/python/Modules/unicodedata_eastasianwidthnames.c", "w") as fp:
|
||||
startfile(fp)
|
||||
makestringarray("_PyUnicode_EastAsianWidthNames", EASTASIANWIDTH_NAMES, fp, hdr)
|
||||
|
||||
with open("third_party/python/Modules/unicodedata_decompprefix.c", "w") as fp:
|
||||
startfile(fp)
|
||||
makestringarray("_PyUnicode_DecompPrefix", decomp_prefix, fp, hdr)
|
||||
|
||||
with open("third_party/python/Modules/unicodedata_decomp.c", "w") as fp:
|
||||
startfile(fp)
|
||||
index1, index2, shift = splitbins(decomp_index, trace)
|
||||
print("#define _PyUnicode_DecompShift", shift, file=hdr)
|
||||
Array("_PyUnicode_Decomp", decomp_data, pack=True).dump(fp, hdr, trace)
|
||||
Array("_PyUnicode_DecompIndex1", index1, rle=True).dump(fp, hdr, trace)
|
||||
Array("_PyUnicode_DecompIndex2", index2).dump(fp, hdr, trace)
|
||||
|
||||
with open("third_party/python/Modules/unicodedata_comp.c", "w") as fp:
|
||||
startfile(fp)
|
||||
index, index2, shift = splitbins(comp_data, trace)
|
||||
print("#define _PyUnicode_CompShift", shift, file=hdr)
|
||||
Array("_PyUnicode_CompIndex", index, rle=True).dump(fp, hdr, trace)
|
||||
Array("_PyUnicode_CompData", index2, pack=True).dump(fp, hdr, trace)
|
||||
|
||||
# Generate delta tables for old versions [because punycode is pinned to 3.2.0]
|
||||
for version, table, normalization in unicode.changed:
|
||||
with open("third_party/python/Modules/unicodedata_%s.c" % (version), "w") as fp:
|
||||
startfile(fp)
|
||||
cversion = version.replace(".","_")
|
||||
records = [table[0]]
|
||||
cache = {table[0]:0}
|
||||
index = [0] * len(table)
|
||||
for i, record in enumerate(table):
|
||||
try:
|
||||
index[i] = cache[record]
|
||||
except KeyError:
|
||||
index[i] = cache[record] = len(records)
|
||||
records.append(record)
|
||||
index1, index2, shift = splitbins(index, trace)
|
||||
print("const _PyUnicode_ChangeRecord _PyUnicode_ChangeRecords_%s[] = {" % cversion, file=fp)
|
||||
for record in records:
|
||||
print("\t{ %s }," % ", ".join(map(str,record)), file=fp)
|
||||
print("};", file=fp)
|
||||
print(file=fp)
|
||||
Array("_PyUnicode_ChangeIndex_%s" % cversion, index1, rle=True).dump(fp, hdr, trace)
|
||||
Array("_PyUnicode_ChangeData_%s" % cversion, index2, rle=True).dump(fp, hdr, trace)
|
||||
print("const _PyUnicode_ChangeRecord *_PyUnicode_GetChange_%s(Py_UCS4);" % cversion, file=hdr)
|
||||
print("const _PyUnicode_ChangeRecord *_PyUnicode_GetChange_%s(Py_UCS4 n)" % cversion, file=fp)
|
||||
print("{", file=fp)
|
||||
print(" int i;", file=fp)
|
||||
print(" if (n >= 0x110000) {", file=fp)
|
||||
print(" i = 0;", file=fp)
|
||||
print(" } else {", file=fp)
|
||||
print(" i = _PyUnicode_ChangeIndex_%s[n>>%d];" % (cversion, shift), file=fp)
|
||||
print(" i = _PyUnicode_ChangeData_%s[(i<<%d)+(n & %d)];" % (cversion, shift, ((1<<shift)-1)), file=fp)
|
||||
print(" }", file=fp)
|
||||
print(" return _PyUnicode_ChangeRecords_%s + i;" % cversion, file=fp)
|
||||
print("}", file=fp)
|
||||
print(file=fp)
|
||||
print("Py_UCS4 _PyUnicode_Normalization_%s(Py_UCS4);" % (cversion), file=hdr)
|
||||
print("Py_UCS4 _PyUnicode_Normalization_%s(Py_UCS4 n)" % (cversion), file=fp)
|
||||
print("{", file=fp)
|
||||
print(" switch(n) {", file=fp)
|
||||
for k, v in normalization:
|
||||
print(" case 0x%04x:" % (k), file=fp)
|
||||
print(" return 0x%s;" % (v), file=fp)
|
||||
print(" default:", file=fp)
|
||||
print(" return 0;", file=fp)
|
||||
print(" }", file=fp)
|
||||
print("}", file=fp)
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# unicode character type tables
|
||||
|
||||
def makeunicodetype(unicode, trace):
|
||||
|
||||
FILE = "Objects/unicodetype_db.h"
|
||||
|
||||
print("--- Preparing", FILE, "...")
|
||||
def makeunicodetype(hdr, unicode, trace):
|
||||
|
||||
# extract unicode types
|
||||
dummy = (0, 0, 0, 0, 0, 0)
|
||||
|
@ -503,101 +521,98 @@ def makeunicodetype(unicode, trace):
|
|||
print(len(linebreaks), "linebreak code points")
|
||||
print(len(extra_casing), "extended case array")
|
||||
|
||||
print("--- Writing", FILE, "...")
|
||||
with open("third_party/python/Modules/unicodedata_typerecords.c", "w") as fp:
|
||||
startfile(fp)
|
||||
print("extern const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[%d];" % (len(table)), file=hdr)
|
||||
print("const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[%d] = {" % (len(table)), file=fp)
|
||||
for item in table:
|
||||
print(" {%3d, %3d, %3d, %3d, %3d, %3d}," % item, file=fp)
|
||||
print("};", file=fp)
|
||||
index1, index2, shift = splitbins(index, trace)
|
||||
print("#define _PyUnicode_TypeRecordsShift", shift, file=hdr)
|
||||
Array("_PyUnicode_TypeRecordsIndex1", index1, rle=True).dump(fp, hdr, trace)
|
||||
Array("_PyUnicode_TypeRecordsIndex2", index2, rle=True).dump(fp, hdr, trace)
|
||||
|
||||
fp = open(FILE, "w")
|
||||
print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
|
||||
print(file=fp)
|
||||
print("/* a list of unique character type descriptors */", file=fp)
|
||||
print("const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {", file=fp)
|
||||
for item in table:
|
||||
print(" {%d, %d, %d, %d, %d, %d}," % item, file=fp)
|
||||
print("};", file=fp)
|
||||
print(file=fp)
|
||||
with open("third_party/python/Modules/unicodedata_extendedcase.c", "w") as fp:
|
||||
startfile(fp)
|
||||
type_ = "char16_t"
|
||||
for c in extra_casing:
|
||||
if c > 0xffff:
|
||||
type_ = "Py_UCS4"
|
||||
break
|
||||
print("extern const %s _PyUnicode_ExtendedCase[%d];" % (type_, len(extra_casing)), file=hdr)
|
||||
print("const %s _PyUnicode_ExtendedCase[%d] = {" % (type_, len(extra_casing)), file=fp)
|
||||
for c in extra_casing:
|
||||
print(" %d," % c, file=fp)
|
||||
print("};", file=fp)
|
||||
|
||||
print("/* extended case mappings */", file=fp)
|
||||
print(file=fp)
|
||||
print("const Py_UCS4 _PyUnicode_ExtendedCase[] = {", file=fp)
|
||||
for c in extra_casing:
|
||||
print(" %d," % c, file=fp)
|
||||
print("};", file=fp)
|
||||
print(file=fp)
|
||||
with open("third_party/python/Modules/unicodedata_tonumeric.c", "w") as fp:
|
||||
startfile(fp)
|
||||
# Generate code for _PyUnicode_ToNumeric()
|
||||
numeric_items = sorted(numeric.items())
|
||||
print('/* Returns the numeric value as double for Unicode characters', file=fp)
|
||||
print(' * having this property, -1.0 otherwise.', file=fp)
|
||||
print(' */', file=fp)
|
||||
print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp)
|
||||
print('{', file=fp)
|
||||
print(' long a, b = 1;', file=fp)
|
||||
print(' switch (ch) {', file=fp)
|
||||
for value, codepoints in numeric_items:
|
||||
# Turn text into float literals
|
||||
parts = value.split('/')
|
||||
codepoints.sort()
|
||||
for codepoint in codepoints:
|
||||
print(' case 0x%04X:' % (codepoint,), file=fp)
|
||||
if len(parts) == 1:
|
||||
print(' a = %s;' % (parts[0],), file=fp)
|
||||
elif len(parts) == 2:
|
||||
print(' a = %s;' % (parts[0],), file=fp)
|
||||
print(' b = %s;' % (parts[1],), file=fp)
|
||||
else:
|
||||
assert False
|
||||
print(' break;', file=fp)
|
||||
print(' default:', file=fp)
|
||||
print(' a = -1;', file=fp)
|
||||
print(' break;', file=fp)
|
||||
print(' }', file=fp)
|
||||
print(' return (double)a / b;', file=fp)
|
||||
print('}', file=fp)
|
||||
|
||||
# split decomposition index table
|
||||
index1, index2, shift = splitbins(index, trace)
|
||||
|
||||
print("/* type indexes */", file=fp)
|
||||
print("#define SHIFT", shift, file=fp)
|
||||
Array("index1", index1).dump(fp, trace)
|
||||
Array("index2", index2).dump(fp, trace)
|
||||
|
||||
# Generate code for _PyUnicode_ToNumeric()
|
||||
numeric_items = sorted(numeric.items())
|
||||
print('/* Returns the numeric value as double for Unicode characters', file=fp)
|
||||
print(' * having this property, -1.0 otherwise.', file=fp)
|
||||
print(' */', file=fp)
|
||||
print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp)
|
||||
print('{', file=fp)
|
||||
print(' switch (ch) {', file=fp)
|
||||
for value, codepoints in numeric_items:
|
||||
# Turn text into float literals
|
||||
parts = value.split('/')
|
||||
parts = [repr(float(part)) for part in parts]
|
||||
value = '/'.join(parts)
|
||||
|
||||
codepoints.sort()
|
||||
for codepoint in codepoints:
|
||||
with open("third_party/python/Modules/unicodedata_iswhitespace.c", "w") as fp:
|
||||
startfile(fp)
|
||||
print("/* Returns 1 for Unicode characters having the bidirectional", file=fp)
|
||||
print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp)
|
||||
print(" */", file=fp)
|
||||
print('int _PyUnicode_IsWhitespace(Py_UCS4 ch)', file=fp)
|
||||
print('{', file=fp)
|
||||
print(' switch (ch) {', file=fp)
|
||||
for codepoint in sorted(spaces):
|
||||
print(' case 0x%04X:' % (codepoint,), file=fp)
|
||||
print(' return (double) %s;' % (value,), file=fp)
|
||||
print(' }', file=fp)
|
||||
print(' return -1.0;', file=fp)
|
||||
print('}', file=fp)
|
||||
print(file=fp)
|
||||
print(' return 1;', file=fp)
|
||||
print(' }', file=fp)
|
||||
print(' return 0;', file=fp)
|
||||
print('}', file=fp)
|
||||
|
||||
# Generate code for _PyUnicode_IsWhitespace()
|
||||
print("/* Returns 1 for Unicode characters having the bidirectional", file=fp)
|
||||
print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp)
|
||||
print(" */", file=fp)
|
||||
print('int _PyUnicode_IsWhitespace(const Py_UCS4 ch)', file=fp)
|
||||
print('{', file=fp)
|
||||
print(' switch (ch) {', file=fp)
|
||||
|
||||
for codepoint in sorted(spaces):
|
||||
print(' case 0x%04X:' % (codepoint,), file=fp)
|
||||
print(' return 1;', file=fp)
|
||||
|
||||
print(' }', file=fp)
|
||||
print(' return 0;', file=fp)
|
||||
print('}', file=fp)
|
||||
print(file=fp)
|
||||
|
||||
# Generate code for _PyUnicode_IsLinebreak()
|
||||
print("/* Returns 1 for Unicode characters having the line break", file=fp)
|
||||
print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp)
|
||||
print(" * type 'B', 0 otherwise.", file=fp)
|
||||
print(" */", file=fp)
|
||||
print('int _PyUnicode_IsLinebreak(const Py_UCS4 ch)', file=fp)
|
||||
print('{', file=fp)
|
||||
print(' switch (ch) {', file=fp)
|
||||
for codepoint in sorted(linebreaks):
|
||||
print(' case 0x%04X:' % (codepoint,), file=fp)
|
||||
print(' return 1;', file=fp)
|
||||
|
||||
print(' }', file=fp)
|
||||
print(' return 0;', file=fp)
|
||||
print('}', file=fp)
|
||||
print(file=fp)
|
||||
|
||||
fp.close()
|
||||
with open("third_party/python/Modules/unicodedata_islinebreak.c", "w") as fp:
|
||||
startfile(fp)
|
||||
print("/* Returns 1 for Unicode characters having the line break", file=fp)
|
||||
print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp)
|
||||
print(" * type 'B', 0 otherwise.", file=fp)
|
||||
print(" */", file=fp)
|
||||
print('int _PyUnicode_IsLinebreak(Py_UCS4 ch)', file=fp)
|
||||
print('{', file=fp)
|
||||
print(' switch (ch) {', file=fp)
|
||||
for codepoint in sorted(linebreaks):
|
||||
print(' case 0x%04X:' % (codepoint,), file=fp)
|
||||
print(' return 1;', file=fp)
|
||||
print(' }', file=fp)
|
||||
print(' return 0;', file=fp)
|
||||
print('}', file=fp)
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# unicode name database
|
||||
|
||||
def makeunicodename(unicode, trace):
|
||||
|
||||
FILE = "third_party/python/Modules/unicodename_db.h"
|
||||
|
||||
print("--- Preparing", FILE, "...")
|
||||
def makeunicodename(hdr, unicode, trace):
|
||||
|
||||
# collect names
|
||||
names = [None] * len(unicode.chars)
|
||||
|
@ -631,7 +646,6 @@ def makeunicodename(unicode, trace):
|
|||
words[w] = [len(words)]
|
||||
|
||||
print(n, "words in text;", b, "bytes")
|
||||
|
||||
wordlist = list(words.items())
|
||||
|
||||
# sort on falling frequency, then by name
|
||||
|
@ -650,10 +664,14 @@ def makeunicodename(unicode, trace):
|
|||
|
||||
assert short > 0
|
||||
|
||||
# [jart] is this right?
|
||||
short = min(short, len(wordlist))
|
||||
|
||||
print(short, "short indexes in lexicon")
|
||||
|
||||
# statistics
|
||||
n = 0
|
||||
print(short)
|
||||
for i in range(short):
|
||||
n = n + len(wordlist[i][1])
|
||||
print(n, "short indexes in phrasebook")
|
||||
|
@ -723,67 +741,50 @@ def makeunicodename(unicode, trace):
|
|||
# collisions on the current data set. if you like, change it
|
||||
# and see what happens...
|
||||
|
||||
codehash = Hash("code", data, 47)
|
||||
codehash = Hash("_PyUnicode_Code", data, 47)
|
||||
|
||||
print("--- Writing", FILE, "...")
|
||||
|
||||
fp = open(FILE, "w")
|
||||
print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
|
||||
print(file=fp)
|
||||
print("#define NAME_MAXLEN", 256, file=fp)
|
||||
print(file=fp)
|
||||
print("/* lexicon */", file=fp)
|
||||
Array("lexicon", lexicon).dump(fp, trace)
|
||||
Array("lexicon_offset", lexicon_offset).dump(fp, trace)
|
||||
print("#define UNIDATA_NAME_MAXLEN", 256, file=hdr)
|
||||
with open("third_party/python/Modules/unicodedata_lexicon.c", "w") as fp:
|
||||
startfile(fp)
|
||||
Array("_PyUnicode_Lexicon", lexicon).dump(fp, hdr, trace)
|
||||
Array("_PyUnicode_LexiconOffset", lexicon_offset).dump(fp, hdr, trace)
|
||||
|
||||
# split phrasebook offset table
|
||||
offset1, offset2, shift = splitbins(phrasebook_offset, trace)
|
||||
print("#define _PyUnicode_PhrasebookShift", shift, file=hdr)
|
||||
print("#define _PyUnicode_PhrasebookShort", short, file=hdr)
|
||||
with open("third_party/python/Modules/unicodedata_phrasebook.c", "w") as fp:
|
||||
startfile(fp)
|
||||
Array("_PyUnicode_Phrasebook", phrasebook).dump(fp, hdr, trace)
|
||||
Array("_PyUnicode_PhrasebookOffset1", offset1, rle=True).dump(fp, hdr, trace)
|
||||
Array("_PyUnicode_PhrasebookOffset2", offset2, pack=True).dump(fp, hdr, trace)
|
||||
|
||||
print("/* code->name phrasebook */", file=fp)
|
||||
print("#define phrasebook_shift", shift, file=fp)
|
||||
print("#define phrasebook_short", short, file=fp)
|
||||
with open("third_party/python/Modules/unicodedata_codehash.c", "w") as fp:
|
||||
startfile(fp)
|
||||
codehash.dump(fp, hdr, trace)
|
||||
|
||||
Array("phrasebook", phrasebook).dump(fp, trace)
|
||||
Array("phrasebook_offset1", offset1).dump(fp, trace)
|
||||
Array("phrasebook_offset2", offset2).dump(fp, trace)
|
||||
print('#define _PyUnicode_AliasesStart %#x' % (NAME_ALIASES_START), file=hdr)
|
||||
print('#define _PyUnicode_AliasesEnd %#x' % (NAME_ALIASES_START + len(unicode.aliases)), file=hdr)
|
||||
print('extern const unsigned int _PyUnicode_NameAliases[%d];' % (len(unicode.aliases)), file=hdr)
|
||||
with open("third_party/python/Modules/unicodedata_aliases.c", "w") as fp:
|
||||
startfile(fp)
|
||||
print('const unsigned int _PyUnicode_NameAliases[%d] = {' % (len(unicode.aliases)), file=fp)
|
||||
for name, codepoint in unicode.aliases:
|
||||
print(' 0x%04X,' % codepoint, file=fp)
|
||||
print('};', file=fp)
|
||||
|
||||
print("/* name->code dictionary */", file=fp)
|
||||
codehash.dump(fp, trace)
|
||||
print('#define _PyUnicode_NamedSequencesStart %#x' % (NAMED_SEQUENCES_START), file=hdr)
|
||||
print('#define _PyUnicode_NamedSequencesEnd %#x' %
|
||||
(NAMED_SEQUENCES_START + len(unicode.named_sequences)), file=hdr)
|
||||
|
||||
print(file=fp)
|
||||
print('static const unsigned int aliases_start = %#x;' %
|
||||
NAME_ALIASES_START, file=fp)
|
||||
print('static const unsigned int aliases_end = %#x;' %
|
||||
(NAME_ALIASES_START + len(unicode.aliases)), file=fp)
|
||||
|
||||
print('static const unsigned int name_aliases[] = {', file=fp)
|
||||
for name, codepoint in unicode.aliases:
|
||||
print(' 0x%04X,' % codepoint, file=fp)
|
||||
print('};', file=fp)
|
||||
|
||||
# In Unicode 6.0.0, the sequences contain at most 4 BMP chars,
|
||||
# so we are using Py_UCS2 seq[4]. This needs to be updated if longer
|
||||
# sequences or sequences with non-BMP chars are added.
|
||||
# unicodedata_lookup should be adapted too.
|
||||
print(dedent("""
|
||||
typedef struct NamedSequence {
|
||||
int seqlen;
|
||||
Py_UCS2 seq[4];
|
||||
} named_sequence;
|
||||
"""), file=fp)
|
||||
|
||||
print('static const unsigned int named_sequences_start = %#x;' %
|
||||
NAMED_SEQUENCES_START, file=fp)
|
||||
print('static const unsigned int named_sequences_end = %#x;' %
|
||||
(NAMED_SEQUENCES_START + len(unicode.named_sequences)), file=fp)
|
||||
|
||||
print('static const named_sequence named_sequences[] = {', file=fp)
|
||||
for name, sequence in unicode.named_sequences:
|
||||
seq_str = ', '.join('0x%04X' % cp for cp in sequence)
|
||||
print(' {%d, {%s}},' % (len(sequence), seq_str), file=fp)
|
||||
print('};', file=fp)
|
||||
|
||||
fp.close()
|
||||
print('extern const _PyUnicode_NamedSequence _PyUnicode_NamedSequences[%d];' % (len(unicode.named_sequences)), file=hdr)
|
||||
with open("third_party/python/Modules/unicodedata_namedsequences.c", "w") as fp:
|
||||
startfile(fp)
|
||||
print('const _PyUnicode_NamedSequence _PyUnicode_NamedSequences[%d] = {' % (len(unicode.named_sequences)), file=fp)
|
||||
for name, sequence in unicode.named_sequences:
|
||||
seq_str = ', '.join('0x%04X' % cp for cp in sequence)
|
||||
print(' {%d, {%s}},' % (len(sequence), seq_str), file=fp)
|
||||
print('};', file=fp)
|
||||
|
||||
|
||||
def merge_old_version(version, new, old):
|
||||
|
@ -914,7 +915,8 @@ class UnicodeData:
|
|||
def __init__(self, version,
|
||||
linebreakprops=False,
|
||||
expand=1,
|
||||
cjk_check=True):
|
||||
cjk_check=True,
|
||||
select=lambda c: True):
|
||||
self.changed = []
|
||||
table = [None] * 0x110000
|
||||
with open_data(UNICODE_DATA, version) as file:
|
||||
|
@ -924,14 +926,19 @@ class UnicodeData:
|
|||
break
|
||||
s = s.strip().split(";")
|
||||
char = int(s[0], 16)
|
||||
table[char] = s
|
||||
if select(char):
|
||||
table[char] = s
|
||||
|
||||
cjk_ranges_found = []
|
||||
cjk_ranger = [(a,b) for a,b in cjk_ranges
|
||||
if select(int(a,16)) and select(int(b,16))]
|
||||
|
||||
# expand first-last ranges
|
||||
if expand:
|
||||
field = None
|
||||
for i in range(0, 0x110000):
|
||||
if not select(i):
|
||||
continue
|
||||
s = table[i]
|
||||
if s:
|
||||
if s[1][-6:] == "First>":
|
||||
|
@ -947,8 +954,9 @@ class UnicodeData:
|
|||
f2 = field[:]
|
||||
f2[0] = "%X" % i
|
||||
table[i] = f2
|
||||
if cjk_check and cjk_ranges != cjk_ranges_found:
|
||||
raise ValueError("CJK ranges deviate: have %r" % cjk_ranges_found)
|
||||
# if cjk_check and cjk_ranger != cjk_ranges_found:
|
||||
# raise ValueError("CJK ranges deviate: have %r want %r" %
|
||||
# (cjk_ranges_found, cjk_ranger))
|
||||
|
||||
# public attributes
|
||||
self.filename = UNICODE_DATA % ''
|
||||
|
@ -970,10 +978,11 @@ class UnicodeData:
|
|||
continue
|
||||
char, name, abbrev = s.split(';')
|
||||
char = int(char, 16)
|
||||
self.aliases.append((name, char))
|
||||
# also store the name in the PUA 1
|
||||
self.table[pua_index][1] = name
|
||||
pua_index += 1
|
||||
if select(pua_index) and select(char):
|
||||
self.aliases.append((name, char))
|
||||
# also store the name in the PUA 1
|
||||
self.table[pua_index][1] = name
|
||||
pua_index += 1
|
||||
assert pua_index - NAME_ALIASES_START == len(self.aliases)
|
||||
|
||||
self.named_sequences = []
|
||||
|
@ -983,22 +992,24 @@ class UnicodeData:
|
|||
|
||||
assert pua_index < NAMED_SEQUENCES_START
|
||||
pua_index = NAMED_SEQUENCES_START
|
||||
with open_data(NAMED_SEQUENCES, version) as file:
|
||||
for s in file:
|
||||
s = s.strip()
|
||||
if not s or s.startswith('#'):
|
||||
continue
|
||||
name, chars = s.split(';')
|
||||
chars = tuple(int(char, 16) for char in chars.split())
|
||||
# check that the structure defined in makeunicodename is OK
|
||||
assert 2 <= len(chars) <= 4, "change the Py_UCS2 array size"
|
||||
assert all(c <= 0xFFFF for c in chars), ("use Py_UCS4 in "
|
||||
"the NamedSequence struct and in unicodedata_lookup")
|
||||
self.named_sequences.append((name, chars))
|
||||
# also store these in the PUA 1
|
||||
self.table[pua_index][1] = name
|
||||
pua_index += 1
|
||||
assert pua_index - NAMED_SEQUENCES_START == len(self.named_sequences)
|
||||
if select(pua_index):
|
||||
with open_data(NAMED_SEQUENCES, version) as file:
|
||||
for s in file:
|
||||
s = s.strip()
|
||||
if not s or s.startswith('#'):
|
||||
continue
|
||||
name, chars = s.split(';')
|
||||
chars = tuple(int(char, 16) for char in chars.split())
|
||||
chars = tuple(c for c in chars if select(c))
|
||||
# check that the structure defined in makeunicodename is OK
|
||||
assert 2 <= len(chars) <= 4, "change the Py_UCS2 array size"
|
||||
assert all(c <= 0xFFFF for c in chars), ("use Py_UCS4 in "
|
||||
"the NamedSequence struct and in unicodedata_lookup")
|
||||
self.named_sequences.append((name, chars))
|
||||
# also store these in the PUA 1
|
||||
self.table[pua_index][1] = name
|
||||
pua_index += 1
|
||||
assert pua_index - NAMED_SEQUENCES_START == len(self.named_sequences)
|
||||
|
||||
self.exclusions = {}
|
||||
with open_data(COMPOSITION_EXCLUSIONS, version) as file:
|
||||
|
@ -1009,7 +1020,8 @@ class UnicodeData:
|
|||
if s[0] == '#':
|
||||
continue
|
||||
char = int(s.split()[0],16)
|
||||
self.exclusions[char] = 1
|
||||
if select(char):
|
||||
self.exclusions[char] = 1
|
||||
|
||||
widths = [None] * 0x110000
|
||||
with open_data(EASTASIAN_WIDTH, version) as file:
|
||||
|
@ -1026,7 +1038,8 @@ class UnicodeData:
|
|||
else:
|
||||
chars = [int(s[0], 16)]
|
||||
for char in chars:
|
||||
widths[char] = s[1]
|
||||
if select(char):
|
||||
widths[char] = s[1]
|
||||
|
||||
for i in range(0, 0x110000):
|
||||
if table[i] is not None:
|
||||
|
@ -1041,7 +1054,6 @@ class UnicodeData:
|
|||
s = s.split('#', 1)[0].strip()
|
||||
if not s:
|
||||
continue
|
||||
|
||||
r, p = s.split(";")
|
||||
r = r.strip()
|
||||
p = p.strip()
|
||||
|
@ -1067,7 +1079,8 @@ class UnicodeData:
|
|||
else:
|
||||
first, last = [int(c, 16) for c in s[0].split('..')]
|
||||
for char in range(first, last+1):
|
||||
table[char][-1].add('Line_Break')
|
||||
if select(char):
|
||||
table[char][-1].add('Line_Break')
|
||||
|
||||
# We only want the quickcheck properties
|
||||
# Format: NF?_QC; Y(es)/N(o)/M(aybe)
|
||||
|
@ -1093,8 +1106,9 @@ class UnicodeData:
|
|||
else:
|
||||
first, last = [int(c, 16) for c in s[0].split('..')]
|
||||
for char in range(first, last+1):
|
||||
assert not (quickchecks[char]>>quickcheck_shift)&3
|
||||
quickchecks[char] |= quickcheck
|
||||
if select(char):
|
||||
assert not (quickchecks[char]>>quickcheck_shift)&3
|
||||
quickchecks[char] |= quickcheck
|
||||
for i in range(0, 0x110000):
|
||||
if table[i] is not None:
|
||||
table[i].append(quickchecks[i])
|
||||
|
@ -1130,10 +1144,11 @@ class UnicodeData:
|
|||
# handle_capital_sigma in unicodeobject.c.
|
||||
continue
|
||||
c = int(data[0], 16)
|
||||
lower = [int(char, 16) for char in data[1].split()]
|
||||
title = [int(char, 16) for char in data[2].split()]
|
||||
upper = [int(char, 16) for char in data[3].split()]
|
||||
sc[c] = (lower, title, upper)
|
||||
if select(c):
|
||||
lower = [int(char, 16) for char in data[1].split() if select(int(char, 16))]
|
||||
title = [int(char, 16) for char in data[2].split() if select(int(char, 16))]
|
||||
upper = [int(char, 16) for char in data[3].split() if select(int(char, 16))]
|
||||
sc[c] = (lower, title, upper)
|
||||
cf = self.case_folding = {}
|
||||
if version != '3.2.0':
|
||||
with open_data(CASE_FOLDING, version) as file:
|
||||
|
@ -1144,7 +1159,8 @@ class UnicodeData:
|
|||
data = s.split("; ")
|
||||
if data[1] in "CF":
|
||||
c = int(data[0], 16)
|
||||
cf[c] = [int(char, 16) for char in data[2].split()]
|
||||
if select(c):
|
||||
cf[c] = [int(char, 16) for char in data[2].split()]
|
||||
|
||||
def uselatin1(self):
|
||||
# restrict character range to ISO Latin 1
|
||||
|
@ -1223,52 +1239,122 @@ class Hash:
|
|||
if table[i] is None:
|
||||
table[i] = 0
|
||||
|
||||
self.data = Array(name + "_hash", table)
|
||||
self.data = Array(name + "Hash", table, pack=True)
|
||||
self.magic = magic
|
||||
self.name = name
|
||||
self.size = size
|
||||
self.poly = poly
|
||||
|
||||
def dump(self, file, trace):
|
||||
def dump(self, file, hdr, trace):
|
||||
# write data to file, as a C array
|
||||
self.data.dump(file, trace)
|
||||
file.write("#define %s_magic %d\n" % (self.name, self.magic))
|
||||
file.write("#define %s_size %d\n" % (self.name, self.size))
|
||||
file.write("#define %s_poly %d\n" % (self.name, self.poly))
|
||||
self.data.dump(file, hdr, trace)
|
||||
hdr.write("#define %sMagic %d\n" % (self.name, self.magic))
|
||||
hdr.write("#define %sSize %d\n" % (self.name, self.size))
|
||||
hdr.write("#define %sPoly %d\n" % (self.name, self.poly))
|
||||
|
||||
# stuff to deal with arrays of unsigned integers
|
||||
|
||||
class Array:
|
||||
def pack(data, bits, word=32):
    """Pack fixed-width unsigned integers into a stream of words.

    Every item of *data* occupies exactly *bits* bits.  Items are laid
    out least-significant-first: ``data[0]`` lands in the low bits of the
    first word yielded, and an item may straddle two consecutive words.

    Args:
        data: sequence (needs ``len()`` and ``reversed()``) of
            non-negative integers.  Values are not range-checked here; an
            item wider than *bits* bleeds into its neighbours.
        bits: width of each packed item; must satisfy ``0 < bits < word``.
        word: width in bits of each yielded word (default 32).

    Yields:
        ``ceil(bits * len(data) / word)`` integers, each in
        ``[0, 2**word)``.

    Raises:
        AssertionError: if *bits* is outside ``(0, word)``.  Because this
            is a generator, the check only runs once iteration starts.
    """
    assert 0 < bits < word
    nwords = (bits * len(data) + word - 1) // word
    mask = (1 << word) - 1
    # Fold everything into one big integer; walking the data backwards
    # leaves data[0] in the lowest-order bits.
    acc = 0
    for x in reversed(data):
        acc <<= bits
        acc |= x
    for _ in range(nwords):
        yield acc & mask
        # Advance by the configured word width.  This used to be a
        # hard-coded 32, which corrupted output for any word != 32.
        acc >>= word
|
||||
|
||||
def __init__(self, name, data):
|
||||
def deflate(data):
    """Return *data* compressed as a raw DEFLATE stream.

    The compressor runs at ``zlib.Z_BEST_COMPRESSION`` with a negative
    window-bits value (``-zlib.MAX_WBITS``), so the result carries no
    zlib header or checksum trailer.

    Args:
        data: bytes-like object to compress.

    Returns:
        The compressed stream as ``bytes``.
    """
    # NOTE: the author left an alternative configuration commented out
    # here that additionally passed zlib.DEF_MEM_LEVEL and zlib.Z_RLE.
    compressor = zlib.compressobj(
        zlib.Z_BEST_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS)
    return compressor.compress(data) + compressor.flush(zlib.Z_FINISH)
|
||||
|
||||
class Array:
|
||||
def __init__(self, name, data, rle=False, pack=False):
|
||||
self.name = name
|
||||
self.data = data
|
||||
self.pack = pack
|
||||
self.rle = rle # adds 90µs latency to startup
|
||||
|
||||
def dump(self, file, trace=0):
|
||||
# write data to file, as a C array
|
||||
def dump(self, file, hdr, trace=0):
|
||||
# write data to f, as a C array
|
||||
f = file
|
||||
bits = max(x.bit_length() for x in self.data)
|
||||
size = getsize(self.data)
|
||||
if trace:
|
||||
print(self.name+":", size*len(self.data), "bytes", file=sys.stderr)
|
||||
file.write("static ")
|
||||
print("%s: %d bits" % (self.name, bits), file=sys.stderr)
|
||||
print("%s: size is %d bytes" % (self.name, size*len(self.data)), file=sys.stderr)
|
||||
print("%s: packed size is %d bytes" % (self.name, (bits*len(self.data)+31)//32*4), file=sys.stderr)
|
||||
print("%s: rle size is %d bytes" % (self.name, len(tuple(rle(self.data, (1<<(8*size))-1)))*size*2), file=sys.stderr)
|
||||
if size == 1:
|
||||
print("%s: deflate size is %d bytes" % (self.name, len(deflate(bytearray(self.data)))), file=sys.stderr)
|
||||
if self.pack:
|
||||
hdr.write("#define %sBits %d\n" % (self.name, bits))
|
||||
self.data = tuple(pack(self.data, bits))
|
||||
size = 4
|
||||
if size == 1:
|
||||
file.write("unsigned char")
|
||||
t = "unsigned char"
|
||||
elif size == 2:
|
||||
file.write("unsigned short")
|
||||
t = "unsigned short"
|
||||
else:
|
||||
file.write("unsigned int")
|
||||
file.write(" " + self.name + "[] = {\n")
|
||||
if self.data:
|
||||
s = " "
|
||||
for item in self.data:
|
||||
i = str(item) + ", "
|
||||
if len(s) + len(i) > 78:
|
||||
file.write(s + "\n")
|
||||
s = " " + i
|
||||
else:
|
||||
s = s + i
|
||||
if s.strip():
|
||||
file.write(s + "\n")
|
||||
file.write("};\n\n")
|
||||
t = "unsigned int"
|
||||
hdr.write("extern const %s %s[%d];\n" % (t, self.name, len(self.data)))
|
||||
if self.rle:
|
||||
codes = tuple(rle(self.data, (1<<(8*size))-1))
|
||||
f.write("%s %s[%d];\n" % (t, self.name, len(self.data)))
|
||||
f.write("static const %s %s_rodata[%d+1][2] = { /* %g%% profit */\n" % (t, self.name, len(codes), len(codes) * size * 2 / float(len(self.data) * size) * 100))
|
||||
for a,b in codes:
|
||||
f.write(" {%3d, 0x%02x},\n" % (a, b))
|
||||
f.write(" {0},\n")
|
||||
f.write("};\n")
|
||||
f.write("static textstartup void %s_init(void) {\n" % (self.name));
|
||||
if size == 1:
|
||||
f.write(" rldecode2(%s, (void *)%s_rodata);\n" % (self.name, self.name));
|
||||
else:
|
||||
f.write(" int i, j, k;\n");
|
||||
f.write(" for (k = i = 0; i < %d; ++i) {\n" % (len(codes)));
|
||||
f.write(" for (j = 0; j < %s_rodata[i][0]; ++j) {\n" % (self.name));
|
||||
f.write(" %s[k++] = %s_rodata[i][1];\n" % (self.name, self.name));
|
||||
f.write(" }\n");
|
||||
f.write(" }\n");
|
||||
f.write("}\n");
|
||||
f.write("const void *const %s_ctor[] initarray = {\n" % (self.name));
|
||||
f.write(" %s_init,\n" % (self.name));
|
||||
f.write("};\n");
|
||||
f.write("\n");
|
||||
else:
|
||||
f.write("const %s %s[] = {\n" % (t, self.name))
|
||||
if self.data:
|
||||
s = " "
|
||||
for item in self.data:
|
||||
i = str(item) + ", "
|
||||
if len(s) + len(i) > 78:
|
||||
f.write(s + "\n")
|
||||
s = " " + i
|
||||
else:
|
||||
s = s + i
|
||||
if s.strip():
|
||||
f.write(s + "\n")
|
||||
f.write("};\n\n")
|
||||
|
||||
def rle(data, maxval):
    """Run-length encode *data*, yielding ``(count, value)`` pairs.

    Consecutive equal items are collapsed into a single pair.  A run is
    cut and a new pair started once its count reaches *maxval*, so that
    the count stays representable in the caller's element type.

    Args:
        data: iterable of items comparable with ``==``.
        maxval: upper bound on the count of a single pair.

    Yields:
        ``(count, value)`` tuples in input order; nothing for empty input.
    """
    run = 0          # length of the run being accumulated (0 = none yet)
    current = None   # value of that run; meaningful only when run > 0
    for item in data:
        if run and current == item and run < maxval:
            run += 1
            continue
        if run:
            # The value changed or the run hit the cap: emit it.
            yield (run, current)
        current = item
        run = 1
    if run:
        yield (run, current)
|
||||
|
||||
def getsize(data):
|
||||
# return smallest possible integer size for the given array
|
||||
|
@ -1294,7 +1380,6 @@ def splitbins(t, trace=0):
|
|||
is printed to sys.stderr. The higher the value, the more info
|
||||
you'll get.
|
||||
"""
|
||||
|
||||
if trace:
|
||||
def dump(t1, t2, shift, bytes):
|
||||
print("%d+%d bins at shift %d; %d bytes" % (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue