Productionize new APE loader and more

The APE_NO_MODIFY_SELF loader payload has been moved out of the examples
folder and improved so that it works on BSD systems, and permits general
elf program headers. This brings its quality up enough that it should be
acceptable to use by default for many programs, e.g. Python, Lua, and
SQLite. It's the responsibility of the user to define an appropriate
TMPDIR if /tmp is considered an adversarial environment. Mac OS shall be
supported by APE_NO_MODIFY_SELF soon.

Fixes and improvements have been made to program_executable_name as it's
now the one true way to get the absolute path of the executing image.

This change fixes a memory leak in linenoise history loading, introduced
by performance optimizations in 51904e2687.
This change fixes a longstanding regression with Mach system calls that
23ae9dfceb introduced back in February, which impacted our sched_yield()
implementation, which is why no one noticed until now.

The Blinkenlights PC emulator has been improved. We now fix rendering on
XNU and BSD by not making the assumption that the kernel terminal driver
understands UTF8 since that seems to break its internal modeling of \r\n
which is now being addressed by using \e[𝑦H instead. The paneling is now
more compact in real mode so you won't need to make your font as tiny if
you're only emulating an 8086 program. The CLMUL ISA is now emulated too.

This change also makes improvement to time. CLOCK_MONOTONIC now does the
right thing on Windows NT. The nanosecond time module functions added in
Python 3.7 have been backported.

This change doubles the performance of Argon2 password stretching simply
by not using its copy_block and xor_block helper functions, as they were
trivial to inline thus resulting in us needing to iterate over each 1024
byte block four fewer times.

This change makes code size improvements. _PyUnicode_ToNumeric() was 64k
in size and now it's 10k. The CJK codec lookup tables now use lazy delta
zigzag deflate (δzd) encoding which reduces their size from 600k to 200k
plus the code bloat caused by macro abuse in _decimal.c is now addressed
so our fully-loaded statically-linked hermetically-sealed Python virtual
interpreter container is now 9.4 megs in the default build mode and 5.5m
in MODE=tiny which leaves plenty of room for chibicc.

The pydoc web server now accommodates the use case of people who work by
SSH'ing into a different machine w/ python.com -m pydoc -p8080 -h0.0.0.0

Finally, Python Capsulae delenda est and won't be supported in the future.
This commit is contained in:
Justine Tunney 2021-10-02 08:17:04 -07:00
parent 9cb54218ab
commit 47a53e143b
270 changed files with 214544 additions and 23331 deletions

View file

@ -0,0 +1,212 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/fmt/leb128.h"
#include "libc/log/check.h"
#include "libc/nexgen32e/crc32.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/append.internal.h"
#include "libc/stdio/stdio.h"
#include "libc/x/x.h"
#include "third_party/zlib/zlib.h"
#define MAC(x) m(#x, x, sizeof(x))
#define DZD(x) dzd(#x, x, sizeof(x), sizeof(x[0]))
static void *Deflate(const void *data, size_t size, size_t *out_size) {
void *res;
z_stream zs;
zs.zfree = 0;
zs.zalloc = 0;
CHECK_EQ(Z_OK, deflateInit2(&zs, Z_BEST_COMPRESSION, Z_DEFLATED, -MAX_WBITS,
DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY));
zs.next_in = data;
zs.avail_in = size;
zs.avail_out = compressBound(size);
zs.next_out = res = xmalloc(zs.avail_out);
CHECK_EQ(Z_STREAM_END, deflate(&zs, Z_FINISH));
CHECK_EQ(Z_OK, deflateEnd(&zs));
*out_size = zs.total_out;
return xrealloc(res, zs.total_out);
}
/**
 * Emits a C source file wrapping one CJK codec table as deflate
 * compressed data that's lazily inflated at runtime by xload().
 *
 * @param s is the table's symbol name
 * @param p points to the table data
 * @param n is the table's size in bytes
 */
void m(const char *s, void *p, size_t n) {
  FILE *f;
  size_t i, csize; /* renamed from `m`, which shadowed this function */
  unsigned char *q;
  q = Deflate(p, n, &csize);
  f = fopen(xstrcat("third_party/python/Modules/cjkcodecs/", s, ".c"), "wb");
  CHECK_NOTNULL(f);
  fprintf(f, "#include \"libc/x/x.h\"\n");
  fprintf(f, "/* clang-format off */\n");
  fprintf(f, "\n");
  fprintf(f, "static bool %s_once;\n", s);
  fprintf(f, "static void *%s_ptr;\n", s);
  fprintf(f, "static const unsigned char %s_rodata[] = {", s);
  for (i = 0; i < csize; ++i) {
    if (i % 13 == 0) fprintf(f, "\n ");
    fprintf(f, " 0x%02x,", q[i]);
  }
  fprintf(f, "\n};\n");
  fprintf(f, "\n");
  fprintf(f, "optimizesize void *%s(void) {\n", s);
  fprintf(f, " if (%s_once) return %s_ptr;\n", s, s);
  fprintf(f, " return xload(&%s_once,\n", s);
  fprintf(f, " &%s_ptr,\n", s);
  fprintf(f, " %s_rodata,\n", s);
  /* %zu: these are size_t; %d was undefined behavior on LP64 */
  fprintf(f, " %zu, %zu); /* %g%% profit */\n", csize, n,
          (double)csize / n * 100);
  fprintf(f, "}\n");
  CHECK_EQ(0, fclose(f));
  free(q); /* fix leak: Deflate() result was never freed */
  printf("\tthird_party/python/Modules/cjkcodecs/%s.c\t\\\n", s);
}
/**
 * Emits a C source file wrapping one CJK codec table as δzd (delta
 * zigzag deflate) encoded data, lazily decoded at runtime by xloadzd().
 *
 * @param s is the table's symbol name
 * @param p points to the table data
 * @param n is the table's size in bytes
 * @param z is the size of each element (2 or 4 bytes)
 */
void dzd(const char *s, void *p, size_t n, size_t z) {
  FILE *f;
  uint32_t S;
  size_t i, m;
  int64_t x, y;
  char *r, *q, t[19]; /* removed unused `b` */
  S = crc32_z(0, p, n); /* checksum lets xloadzd verify the decode */
  /* delta encode elements into a zigzag leb128 byte stream */
  for (r = 0, y = i = 0; i < n / z; ++i) {
    if (z == 2) {
      x = ((const uint16_t *)p)[i];
    } else if (z == 4) {
      x = ((const uint32_t *)p)[i];
    } else {
      unreachable;
    }
    appendd(&r, t, zleb64(t, x - y) - t);
    y = x;
  }
  q = Deflate(r, appendz(r).i, &m);
  f = fopen(xstrcat("third_party/python/Modules/cjkcodecs/", s, ".c"), "wb");
  CHECK_NOTNULL(f);
  fprintf(f, "#include \"libc/x/x.h\"\n");
  fprintf(f, "/* clang-format off */\n");
  fprintf(f, "\n");
  fprintf(f, "static bool %s_once;\n", s);
  fprintf(f, "static void *%s_ptr;\n", s);
  fprintf(f, "static const unsigned char %s_rodata[%zu] = {", s, m);
  for (i = 0; i < m; ++i) {
    if (i % 13 == 0) fprintf(f, "\n ");
    fprintf(f, " 0x%02x,", q[i] & 255);
  }
  fprintf(f, "\n};\n");
  fprintf(f, "\n");
  fprintf(f, "optimizesize void *%s(void) {\n", s);
  fprintf(f, " if (%s_once) return %s_ptr;\n", s, s);
  fprintf(f, " return xloadzd(&%s_once,\n", s);
  fprintf(f, " &%s_ptr,\n", s);
  fprintf(f, " %s_rodata,\n", s);
  /* %zu: these are size_t; %d was undefined behavior on LP64 */
  fprintf(f, " %zu, %zu, %zu, %zu, 0x%08xu); /* %g%% profit */\n",
          m, appendz(r).i, n / z, z, S, (double)m / n * 100);
  fprintf(f, "}\n");
  CHECK_EQ(0, fclose(f));
  free(q); /* fix leak: Deflate() result was never freed */
  free(r); /* fix leak: append buffer was never freed */
  printf("\tthird_party/python/Modules/cjkcodecs/%s.c\t\\\n", s);
}
#define JISX0213_ENCPAIRS 46 /* entries in jisx0213 pair encode map */
/* sentinel code-unit values used throughout the mapping tables */
#define UNIINV 0xFFFE /* invalid unicode mapping */
#define NOCHAR 0xFFFF /* unmapped dbcs code */
#define MULTIC 0xFFFE /* multi-codepoint mapping */
#define DBCINV 0xFFFD /* invalid dbcs code */
/* single-letter aliases keep the .inc table literals compact */
#define U UNIINV
#define N NOCHAR
#define M MULTIC
#define D DBCINV
typedef uint16_t ucs2_t; /* BMP code point */
typedef uint16_t DBCHAR; /* double-byte character set code */
typedef uint32_t Py_UCS4; /* full unicode code point */
/* decode-map index entry: maps a byte range [bottom,top] into `map` */
struct CjkIndex {
  uint16_t map;
  unsigned char bottom, top;
};
/* encodes a unicode sequence pair to one dbcs code */
struct CjkPairEncodeMap {
  uint32_t uniseq;
  uint16_t code;
};
/* the raw mapping tables that main() below compresses */
#include "mappings_cn.inc"
#include "mappings_hk.inc"
#include "mappings_jisx0213_pair.inc"
#include "mappings_jp.inc"
#include "mappings_kr.inc"
#include "mappings_tw.inc"
/**
 * Regenerates compressed CJK codec table sources.
 *
 * Tables emitted with MAC() use plain deflate; tables emitted with
 * DZD() use delta zigzag deflate, which pays off when neighboring
 * entries are numerically close. The generated file paths are printed
 * to stdout so they can be pasted into the build configuration.
 */
int main(int argc, char *argv[]) {
  MAC(big5_decmap);
  MAC(big5_encmap);
  MAC(big5hkscs_bmp_encmap);
  MAC(big5hkscs_decmap);
  MAC(big5hkscs_nonbmp_encmap);
  MAC(cp932ext_decmap);
  MAC(cp932ext_encmap);
  MAC(cp949_encmap);
  MAC(cp949ext_decmap);
  MAC(cp950ext_decmap);
  MAC(cp950ext_encmap);
  MAC(gb18030ext_decmap);
  MAC(gb18030ext_encmap);
  MAC(gb2312_decmap);
  MAC(gbcommon_encmap);
  MAC(gbkext_decmap);
  MAC(jisx0208_decmap);
  MAC(jisx0212_decmap);
  MAC(jisx0213_1_bmp_decmap);
  MAC(jisx0213_1_emp_decmap);
  MAC(jisx0213_2_bmp_decmap);
  MAC(jisx0213_2_emp_decmap);
  MAC(jisx0213_bmp_encmap);
  MAC(jisx0213_emp_encmap);
  MAC(jisx0213_pair_decmap);
  MAC(jisxcommon_encmap);
  MAC(ksx1001_decmap);
  MAC(jisx0213_pair_encmap);
  /* __-prefixed variants: DZD where the deltas compress well, MAC otherwise */
  DZD(__big5_decmap);
  MAC(__big5_encmap);
  MAC(__big5hkscs_bmp_encmap);
  MAC(__big5hkscs_decmap);
  MAC(__big5hkscs_nonbmp_encmap);
  DZD(__cp932ext_decmap);
  MAC(__cp932ext_encmap);
  DZD(__cp949_encmap);
  DZD(__cp949ext_decmap);
  DZD(__cp950ext_decmap);
  MAC(__cp950ext_encmap);
  DZD(__gb18030ext_decmap);
  DZD(__gb18030ext_encmap);
  DZD(__gb2312_decmap);
  DZD(__gbcommon_encmap);
  DZD(__gbkext_decmap);
  DZD(__jisx0208_decmap);
  DZD(__jisx0212_decmap);
  DZD(__jisx0213_1_bmp_decmap);
  MAC(__jisx0213_1_emp_decmap);
  DZD(__jisx0213_2_bmp_decmap);
  MAC(__jisx0213_2_emp_decmap);
  MAC(__jisx0213_bmp_encmap);
  MAC(__jisx0213_emp_encmap);
  DZD(__jisx0213_pair_decmap);
  MAC(__jisxcommon_encmap);
  DZD(__ksx1001_decmap);
  return 0;
}

View file

@ -29,6 +29,7 @@
import os
import sys
import bz2
import zlib
import zipfile
@ -115,6 +116,9 @@ def bias(c):
return True
def maketables(trace=0):
if not os.path.isdir("third_party/python"):
print("please cd to cosmopolitan root")
sys.exit(1)
print("--- Reading", UNICODE_DATA % "", "...")
version = ""
unicode = UnicodeData(UNIDATA_VERSION, select=bias)
@ -399,6 +403,86 @@ def makeunicodedata(hdr, unicode, trace):
print(" }", file=fp)
print("}", file=fp)
def GenerateToNumeric(db, fp):
    """Emits a binary-search implementation of _PyUnicode_ToNumeric().

    db is a sorted iterable of (value, codepoints) pairs where value is
    the numeric value's source text and codepoints lists the characters
    having that value. Characters are split into BMP (< 0x10000) and
    astral tables so each can be searched independently.
    """
    dubble = []
    normal = []
    astral = []
    for k, v in db:
        dubble.append(k)
        i = len(dubble) - 1
        for c in v:
            if c < 0x10000:
                normal.append((c, i))
            else:
                astral.append((c, i))
    # pick the smallest index type that can address the value table
    if len(dubble) < 255:
        t = 'uint8_t'
    else:
        t = 'uint16_t'
    print('static const double kNumeric[] = {', file=fp)
    for d in dubble:
        # NOTE(review): appending '.' turns "3" into the double literal
        # "3."; values containing '/' would not be valid C — verify the
        # caller pre-converts fractional values
        print(' %s.,' % (d), file=fp)
    print('};', file=fp)
    print(file=fp)
    normal.sort()
    print('static const uint32_t kNumericCodes[] = {', file=fp)
    for c, i in normal:
        print(' 0x%04x,' % (c), file=fp)
    print('};', file=fp)
    print(file=fp)
    print('static const %s kNumericIndices[] = {' % (t), file=fp)
    for c, i in normal:
        print(' %d,' % (i), file=fp)
    print('};', file=fp)
    print(file=fp)
    astral.sort()
    print('static const uint32_t kNumericAstralCodes[] = {', file=fp)
    for c, i in astral:
        print(' 0x%05x,' % (c), file=fp)
    print('};', file=fp)
    print(file=fp)
    print('static const %s kNumericAstralIndices[] = {' % (t), file=fp)
    for c, i in astral:
        print(' %d,' % (i), file=fp)
    print('};', file=fp)
    print("""
/* Returns the numeric value as double for Unicode characters
 * having this property, -1.0 otherwise.
 */
double _PyUnicode_ToNumeric(Py_UCS4 c)
{
  int l, m, r;
  if (c <= 0xFFFF) {
    l = 0;
    r = sizeof(kNumericCodes) / sizeof(kNumericCodes[0]) - 1;
    while (l <= r) {
      m = (l + r) >> 1;
      if (kNumericCodes[m] < c) {
        l = m + 1;
      } else if (kNumericCodes[m] > c) {
        r = m - 1;
      } else {
        return kNumeric[kNumericIndices[m]];
      }
    }
  } else {
    l = 0;
    r = sizeof(kNumericAstralCodes) / sizeof(kNumericAstralCodes[0]) - 1;
    while (l <= r) {
      m = (l + r) >> 1;
      if (kNumericAstralCodes[m] < c) {
        l = m + 1;
      } else if (kNumericAstralCodes[m] > c) {
        r = m - 1;
      } else {
        return kNumeric[kNumericAstralIndices[m]];
      }
    }
  }
  return -1;
}
""", file=fp)
# --------------------------------------------------------------------
# unicode character type tables
@ -548,35 +632,7 @@ def makeunicodetype(hdr, unicode, trace):
with open("third_party/python/Modules/unicodedata_tonumeric.c", "w") as fp:
startfile(fp)
# Generate code for _PyUnicode_ToNumeric()
numeric_items = sorted(numeric.items())
print('/* Returns the numeric value as double for Unicode characters', file=fp)
print(' * having this property, -1.0 otherwise.', file=fp)
print(' */', file=fp)
print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp)
print('{', file=fp)
print(' long a, b = 1;', file=fp)
print(' switch (ch) {', file=fp)
for value, codepoints in numeric_items:
# Turn text into float literals
parts = value.split('/')
codepoints.sort()
for codepoint in codepoints:
print(' case 0x%04X:' % (codepoint,), file=fp)
if len(parts) == 1:
print(' a = %s;' % (parts[0],), file=fp)
elif len(parts) == 2:
print(' a = %s;' % (parts[0],), file=fp)
print(' b = %s;' % (parts[1],), file=fp)
else:
assert False
print(' break;', file=fp)
print(' default:', file=fp)
print(' a = -1;', file=fp)
print(' break;', file=fp)
print(' }', file=fp)
print(' return (double)a / b;', file=fp)
print('}', file=fp)
GenerateToNumeric(sorted(numeric.items()), fp)
with open("third_party/python/Modules/unicodedata_iswhitespace.c", "w") as fp:
startfile(fp)
@ -671,7 +727,6 @@ def makeunicodename(hdr, unicode, trace):
# statistics
n = 0
print(short)
for i in range(short):
n = n + len(wordlist[i][1])
print(n, "short indexes in phrasebook")
@ -747,7 +802,7 @@ def makeunicodename(hdr, unicode, trace):
with open("third_party/python/Modules/unicodedata_lexicon.c", "w") as fp:
startfile(fp)
Array("_PyUnicode_Lexicon", lexicon).dump(fp, hdr, trace)
Array("_PyUnicode_LexiconOffset", lexicon_offset).dump(fp, hdr, trace)
Array("_PyUnicode_LexiconOffset", lexicon_offset, pack=True).dump(fp, hdr, trace)
# split decomposition index table
offset1, offset2, shift = splitbins(phrasebook_offset, trace)
@ -786,12 +841,10 @@ def makeunicodename(hdr, unicode, trace):
print(' {%d, {%s}},' % (len(sequence), seq_str), file=fp)
print('};', file=fp)
def merge_old_version(version, new, old):
# Changes to exclusion file not implemented yet
if old.exclusions != new.exclusions:
raise NotImplementedError("exclusions differ")
# In these change records, 0xFF means "no change"
bidir_changes = [0xFF]*0x110000
category_changes = [0xFF]*0x110000
@ -884,14 +937,18 @@ def merge_old_version(version, new, old):
normalization_changes))
def open_data(template, version):
local = template % ('-'+version,)
if not os.path.isdir('o/unicode'):
os.makedirs('o/unicode')
name = template % ('-'+version,)
local = os.path.join('o/unicode', name)
if not os.path.exists(local):
import urllib.request
if version == '3.2.0':
# irregular url structure
url = 'http://www.unicode.org/Public/3.2-Update/' + local
url = 'http://www.unicode.org/Public/3.2-Update/' + name
else:
url = ('http://www.unicode.org/Public/%s/ucd/'+template) % (version, '')
print('Downloading %s' % (url))
urllib.request.urlretrieve(url, filename=local)
if local.endswith('.txt'):
return open(local, encoding='utf-8')
@ -1265,6 +1322,24 @@ def pack(data, bits, word=32):
yield bita & ((1 << word) - 1)
bita >>= 32
def spack(data, bits, word=32):
    """Bit-packs signed integers into a stream of word-bit integers.

    Each value is stored two's-complement in a field of `bits` bits;
    fields are concatenated with the first value in the low bits, then
    yielded `word` bits at a time, low word first.
    """
    assert 0 < bits < word
    bitn = (bits * len(data) + word - 1) // word
    bita = 0
    sign = 1 << (bits - 1)
    mask = sign - 1
    for x in reversed(data):
        assert -sign <= x < sign, "x=%d bits=%d" % (x, bits)
        x = (x & mask) | (sign if x < 0 else 0)
        bita <<= bits
        bita |= x
    for i in range(bitn):
        yield bita & ((1 << word) - 1)
        bita >>= word  # BUGFIX: was hardcoded 32, wrong when word != 32
def bzip(data):
return bz2.compress(data)
def deflate(data):
# z = zlib.compressobj(zlib.Z_BEST_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, zlib.Z_RLE)
z = zlib.compressobj(zlib.Z_BEST_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS)
@ -1272,12 +1347,82 @@ def deflate(data):
b += z.flush(zlib.Z_FINISH)
return b
def rle(data, maxval):
    """Run-length encodes data, yielding (count, value) pairs.

    Runs longer than maxval are split so counts fit the storage type.
    """
    j = 0
    y = None
    for x in data:  # removed unused enumerate index
        if j == 0:
            y = x
            j = 1
        elif y == x and j < maxval:
            j += 1
        else:
            yield (j, y)
            y = x
            j = 1
    if j:
        yield (j, y)
def uleb(a, x):
    """Appends unsigned LEB128 encoding of x to bytearray a.

    x must be nonnegative; a negative value would loop forever since
    arithmetic right shift never exhausts it.
    """
    assert x >= 0, "uleb requires a nonnegative value"
    while True:
        b = x & 127
        x >>= 7
        if x:
            a.append(b | 128)
        else:
            a.append(b)
            break
def zig(x):
    """Zigzag-encodes signed x to unsigned: 0,-1,1,-2,2 -> 0,1,2,3,4."""
    m = (2 << x.bit_length()) - 1
    return ((x & (m >> 1)) << 1) ^ (m if x < 0 else 0)

def zleb(a, x):
    """Appends zigzag LEB128 encoding of signed x to bytearray a."""
    return uleb(a, zig(x))
def sleb(a, x):
    """Appends signed LEB128 encoding of x to bytearray a.

    The final byte is reached once the remaining value is all sign
    extension (0 for nonnegative, -1 for negative) and bit 6 of the
    emitted byte agrees with the sign.
    """
    t = 0
    while not t:
        b = x & 127
        x >>= 7
        if (x == 0 and not (b & 64)) or (x == -1 and (b & 64)):
            t = 1  # terminal byte: high bit left clear
        else:
            b |= 128
        a.append(b)
def δleb(data):
    """Delta-encodes data, then signed-LEB128 encodes each delta."""
    p = 0  # previous value (removed unused loop counter)
    a = bytearray()
    for x in data:
        sleb(a, x - p)
        p = x
    return a
def δzd(data):
    """Delta zigzag deflate encoding.

    Returns (deflated_bytes, uncompressed_length) where the deflated
    payload is the zigzag LEB128 stream of successive deltas.
    """
    p = 0  # previous value (removed unused n and i locals)
    a = bytearray()
    for x in data:
        zleb(a, x - p)
        p = x
    return deflate(a), len(a)
def com(x):
    """Formats an integer with thousands separators, e.g. 1234 -> '1,234'."""
    return '{:,}'.format(x)
class Array:
def __init__(self, name, data, rle=False, pack=False):
def __init__(self, name, data, rle=False, pack=False, δzd=False):
self.name = name
self.data = data
self.pack = pack
self.rle = rle # adds 90µs latency to startup
self.δzd = δzd
def to_bytes(self, size, order):
return b''.join(i.to_bytes(size, order) for i in self.data)
def dump(self, file, hdr, trace=0):
# write data to f, as a C array
@ -1285,16 +1430,22 @@ class Array:
bits = max(x.bit_length() for x in self.data)
size = getsize(self.data)
if trace:
print("%s: %d bits" % (self.name, bits), file=sys.stderr)
print("%s: size is %d bytes" % (self.name, size*len(self.data)), file=sys.stderr)
print("%s: packed size is %d bytes" % (self.name, (bits*len(self.data)+31)//32*4), file=sys.stderr)
print("%s: rle size is %d bytes" % (self.name, len(tuple(rle(self.data, (1<<(8*size))-1)))*size*2), file=sys.stderr)
if size == 1:
print("%s: deflate size is %d bytes" % (self.name, len(deflate(bytearray(self.data)))), file=sys.stderr)
print("%s: %d bits" % (self.name, bits))
print("%s: size is %12s bytes" % (self.name, com(size*len(self.data))))
print("%s: packed size is %12s bytes" % (self.name, com((bits*len(self.data)+31)//32*4)))
print("%s: rle size is %12s bytes" % (self.name, com(len(tuple(rle(self.data, (1<<(8*size))-1)))*size*2)))
print("%s: deflate size is %12s bytes" % (self.name, com(len(deflate(self.to_bytes(size, 'little'))))))
print("%s: bz2 size is %12s bytes" % (self.name, com(len(bzip(self.to_bytes(size, 'little'))))))
print("%s: δleb size is %12s bytes" % (self.name, com(len(δleb(self.data)))))
print("%s: δzd size is %12s bytes" % (self.name, com(len(δzd(self.data)[0]))))
if self.pack:
hdr.write("#define %sBits %d\n" % (self.name, bits))
self.data = tuple(pack(self.data, bits))
size = 4
if self.δzd:
m = size
self.data, n = δzd(self.data)
size = 1
if size == 1:
t = "unsigned char"
elif size == 2:
@ -1326,7 +1477,7 @@ class Array:
f.write("};\n");
f.write("\n");
else:
f.write("const %s %s[] = {\n" % (t, self.name))
f.write("const %s %s[%d] = {\n" % (t, self.name, len(self.data)))
if self.data:
s = " "
for item in self.data:
@ -1339,22 +1490,8 @@ class Array:
if s.strip():
f.write(s + "\n")
f.write("};\n\n")
def rle(data, maxval):
i = 0
j = 0
for i,x in enumerate(data):
if j == 0:
y = x
j = 1
elif y == x and j < maxval:
j += 1
else:
yield (j, y)
y = x
j = 1
if j:
yield (j, y)
if self.δzd:
f.write("/* %d %d */\n" % (n, m))
def getsize(data):
# return smallest possible integer size for the given array
@ -1383,9 +1520,9 @@ def splitbins(t, trace=0):
if trace:
def dump(t1, t2, shift, bytes):
print("%d+%d bins at shift %d; %d bytes" % (
len(t1), len(t2), shift, bytes), file=sys.stderr)
len(t1), len(t2), shift, bytes))
print("Size of original table:", len(t)*getsize(t), \
"bytes", file=sys.stderr)
"bytes")
n = len(t)-1 # last valid index
maxshift = 0 # the most we can shift n and still have something left
if n > 0:
@ -1417,7 +1554,7 @@ def splitbins(t, trace=0):
bytes = b
t1, t2, shift = best
if trace:
print("Best:", end=' ', file=sys.stderr)
print("Best:", end=' ')
dump(t1, t2, shift, bytes)
if __debug__:
# exhaustively verify that the decomposition is correct