Decentralize Python native module linkage

We can now link even smaller Python binaries. For example, the hello.com
program in the Python build directory is a compiled and linked executable
of hello.py, which just prints "hello world". Using decentralized sections,
we can make that binary 1.9 MB in size (for comparison, python.com is
6.3 MB!)

This works for nontrivial programs too. For example, say we want an APE
binary that's equivalent to `python.com -m http.server`. Our makefile now
builds such a binary using the new launcher, and it's only 3.2 MB in size.
That's possible because Python sources get turned into ELF objects, which
tell our linker that we need things like native hashing algorithm code.
This commit is contained in:
Justine Tunney 2021-09-07 11:40:11 -07:00
parent dfa0359b50
commit 559b024e1d
129 changed files with 2798 additions and 13514 deletions

View file

@ -7,28 +7,15 @@ Don't import directly from third-party code; use the `locale` module instead!
import sys
import _locale
if sys.platform.startswith("win"):
def getpreferredencoding(do_setlocale=True):
return _locale._getdefaultlocale()[1]
else:
try:
_locale.CODESET
except AttributeError:
def getpreferredencoding(do_setlocale=True):
# This path for legacy systems needs the more complex
# getdefaultlocale() function, import the full locale module.
import locale
return locale.getpreferredencoding(do_setlocale)
else:
def getpreferredencoding(do_setlocale=True):
assert not do_setlocale
result = _locale.nl_langinfo(_locale.CODESET)
if not result and sys.platform in ('darwin', 'cosmo'):
# nl_langinfo can return an empty string
# when the setting has an invalid value.
# Default to UTF-8 in that case because
# UTF-8 is the default charset on OSX and
# returning nothing will crash the
# interpreter.
result = 'UTF-8'
return result
def getpreferredencoding(do_setlocale=True):
    """Return the codeset name reported by ``nl_langinfo(CODESET)``.

    The caller must pass a false ``do_setlocale``; this lightweight
    variant never changes the locale and asserts that it was not asked to.

    On 'darwin' and 'cosmo' an empty answer is replaced by 'UTF-8'; on
    any other platform the (possibly empty) answer is returned unchanged.
    """
    assert not do_setlocale
    codeset = _locale.nl_langinfo(_locale.CODESET)
    if codeset:
        return codeset
    # nl_langinfo can hand back an empty string when the locale setting
    # holds an invalid value. UTF-8 is the default charset on OSX, and
    # returning nothing would crash the interpreter, so fall back to it.
    if sys.platform in ('darwin', 'cosmo'):
        return 'UTF-8'
    return codeset

View file

@ -11,7 +11,7 @@ FUNCTIONS:
"""
import time
import locale
# import locale
import calendar
from re import compile as re_compile
from re import IGNORECASE
@ -28,7 +28,8 @@ __all__ = []
def _getlang():
# Figure out what the current language is set to.
return locale.getlocale(locale.LC_TIME)
# return locale.getlocale(locale.LC_TIME)
return (None, None)
class LocaleTime(object):
"""Stores and handles locale-specific information related to time.

View file

@ -39,17 +39,10 @@ from _weakref import proxy as _proxy
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
from reprlib import recursive_repr as _recursive_repr
try:
from _collections import deque
except ImportError:
pass
else:
MutableSequence.register(deque)
from _collections import deque
MutableSequence.register(deque)
try:
from _collections import defaultdict
except ImportError:
pass
from _collections import defaultdict
################################################################################

View file

@ -11,8 +11,7 @@ new(name, data=b'', **kwargs) - returns a new hash object implementing the
Named constructor functions are also available, these are faster
than using new(name):
md5(), sha1(), sha224(), sha256(), sha384(), sha512(),
sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256.
md5(), sha1(), sha224(), sha256(), sha384(), sha512(), and blake2b256().
More algorithms may be available on your platform but the above are guaranteed
to exist. See the algorithms_guaranteed and algorithms_available attributes
@ -56,9 +55,7 @@ More condensed:
# This tuple and __get_builtin_constructor() must be modified if a new
# always available algorithm is added.
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
'shake_128', 'shake_256')
'blake2b256')
algorithms_guaranteed = set(__always_supported)
algorithms_available = set(__always_supported)
@ -73,23 +70,12 @@ def __get_builtin_constructor(name):
constructor = cache.get(name)
if constructor is not None:
return constructor
try:
if name in ('SHA1', 'sha1'):
import _sha1
cache['SHA1'] = cache['sha1'] = _sha1.sha1
elif name in ('MD5', 'md5'):
import _md5
cache['MD5'] = cache['md5'] = _md5.md5
elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'):
import _sha256
cache['SHA224'] = cache['sha224'] = _sha256.sha224
cache['SHA256'] = cache['sha256'] = _sha256.sha256
elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'):
import _sha512
cache['SHA384'] = cache['sha384'] = _sha512.sha384
cache['SHA512'] = cache['sha512'] = _sha512.sha512
elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
'shake_128', 'shake_256'}:
if name in ('MD5', 'md5'):
import _md5
cache['MD5'] = cache['md5'] = _md5.md5
elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
'shake_128', 'shake_256'}:
try:
import _sha3
cache['sha3_224'] = _sha3.sha3_224
cache['sha3_256'] = _sha3.sha3_256
@ -97,21 +83,19 @@ def __get_builtin_constructor(name):
cache['sha3_512'] = _sha3.sha3_512
cache['shake_128'] = _sha3.shake_128
cache['shake_256'] = _sha3.shake_256
except ImportError:
pass # no extension module, this hash is unsupported.
except ImportError:
raise ValueError('unsupported hash type ' + name)
constructor = cache.get(name)
if constructor is not None:
return constructor
raise ValueError('unsupported hash type ' + name)
def __get_openssl_constructor(name):
def __get_mbedtls_constructor(name):
try:
f = getattr(_hashlib, 'openssl_' + name)
f = getattr(_hashlib, 'mbedtls_' + name)
# Allow the C module to raise ValueError. The function will be
# defined but the hash not actually available thanks to OpenSSL.
# defined but the hash not actually available thanks to Mbedtls.
f()
# Use the C function directly (very fast)
return f
@ -134,25 +118,21 @@ def __hash_new(name, data=b'', **kwargs):
try:
return _hashlib.new(name, data)
except ValueError:
# If the _hashlib module (OpenSSL) doesn't support the named
# If the _hashlib module (Mbedtls) doesn't support the named
# hash, try using our builtin implementations.
# This allows for SHA224/256 and SHA384/512 support even though
# the OpenSSL library prior to 0.9.8 doesn't provide them.
# the crypto backend (historically OpenSSL prior to 0.9.8) doesn't provide them.
return __get_builtin_constructor(name)(data)
try:
import _hashlib
new = __hash_new
__get_hash = __get_openssl_constructor
algorithms_available = algorithms_available.union(
_hashlib.openssl_md_meth_names)
except ImportError:
new = __py_new
__get_hash = __get_builtin_constructor
import _hashlib
new = __hash_new
__get_hash = __get_mbedtls_constructor
algorithms_available = algorithms_available.union(
_hashlib.mbedtls_md_meth_names)
try:
# OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
# Prefer _hashlib's native pbkdf2_hmac (the "1.0+ with HMAC and SHA" requirement was OpenSSL's)
from _hashlib import pbkdf2_hmac
except ImportError:
_trans_5C = bytes((x ^ 0x5C) for x in range(256))
@ -162,7 +142,7 @@ except ImportError:
"""Password based key derivation function 2 (PKCS #5 v2.0)
This Python implementations based on the hmac module about as fast
as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much faster
as Mbedtls's PKCS5_PBKDF2_HMAC for short passwords and much faster
for long passwords.
"""
if not isinstance(hash_name, str):
@ -216,26 +196,19 @@ except ImportError:
return dkey[:dklen]
try:
# OpenSSL's scrypt requires OpenSSL 1.1+
# Prefer _hashlib's native scrypt (the "1.1+" requirement was OpenSSL's)
from _hashlib import scrypt
except ImportError:
pass
md5 = __get_hash('md5')
sha1 = __get_hash('sha1')
sha224 = __get_hash('sha224')
sha256 = __get_hash('sha256')
sha384 = __get_hash('sha384')
sha512 = __get_hash('sha512')
sha3_224 = __get_hash('sha3_224')
sha3_256 = __get_hash('sha3_256')
sha3_384 = __get_hash('sha3_384')
sha3_512 = __get_hash('sha3_512')
shake_128 = __get_hash('shake_128')
shake_256 = __get_hash('shake_256')
blake2b256 = __get_hash('blake2b256')
# Cleanup locals()
del __always_supported, __get_hash
del __py_new, __hash_new, __get_openssl_constructor
del __py_new, __hash_new, __get_mbedtls_constructor

1
third_party/python/Lib/hello.py vendored Normal file
View file

@ -0,0 +1 @@
# Minimal example program: writes a single greeting line to stdout.
print("hello world")

View file

@ -1364,7 +1364,7 @@ def _get_supported_file_loaders():
extensions = ExtensionFileLoader, _imp.extension_suffixes()
source = SourceFileLoader, SOURCE_SUFFIXES
bytecode = SourcelessFileLoader, BYTECODE_SUFFIXES
return [extensions, bytecode, source]
return [bytecode, extensions, source]
def _setup(_bootstrap_module):

15
third_party/python/Lib/launchpy.py vendored Normal file
View file

@ -0,0 +1,15 @@
import sys
from importlib import _bootstrap_external
def run_module_as_main(mod_name):
    """Execute a precompiled module inside the ``__main__`` namespace.

    The dotted ``mod_name`` is mapped to ``/zip/.python/<a/b/c>.pyc`` and
    its code object is obtained through a ``SourcelessFileLoader``, so
    only bytecode is consulted (no source lookup).

    Before the code runs, the ``__main__`` module's globals are given the
    usual script metadata: ``__name__``, ``__file__``, ``__package__``,
    ``__loader__`` and ``__spec__``.

    Returns the ``__main__`` module's globals dict after execution.
    Any exception raised by the loader or by the executed code propagates
    to the caller.
    """
    pyc_path = "/zip/.python/%s.pyc" % (mod_name.replace(".", "/"))
    pyc_loader = _bootstrap_external.SourcelessFileLoader(mod_name, pyc_path)
    # Fetch the code object first so a missing/corrupt .pyc fails before
    # the __main__ namespace is touched.
    module_code = pyc_loader.get_code(mod_name)
    main_namespace = sys.modules["__main__"].__dict__
    main_namespace.update(
        __name__="__main__",
        __file__=pyc_path,
        __package__=None,
        __loader__=pyc_loader,
        __spec__=None,
    )
    exec(module_code, main_namespace)
    return main_namespace

View file

@ -1158,7 +1158,10 @@ def popen(cmd, mode="r", buffering=-1):
raise ValueError("invalid mode %r" % mode)
if buffering == 0 or buffering is None:
raise ValueError("popen() does not support unbuffered streams")
import subprocess, io
try:
import subprocess, io
except ImportError:
raise ImportError('please use subprocess module')
if mode == "r":
proc = subprocess.Popen(cmd,
shell=True,

View file

@ -2193,10 +2193,13 @@ def _start_server(urlhandler, port):
>>> print(serverthread.error)
None
"""
import http.server
import email.message
import select
import threading
try:
import http.server
import email.message
import select
import threading
except ImportError:
sys.exit(1)
class DocHandler(http.server.BaseHTTPRequestHandler):

View file

@ -626,7 +626,10 @@ def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
raise ValueError("bad value for 'compress', or compression format not "
"supported : {0}".format(compress))
import tarfile # late import for breaking circular dependency
try:
import tarfile
except ImportError:
raise
compress_ext = '.' + tar_compression if compress else ''
archive_name = base_name + '.tar' + compress_ext
@ -669,7 +672,10 @@ def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
The output zip file will be named 'base_name' + ".zip". Returns the
name of the output zip file.
"""
import zipfile # late import for breaking circular dependency
try:
import zipfile
except ImportError:
raise
zip_filename = base_name + ".zip"
archive_dir = os.path.dirname(base_name)
@ -877,7 +883,10 @@ def _ensure_directory(path):
def _unpack_zipfile(filename, extract_dir):
"""Unpack zip `filename` to `extract_dir`
"""
import zipfile # late import for breaking circular dependency
try:
import zipfile
except ImportError:
raise
if not zipfile.is_zipfile(filename):
raise ReadError("%s is not a zip file" % filename)
@ -911,7 +920,10 @@ def _unpack_zipfile(filename, extract_dir):
def _unpack_tarfile(filename, extract_dir):
"""Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`
"""
import tarfile # late import for breaking circular dependency
try:
import tarfile
except ImportError:
raise
try:
tarobj = tarfile.open(filename)
except tarfile.TarError:
@ -1003,22 +1015,6 @@ if hasattr(os, 'statvfs'):
used = (st.f_blocks - st.f_bfree) * st.f_frsize
return _ntuple_diskusage(total, used, free)
elif os.name == 'nt':
import nt
__all__.append('disk_usage')
_ntuple_diskusage = collections.namedtuple('usage', 'total used free')
def disk_usage(path):
"""Return disk usage statistics about the given path.
Returned values is a named tuple with attributes 'total', 'used' and
'free', which are the amount of total, used and free space, in bytes.
"""
total, free = nt._getdiskusage(path)
used = total - free
return _ntuple_diskusage(total, used, free)
def chown(path, user=None, group=None):
"""Change owner user and group of the given path.