Decentralize Python native module linkage

We can now link even smaller Python binaries. For example, the hello.com
program in the Python build directory is a compiled linked executable of
hello.py which just prints hello world. Using decentralized sections, we
can make that binary 1.9mb in size (noting that python.com is 6.3 megs!)

This works for nontrivial programs too. For example, say we want an APE
binary that's equivalent to python.com -m http.server. Our makefile now
builds such a binary using the new launcher and it's only 3.2mb in size
since Python sources get turned into ELF objects, which tell our linker
that we need things like native hashing algorithm code.
This commit is contained in:
Justine Tunney 2021-09-07 11:40:11 -07:00
parent dfa0359b50
commit 559b024e1d
129 changed files with 2798 additions and 13514 deletions

View file

@ -11,8 +11,7 @@ new(name, data=b'', **kwargs) - returns a new hash object implementing the
Named constructor functions are also available, these are faster
than using new(name):
md5(), sha1(), sha224(), sha256(), sha384(), sha512(),
sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256.
md5(), sha1(), sha224(), sha256(), sha384(), sha512(), and blake2b256().
More algorithms may be available on your platform but the above are guaranteed
to exist. See the algorithms_guaranteed and algorithms_available attributes
@ -56,9 +55,7 @@ More condensed:
# This tuple and __get_builtin_constructor() must be modified if a new
# always available algorithm is added.
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
'shake_128', 'shake_256')
'blake2b256')
algorithms_guaranteed = set(__always_supported)
algorithms_available = set(__always_supported)
@ -73,23 +70,12 @@ def __get_builtin_constructor(name):
constructor = cache.get(name)
if constructor is not None:
return constructor
try:
if name in ('SHA1', 'sha1'):
import _sha1
cache['SHA1'] = cache['sha1'] = _sha1.sha1
elif name in ('MD5', 'md5'):
import _md5
cache['MD5'] = cache['md5'] = _md5.md5
elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'):
import _sha256
cache['SHA224'] = cache['sha224'] = _sha256.sha224
cache['SHA256'] = cache['sha256'] = _sha256.sha256
elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'):
import _sha512
cache['SHA384'] = cache['sha384'] = _sha512.sha384
cache['SHA512'] = cache['sha512'] = _sha512.sha512
elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
'shake_128', 'shake_256'}:
if name in ('MD5', 'md5'):
import _md5
cache['MD5'] = cache['md5'] = _md5.md5
elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
'shake_128', 'shake_256'}:
try:
import _sha3
cache['sha3_224'] = _sha3.sha3_224
cache['sha3_256'] = _sha3.sha3_256
@ -97,21 +83,19 @@ def __get_builtin_constructor(name):
cache['sha3_512'] = _sha3.sha3_512
cache['shake_128'] = _sha3.shake_128
cache['shake_256'] = _sha3.shake_256
except ImportError:
pass # no extension module, this hash is unsupported.
except ImportError:
raise ValueError('unsupported hash type ' + name)
constructor = cache.get(name)
if constructor is not None:
return constructor
raise ValueError('unsupported hash type ' + name)
def __get_openssl_constructor(name):
def __get_mbedtls_constructor(name):
try:
f = getattr(_hashlib, 'openssl_' + name)
f = getattr(_hashlib, 'mbedtls_' + name)
# Allow the C module to raise ValueError. The function will be
# defined but the hash not actually available thanks to OpenSSL.
# defined but the hash not actually available thanks to Mbedtls.
f()
# Use the C function directly (very fast)
return f
@ -134,25 +118,21 @@ def __hash_new(name, data=b'', **kwargs):
try:
return _hashlib.new(name, data)
except ValueError:
# If the _hashlib module (OpenSSL) doesn't support the named
# If the _hashlib module (Mbedtls) doesn't support the named
# hash, try using our builtin implementations.
# This allows for SHA224/256 and SHA384/512 support even though
# the OpenSSL library prior to 0.9.8 doesn't provide them.
# the Mbedtls library prior to 0.9.8 doesn't provide them.
return __get_builtin_constructor(name)(data)
try:
import _hashlib
new = __hash_new
__get_hash = __get_openssl_constructor
algorithms_available = algorithms_available.union(
_hashlib.openssl_md_meth_names)
except ImportError:
new = __py_new
__get_hash = __get_builtin_constructor
import _hashlib
new = __hash_new
__get_hash = __get_mbedtls_constructor
algorithms_available = algorithms_available.union(
_hashlib.mbedtls_md_meth_names)
try:
# OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
# Mbedtls's PKCS5_PBKDF2_HMAC requires Mbedtls 1.0+ with HMAC and SHA
from _hashlib import pbkdf2_hmac
except ImportError:
_trans_5C = bytes((x ^ 0x5C) for x in range(256))
@ -162,7 +142,7 @@ except ImportError:
"""Password based key derivation function 2 (PKCS #5 v2.0)
This Python implementations based on the hmac module about as fast
as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much faster
as Mbedtls's PKCS5_PBKDF2_HMAC for short passwords and much faster
for long passwords.
"""
if not isinstance(hash_name, str):
@ -216,26 +196,19 @@ except ImportError:
return dkey[:dklen]
try:
# OpenSSL's scrypt requires OpenSSL 1.1+
# Mbedtls's scrypt requires Mbedtls 1.1+
from _hashlib import scrypt
except ImportError:
pass
md5 = __get_hash('md5')
sha1 = __get_hash('sha1')
sha224 = __get_hash('sha224')
sha256 = __get_hash('sha256')
sha384 = __get_hash('sha384')
sha512 = __get_hash('sha512')
sha3_224 = __get_hash('sha3_224')
sha3_256 = __get_hash('sha3_256')
sha3_384 = __get_hash('sha3_384')
sha3_512 = __get_hash('sha3_512')
shake_128 = __get_hash('shake_128')
shake_256 = __get_hash('shake_256')
blake2b256 = __get_hash('blake2b256')
# Cleanup locals()
del __always_supported, __get_hash
del __py_new, __hash_new, __get_openssl_constructor
del __py_new, __hash_new, __get_mbedtls_constructor