cosmopolitan/third_party/python/Lib/hashlib.py
Justine Tunney 559b024e1d Decentralize Python native module linkage
We can now link even smaller Python binaries. For example, the hello.com
program in the Python build directory is a compiled linked executable of
hello.py which just prints hello world. Using decentralized sections, we
can make that binary 1.9mb in size (noting that python.com is 6.3 megs!)

This works for nontrivial programs too. For example, say we want an APE
binary that's equivalent to python.com -m http.server. Our makefile now
builds such a binary using the new launcher and it's only 3.2mb in size
since Python sources get turned into ELF objects, which tell our linker
that we need things like native hashing algorithm code.
2021-09-07 11:40:11 -07:00

214 lines
7.7 KiB
Python

#. Copyright (C) 2005-2010 Gregory P. Smith (greg@krypto.org)
# Licensed to PSF under a Contributor Agreement.
#
__doc__ = """hashlib module - A common interface to many hash functions.
new(name, data=b'', **kwargs) - returns a new hash object implementing the
given hash function; initializing the hash
using the given binary data.
Named constructor functions are also available, these are faster
than using new(name):
md5(), sha1(), sha224(), sha256(), sha384(), sha512(), and blake2b256().
More algorithms may be available on your platform but the above are guaranteed
to exist. See the algorithms_guaranteed and algorithms_available attributes
to find out what algorithm names can be passed to new().
NOTE: If you want the adler32 or crc32 hash functions they are available in
the zlib module.
Choose your hash function wisely. Some have known collision weaknesses.
sha384 and sha512 will be slow on 32 bit platforms.
Hash objects have these methods:
- update(data): Update the hash object with the bytes in data. Repeated calls
are equivalent to a single call with the concatenation of all
the arguments.
- digest(): Return the digest of the bytes passed to the update() method
so far as a bytes object.
- hexdigest(): Like digest() except the digest is returned as a string
of double length, containing only hexadecimal digits.
- copy(): Return a copy (clone) of the hash object. This can be used to
efficiently compute the digests of datas that share a common
initial substring.
For example, to obtain the digest of the byte string 'Nobody inspects the
spammish repetition':
>>> import hashlib
>>> m = hashlib.md5()
>>> m.update(b"Nobody inspects")
>>> m.update(b" the spammish repetition")
>>> m.digest()
b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9'
More condensed:
>>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest()
'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2'
"""
# This tuple and __get_builtin_constructor() must be modified if a new
# always available algorithm is added.
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
'blake2b256')
algorithms_guaranteed = set(__always_supported)
algorithms_available = set(__always_supported)
__all__ = __always_supported + ('new', 'algorithms_guaranteed',
'algorithms_available', 'pbkdf2_hmac')
__builtin_constructor_cache = {}
def __get_builtin_constructor(name):
cache = __builtin_constructor_cache
constructor = cache.get(name)
if constructor is not None:
return constructor
if name in ('MD5', 'md5'):
import _md5
cache['MD5'] = cache['md5'] = _md5.md5
elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
'shake_128', 'shake_256'}:
try:
import _sha3
cache['sha3_224'] = _sha3.sha3_224
cache['sha3_256'] = _sha3.sha3_256
cache['sha3_384'] = _sha3.sha3_384
cache['sha3_512'] = _sha3.sha3_512
cache['shake_128'] = _sha3.shake_128
cache['shake_256'] = _sha3.shake_256
except ImportError:
raise ValueError('unsupported hash type ' + name)
constructor = cache.get(name)
if constructor is not None:
return constructor
raise ValueError('unsupported hash type ' + name)
def __get_mbedtls_constructor(name):
try:
f = getattr(_hashlib, 'mbedtls_' + name)
# Allow the C module to raise ValueError. The function will be
# defined but the hash not actually available thanks to Mbedtls.
f()
# Use the C function directly (very fast)
return f
except (AttributeError, ValueError):
return __get_builtin_constructor(name)
def __py_new(name, data=b'', **kwargs):
"""new(name, data=b'', **kwargs) - Return a new hashing object using the
named algorithm; optionally initialized with data (which must be
a bytes-like object).
"""
return __get_builtin_constructor(name)(data, **kwargs)
def __hash_new(name, data=b'', **kwargs):
"""new(name, data=b'') - Return a new hashing object using the named algorithm;
optionally initialized with data (which must be a bytes-like object).
"""
try:
return _hashlib.new(name, data)
except ValueError:
# If the _hashlib module (Mbedtls) doesn't support the named
# hash, try using our builtin implementations.
# This allows for SHA224/256 and SHA384/512 support even though
# the Mbedtls library prior to 0.9.8 doesn't provide them.
return __get_builtin_constructor(name)(data)
import _hashlib
new = __hash_new
__get_hash = __get_mbedtls_constructor
algorithms_available = algorithms_available.union(
_hashlib.mbedtls_md_meth_names)
try:
# Mbedtls's PKCS5_PBKDF2_HMAC requires Mbedtls 1.0+ with HMAC and SHA
from _hashlib import pbkdf2_hmac
except ImportError:
_trans_5C = bytes((x ^ 0x5C) for x in range(256))
_trans_36 = bytes((x ^ 0x36) for x in range(256))
def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None):
"""Password based key derivation function 2 (PKCS #5 v2.0)
This Python implementations based on the hmac module about as fast
as Mbedtls's PKCS5_PBKDF2_HMAC for short passwords and much faster
for long passwords.
"""
if not isinstance(hash_name, str):
raise TypeError(hash_name)
if not isinstance(password, (bytes, bytearray)):
password = bytes(memoryview(password))
if not isinstance(salt, (bytes, bytearray)):
salt = bytes(memoryview(salt))
# Fast inline HMAC implementation
inner = new(hash_name)
outer = new(hash_name)
blocksize = getattr(inner, 'block_size', 64)
if len(password) > blocksize:
password = new(hash_name, password).digest()
password = password + b'\x00' * (blocksize - len(password))
inner.update(password.translate(_trans_36))
outer.update(password.translate(_trans_5C))
def prf(msg, inner=inner, outer=outer):
# PBKDF2_HMAC uses the password as key. We can re-use the same
# digest objects and just update copies to skip initialization.
icpy = inner.copy()
ocpy = outer.copy()
icpy.update(msg)
ocpy.update(icpy.digest())
return ocpy.digest()
if iterations < 1:
raise ValueError(iterations)
if dklen is None:
dklen = outer.digest_size
if dklen < 1:
raise ValueError(dklen)
dkey = b''
loop = 1
from_bytes = int.from_bytes
while len(dkey) < dklen:
prev = prf(salt + loop.to_bytes(4, 'big'))
# endianess doesn't matter here as long to / from use the same
rkey = int.from_bytes(prev, 'big')
for i in range(iterations - 1):
prev = prf(prev)
# rkey = rkey ^ prev
rkey ^= from_bytes(prev, 'big')
loop += 1
dkey += rkey.to_bytes(inner.digest_size, 'big')
return dkey[:dklen]
try:
# Mbedtls's scrypt requires Mbedtls 1.1+
from _hashlib import scrypt
except ImportError:
pass
md5 = __get_hash('md5')
sha1 = __get_hash('sha1')
sha224 = __get_hash('sha224')
sha256 = __get_hash('sha256')
sha384 = __get_hash('sha384')
sha512 = __get_hash('sha512')
blake2b256 = __get_hash('blake2b256')
# Cleanup locals()
del __always_supported, __get_hash
del __py_new, __hash_new, __get_mbedtls_constructor