initial import for Open Source 🎉

Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions

0  digest/__init__.py  Normal file

98  digest/checksums.py  Normal file

@@ -0,0 +1,98 @@
import hashlib
import logging
import tarfile

TarError = tarfile.TarError
logger = logging.getLogger(__name__)


def sha256_file(fp, data=None):
  h = hashlib.sha256(data or '')
  if not fp:
    return h.hexdigest()
  while True:
    buf = fp.read(4096)
    if not buf:
      break
    h.update(buf)
  return h.hexdigest()


def sha256_string(s):
  return hashlib.sha256(s).hexdigest()


def compute_tarsum(fp, json_data):
  header_fields = ('name', 'mode', 'uid', 'gid', 'size', 'mtime',
                   'type', 'linkname', 'uname', 'gname', 'devmajor',
                   'devminor')
  tar = None
  hashes = []
  try:
    tar = tarfile.open(mode='r|*', fileobj=fp)
    for member in tar:
      header = ''
      for field in header_fields:
        value = getattr(member, field)
        if field == 'type':
          field = 'typeflag'
        elif field == 'name':
          if member.isdir() and not value.endswith('/'):
            value += '/'
        header += '{0}{1}'.format(field, value)
      h = None
      try:
        if member.size > 0:
          f = tar.extractfile(member)
          h = sha256_file(f, header)
        else:
          h = sha256_string(header)
      except KeyError:
        h = sha256_string(header)
      hashes.append(h)
    hashes.sort()
  except tarfile.ReadError as e:
    if e.message != 'empty file':
      # NOTE(samalba): ignore empty tarfiles but still let the tarsum
      # compute with json data
      raise
  finally:
    if tar:
      tar.close()
  data = json_data + ''.join(hashes)
  tarsum = 'tarsum+sha256:{0}'.format(sha256_string(data))
  logger.debug('checksums.compute_tarsum: return %s', tarsum)
  return tarsum


def simple_checksum_handler(json_data):
  h = hashlib.sha256(json_data.encode('utf8') + '\n')

  def fn(buf):
    h.update(buf)
  return h, fn


def content_checksum_handler():
  h = hashlib.sha256()

  def fn(buf):
    h.update(buf)
  return h, fn


def compute_simple(fp, json_data):
  data = json_data + '\n'
  return 'sha256:{0}'.format(sha256_file(fp, data))


if __name__ == '__main__':
  import sys

  if len(sys.argv) < 3:
    print 'Usage: {0} json_file layer_file'.format(sys.argv[0])
    sys.exit(1)
  json_data = file(sys.argv[1]).read()
  fp = open(sys.argv[2])
  print compute_simple(fp, json_data)
  fp.seek(0)  # rewind so the tarsum pass re-reads the layer from the start
  print compute_tarsum(fp, json_data)
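
A minimal usage sketch for these helpers, assuming a layer tarball and its JSON metadata sit on disk as layer.tar and layer.json (both paths are hypothetical, chosen only for illustration):

  # Sketch: compute both checksum flavors over one layer.
  # 'layer.json' and 'layer.tar' are hypothetical paths, not part of this module.
  from digest import checksums

  with open('layer.json') as json_fp:
    json_data = json_fp.read()

  with open('layer.tar', 'rb') as layer_fp:
    simple = checksums.compute_simple(layer_fp, json_data)  # -> 'sha256:...'
    layer_fp.seek(0)                                         # rewind before the second full read
    tarsum = checksums.compute_tarsum(layer_fp, json_data)  # -> 'tarsum+sha256:...'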

82  digest/digest_tools.py  Normal file

@@ -0,0 +1,82 @@
import re
import os.path
import hashlib

DIGEST_PATTERN = r'([A-Za-z0-9_+.-]+):([A-Fa-f0-9]+)'
REPLACE_WITH_PATH = re.compile(r'[+.]')
REPLACE_DOUBLE_SLASHES = re.compile(r'/+')


class InvalidDigestException(RuntimeError):
  pass


class Digest(object):
  DIGEST_REGEX = re.compile(DIGEST_PATTERN)

  def __init__(self, hash_alg, hash_bytes):
    self._hash_alg = hash_alg
    self._hash_bytes = hash_bytes

  def __str__(self):
    return '{0}:{1}'.format(self._hash_alg, self._hash_bytes)

  def __eq__(self, rhs):
    return isinstance(rhs, Digest) and str(self) == str(rhs)

  @staticmethod
  def parse_digest(digest):
    """ Returns the digest parsed out to its components. """
    match = Digest.DIGEST_REGEX.match(digest)
    if match is None or match.end() != len(digest):
      raise InvalidDigestException('Not a valid digest: %s', digest)

    return Digest(match.group(1), match.group(2))

  @property
  def hash_alg(self):
    return self._hash_alg

  @property
  def hash_bytes(self):
    return self._hash_bytes


def content_path(digest):
  """ Returns a relative path to the parsed digest. """
  parsed = Digest.parse_digest(digest)
  components = []

  # Generate a prefix which is always two characters, and which will be filled with leading zeros
  # if the input does not contain at least two characters. e.g. ABC -> AB, A -> 0A
  prefix = parsed.hash_bytes[0:2].zfill(2)
  pathish = REPLACE_WITH_PATH.sub('/', parsed.hash_alg)
  normalized = REPLACE_DOUBLE_SLASHES.sub('/', pathish).lstrip('/')
  components.extend([normalized, prefix, parsed.hash_bytes])
  return os.path.join(*components)


def sha256_digest(content):
  """ Returns a sha256 hash of the content bytes in digest form. """
  def single_chunk_generator():
    yield content
  return sha256_digest_from_generator(single_chunk_generator())


def sha256_digest_from_generator(content_generator):
  """ Reads all of the data from the iterator and creates a sha256 digest from the content. """
  digest = hashlib.sha256()
  for chunk in content_generator:
    digest.update(chunk)
  return 'sha256:{0}'.format(digest.hexdigest())


def sha256_digest_from_hashlib(sha256_hash_obj):
  return 'sha256:{0}'.format(sha256_hash_obj.hexdigest())


def digests_equal(lhs_digest_string, rhs_digest_string):
  """ Parse and compare the two digests, returns True if the digests are equal, False otherwise. """
  return Digest.parse_digest(lhs_digest_string) == Digest.parse_digest(rhs_digest_string)
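
For quick reference, a short sketch of how these helpers behave for a couple of the digests exercised by the tests below (the content_path expectations come from those test cases; the empty-string value is the standard SHA-256 of no data):

  from digest.digest_tools import Digest, content_path, sha256_digest

  parsed = Digest.parse_digest('tarsum.v1+sha256:123123')
  parsed.hash_alg                          # 'tarsum.v1+sha256'
  parsed.hash_bytes                        # '123123'

  # '+' and '.' in the algorithm become path separators, and the hash gets a
  # two-character, zero-padded prefix directory.
  content_path('tarsum.v1+sha256:123123')  # 'tarsum/v1/sha256/12/123123'
  content_path('md5:1')                    # 'md5/01/1'

  sha256_digest('')  # 'sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'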


@@ -0,0 +1,43 @@
import pytest

from digest.digest_tools import Digest, content_path, InvalidDigestException


@pytest.mark.parametrize('digest, output_args', [
  ('tarsum.v123123+sha1:123deadbeef', ('tarsum.v123123+sha1', '123deadbeef')),
  ('tarsum.v1+sha256:123123', ('tarsum.v1+sha256', '123123')),
  ('tarsum.v0+md5:abc', ('tarsum.v0+md5', 'abc')),
  ('tarsum+sha1:abc', ('tarsum+sha1', 'abc')),
  ('sha1:123deadbeef', ('sha1', '123deadbeef')),
  ('sha256:123123', ('sha256', '123123')),
  ('md5:abc', ('md5', 'abc')),
])
def test_parse_good(digest, output_args):
  assert Digest.parse_digest(digest) == Digest(*output_args)
  assert str(Digest.parse_digest(digest)) == digest


@pytest.mark.parametrize('bad_digest', [
  'tarsum.v+md5:abc:',
  'sha1:123deadbeefzxczxv',
  'sha256123123',
  'tarsum.v1+',
  'tarsum.v1123+sha1:',
])
def test_parse_fail(bad_digest):
  with pytest.raises(InvalidDigestException):
    Digest.parse_digest(bad_digest)


@pytest.mark.parametrize('digest, path', [
  ('tarsum.v123123+sha1:123deadbeef', 'tarsum/v123123/sha1/12/123deadbeef'),
  ('tarsum.v1+sha256:123123', 'tarsum/v1/sha256/12/123123'),
  ('tarsum.v0+md5:abc', 'tarsum/v0/md5/ab/abc'),
  ('sha1:123deadbeef', 'sha1/12/123deadbeef'),
  ('sha256:123123', 'sha256/12/123123'),
  ('md5:abc', 'md5/ab/abc'),
  ('md5:1', 'md5/01/1'),
  ('md5.....+++:1', 'md5/01/1'),
  ('.md5.:1', 'md5/01/1'),
])
def test_paths(digest, path):
  assert content_path(digest) == path
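
Assuming pytest is installed and the repository root is on the import path, these tests should run with something like `py.test digest/` from the top of the checkout.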