initial import for Open Source 🎉
This commit is contained in:
parent
1898c361f3
commit
9c0dd3b722
2048 changed files with 218743 additions and 0 deletions
0
digest/__init__.py
Normal file
0
digest/__init__.py
Normal file
98
digest/checksums.py
Normal file
98
digest/checksums.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
import hashlib
|
||||
import logging
|
||||
import tarfile
|
||||
|
||||
|
||||
TarError = tarfile.TarError
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def sha256_file(fp, data=None):
    """Return the hex SHA-256 of *data* followed by the contents of *fp*.

    Args:
        fp: a binary file-like object, read to exhaustion in 4KB chunks,
            or a falsy value to hash only *data*.
        data: optional initial bytes folded into the hash before the file.

    Returns:
        The hexadecimal digest string.
    """
    # Default to b'' (not ''): hashlib rejects text strings on Python 3,
    # and b'' is the same object as '' on Python 2.
    h = hashlib.sha256(data or b'')
    if not fp:
        return h.hexdigest()
    while True:
        buf = fp.read(4096)
        if not buf:
            break
        h.update(buf)
    return h.hexdigest()
|
||||
|
||||
|
||||
def sha256_string(s):
    """Return the hexadecimal SHA-256 digest of the byte string *s*."""
    digest = hashlib.sha256(s)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def compute_tarsum(fp, json_data):
    """Compute a 'tarsum+sha256' checksum over a layer tarball.

    Each tar member contributes a hash of its serialized header fields
    (plus its file content, when non-empty); the sorted member hashes are
    appended to *json_data* and hashed once more for the final tarsum.

    Args:
        fp: file-like object positioned at the start of the tar stream.
        json_data: image JSON string mixed into the final hash.

    Returns:
        A string of the form 'tarsum+sha256:<hexdigest>'.

    Raises:
        tarfile.ReadError: if the stream is not a tar archive (an empty
            stream is tolerated and hashes only *json_data*).
    """
    header_fields = ('name', 'mode', 'uid', 'gid', 'size', 'mtime',
                     'type', 'linkname', 'uname', 'gname', 'devmajor',
                     'devminor')
    tar = None
    hashes = []
    try:
        tar = tarfile.open(mode='r|*', fileobj=fp)
        for member in tar:
            header = ''
            for field in header_fields:
                value = getattr(member, field)
                if field == 'type':
                    # The tarsum wire format names this field 'typeflag'.
                    field = 'typeflag'
                elif field == 'name':
                    # Directory names are normalized with a trailing slash.
                    if member.isdir() and not value.endswith('/'):
                        value += '/'
                header += '{0}{1}'.format(field, value)
            h = None
            try:
                if member.size > 0:
                    f = tar.extractfile(member)
                    h = sha256_file(f, header)
                else:
                    h = sha256_string(header)
            except KeyError:
                # Member has no extractable content; hash the header alone.
                h = sha256_string(header)
            hashes.append(h)
        hashes.sort()
    except tarfile.ReadError as e:
        # NOTE(samalba): ignore empty tarfiles but still let the tarsum
        # compute with json data. str(e) rather than e.message: the
        # .message attribute was removed from exceptions in Python 3.
        if str(e) != 'empty file':
            raise
    finally:
        if tar:
            tar.close()
    data = json_data + ''.join(hashes)
    tarsum = 'tarsum+sha256:{0}'.format(sha256_string(data))
    logger.debug('checksums.compute_tarsum: return %s', tarsum)
    return tarsum
|
||||
|
||||
|
||||
def simple_checksum_handler(json_data):
    """Seed a sha256 with *json_data* plus a newline; return (hash, feeder).

    Args:
        json_data: text whose UTF-8 encoding starts the checksum.

    Returns:
        A tuple (h, fn): *h* is the hashlib object and *fn* is a callback
        that folds additional byte chunks into it.
    """
    # b'\n' keeps the concatenation bytes-only: bytes + str raises
    # TypeError on Python 3, and b'\n' == '\n' on Python 2.
    h = hashlib.sha256(json_data.encode('utf8') + b'\n')

    def fn(buf):
        h.update(buf)
    return h, fn
|
||||
|
||||
|
||||
def content_checksum_handler():
    """Return (hash, feeder) for an unseeded sha256 over raw content bytes."""
    digest = hashlib.sha256()

    def update(chunk):
        digest.update(chunk)

    return digest, update
|
||||
|
||||
|
||||
def compute_simple(fp, json_data):
    """Return the simple 'sha256:' checksum of *json_data* + newline + *fp*."""
    seed = json_data + '\n'
    return 'sha256:{0}'.format(sha256_file(fp, seed))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    import sys
    if len(sys.argv) < 3:
        # print(...) parses as a statement on Python 2 and a call on Python 3.
        print('Usage: {0} json_file layer_file'.format(sys.argv[0]))
        sys.exit(1)
    # open() instead of the removed file() builtin; context managers close
    # the handles instead of leaking them.
    with open(sys.argv[1]) as json_fp:
        json_data = json_fp.read()
    # The layer is a tarball: read it as binary.
    with open(sys.argv[2], 'rb') as fp:
        print(compute_simple(fp, json_data))
        # compute_simple consumed the stream; rewind so compute_tarsum
        # sees the layer bytes instead of an exhausted (empty) file.
        fp.seek(0)
        print(compute_tarsum(fp, json_data))
|
82
digest/digest_tools.py
Normal file
82
digest/digest_tools.py
Normal file
|
@ -0,0 +1,82 @@
|
|||
import re
|
||||
import os.path
|
||||
import hashlib
|
||||
|
||||
|
||||
DIGEST_PATTERN = r'([A-Za-z0-9_+.-]+):([A-Fa-f0-9]+)'
REPLACE_WITH_PATH = re.compile(r'[+.]')
REPLACE_DOUBLE_SLASHES = re.compile(r'/+')


class InvalidDigestException(RuntimeError):
    """ Raised when a digest string does not match DIGEST_PATTERN. """
    pass


class Digest(object):
    """ A content digest split into its hash algorithm and hex bytes. """
    DIGEST_REGEX = re.compile(DIGEST_PATTERN)

    def __init__(self, hash_alg, hash_bytes):
        self._hash_alg = hash_alg
        self._hash_bytes = hash_bytes

    def __str__(self):
        return '{0}:{1}'.format(self._hash_alg, self._hash_bytes)

    def __repr__(self):
        return 'Digest({0!r}, {1!r})'.format(self._hash_alg, self._hash_bytes)

    def __eq__(self, rhs):
        return isinstance(rhs, Digest) and str(self) == str(rhs)

    def __ne__(self, rhs):
        return not self == rhs

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable on
        # Python 3; hash consistently with equality.
        return hash(str(self))

    @staticmethod
    def parse_digest(digest):
        """ Returns the digest parsed out to its components.

        Raises:
            InvalidDigestException: if *digest* does not fully match
                DIGEST_PATTERN.
        """
        match = Digest.DIGEST_REGEX.match(digest)
        if match is None or match.end() != len(digest):
            # %-interpolate here: passing (fmt, arg) as two constructor
            # arguments would store an unformatted tuple as the message.
            raise InvalidDigestException('Not a valid digest: %s' % digest)

        return Digest(match.group(1), match.group(2))

    @property
    def hash_alg(self):
        return self._hash_alg

    @property
    def hash_bytes(self):
        return self._hash_bytes


def content_path(digest):
    """ Returns a relative path to the parsed digest. """
    parsed = Digest.parse_digest(digest)
    components = []

    # Generate a prefix which is always two characters, and which will be
    # filled with leading zeros if the input does not contain at least two
    # characters. e.g. ABC -> AB, A -> 0A
    prefix = parsed.hash_bytes[0:2].zfill(2)
    # Map '+' and '.' in the algorithm to path separators, then collapse
    # duplicates, e.g. 'tarsum.v1+sha256' -> 'tarsum/v1/sha256'.
    pathish = REPLACE_WITH_PATH.sub('/', parsed.hash_alg)
    normalized = REPLACE_DOUBLE_SLASHES.sub('/', pathish).lstrip('/')
    components.extend([normalized, prefix, parsed.hash_bytes])
    return os.path.join(*components)
|
||||
|
||||
|
||||
def sha256_digest(content):
    """ Returns a sha256 hash of the content bytes in digest form. """
    return sha256_digest_from_generator(iter([content]))


def sha256_digest_from_generator(content_generator):
    """ Reads all of the data from the iterator and creates a sha256 digest
    from the content
    """
    h = hashlib.sha256()
    for piece in content_generator:
        h.update(piece)
    return 'sha256:{0}'.format(h.hexdigest())


def sha256_digest_from_hashlib(sha256_hash_obj):
    """ Formats an existing sha256 hashlib object as a digest string. """
    return 'sha256:' + sha256_hash_obj.hexdigest()
|
||||
|
||||
|
||||
def digests_equal(lhs_digest_string, rhs_digest_string):
    """ Parse and compare the two digests, returns True if the digests are
    equal, False otherwise.
    """
    lhs = Digest.parse_digest(lhs_digest_string)
    rhs = Digest.parse_digest(rhs_digest_string)
    return lhs == rhs
|
43
digest/test/test_digest_tools.py
Normal file
43
digest/test/test_digest_tools.py
Normal file
|
@ -0,0 +1,43 @@
|
|||
import pytest
|
||||
|
||||
from digest.digest_tools import Digest, content_path, InvalidDigestException
|
||||
|
||||
@pytest.mark.parametrize('digest, output_args', [
    ('tarsum.v123123+sha1:123deadbeef', ('tarsum.v123123+sha1', '123deadbeef')),
    ('tarsum.v1+sha256:123123', ('tarsum.v1+sha256', '123123')),
    ('tarsum.v0+md5:abc', ('tarsum.v0+md5', 'abc')),
    ('tarsum+sha1:abc', ('tarsum+sha1', 'abc')),
    ('sha1:123deadbeef', ('sha1', '123deadbeef')),
    ('sha256:123123', ('sha256', '123123')),
    ('md5:abc', ('md5', 'abc')),
])
def test_parse_good(digest, output_args):
    """ Well-formed digests parse into the expected components and
    round-trip through str(). """
    parsed = Digest.parse_digest(digest)
    assert parsed == Digest(*output_args)
    assert str(parsed) == digest
|
||||
|
||||
|
||||
@pytest.mark.parametrize('malformed', [
    'tarsum.v+md5:abc:',
    'sha1:123deadbeefzxczxv',
    'sha256123123',
    'tarsum.v1+',
    'tarsum.v1123+sha1:',
])
def test_parse_fail(malformed):
    """ Malformed digest strings are rejected with InvalidDigestException. """
    with pytest.raises(InvalidDigestException):
        Digest.parse_digest(malformed)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('digest, expected_path', [
    ('tarsum.v123123+sha1:123deadbeef', 'tarsum/v123123/sha1/12/123deadbeef'),
    ('tarsum.v1+sha256:123123', 'tarsum/v1/sha256/12/123123'),
    ('tarsum.v0+md5:abc', 'tarsum/v0/md5/ab/abc'),
    ('sha1:123deadbeef', 'sha1/12/123deadbeef'),
    ('sha256:123123', 'sha256/12/123123'),
    ('md5:abc', 'md5/ab/abc'),
    ('md5:1', 'md5/01/1'),
    ('md5.....+++:1', 'md5/01/1'),
    ('.md5.:1', 'md5/01/1'),
])
def test_paths(digest, expected_path):
    """ content_path() maps a digest onto its storage-relative path. """
    assert content_path(digest) == expected_path
|
Reference in a new issue