More changes for registry-v2 in Python.

Implement the minimal changes to the local filesystem storage driver and feed them through the distributed storage driver.
Create a digest package which contains digest_tools and checksums.
Fix the tests to use the new v1 endpoint locations.
Fix repository.delete_instance to properly filter the generated queries to avoid most subquery deletes, but still generate them when not explicitly filtered.
This commit is contained in:
Jake Moshenko 2015-07-06 15:00:07 -04:00
parent acbcc2e206
commit bea8b9ac53
23 changed files with 397 additions and 179 deletions

0
digest/__init__.py Normal file
View file

91
digest/checksums.py Normal file
View file

@ -0,0 +1,91 @@
import hashlib
import logging
import tarfile
# Module-level alias for tarfile.TarError.
# NOTE(review): presumably re-exported so callers can catch tar errors via this
# module without importing tarfile themselves -- confirm against callers.
TarError = tarfile.TarError
# Logger named after this module.
logger = logging.getLogger(__name__)
def sha256_file(fp, data=None):
    """Return the hex SHA-256 digest of ``data`` followed by the contents of ``fp``.

    Args:
        fp: A file-like object whose ``read(size)`` returns byte strings, or a
            falsy value (e.g. ``None``) to hash only ``data``.
        data: Optional byte string fed to the hash before anything read from
            ``fp``.

    Returns:
        The hexadecimal digest as a string.
    """
    # Seed with a bytes literal: it is identical to '' on Python 2, while on
    # Python 3 hashlib rejects a text string with TypeError.
    h = hashlib.sha256(data or b'')
    if not fp:
        return h.hexdigest()
    # Read in fixed-size chunks so the whole file is never held in memory.
    while True:
        buf = fp.read(4096)
        if not buf:
            break
        h.update(buf)
    return h.hexdigest()
def sha256_string(s):
    """Return the hexadecimal SHA-256 digest of the string ``s``."""
    hasher = hashlib.sha256(s)
    return hasher.hexdigest()
def compute_tarsum(fp, json_data):
    """Compute the ``tarsum+sha256`` checksum of a tar stream plus JSON data.

    For every member of the archive a header string is built from selected
    TarInfo fields (each rendered as ``<field name><value>``) and hashed
    together with the member's content when it has any. The per-member hashes
    are sorted, concatenated after ``json_data`` and hashed once more.

    Args:
        fp: File-like object positioned at the start of the tar data. It is
            opened as a stream (mode ``'r|*'``).
        json_data: String that is prepended to the sorted member hashes before
            the final hash is taken.

    Returns:
        A string of the form ``'tarsum+sha256:<hex digest>'``.

    Raises:
        tarfile.ReadError: If the archive cannot be read for any reason other
            than being empty.
    """
    header_fields = ('name', 'mode', 'uid', 'gid', 'size', 'mtime',
                     'type', 'linkname', 'uname', 'gname', 'devmajor',
                     'devminor')
    tar = None
    hashes = []
    try:
        tar = tarfile.open(mode='r|*', fileobj=fp)
        for member in tar:
            parts = []
            for field in header_fields:
                value = getattr(member, field)
                if field == 'type':
                    # The TarInfo attribute is ``type`` but it is labelled
                    # ``typeflag`` in the hashed header.
                    field = 'typeflag'
                elif field == 'name' and member.isdir() and not value.endswith('/'):
                    # Directory names always carry a trailing slash.
                    value += '/'
                parts.append('{0}{1}'.format(field, value))
            header = ''.join(parts)
            try:
                if member.size > 0:
                    h = sha256_file(tar.extractfile(member), header)
                else:
                    h = sha256_string(header)
            except KeyError:
                # Fall back to hashing the header alone.
                h = sha256_string(header)
            hashes.append(h)
        # Sort so the result does not depend on member order in the archive.
        hashes.sort()
    except tarfile.ReadError as e:
        # str(e) instead of e.message: the ``message`` attribute is deprecated
        # since Python 2.6 and does not exist on Python 3.
        if str(e) != 'empty file':
            # NOTE(samalba): ignore empty tarfiles but still let the tarsum
            # compute with json data
            raise
    finally:
        if tar is not None:
            tar.close()
    data = json_data + ''.join(hashes)
    tarsum = 'tarsum+sha256:{0}'.format(sha256_string(data))
    logger.debug('checksums.compute_tarsum: return %s', tarsum)
    return tarsum
def simple_checksum_handler(json_data):
    """Create an incremental SHA-256 hash seeded with ``json_data`` and a newline.

    Args:
        json_data: Byte string hashed first, followed by a single newline.

    Returns:
        A ``(hash, update)`` tuple: the hashlib SHA-256 object and a
        one-argument callable that feeds a chunk of data into that object.
    """
    # Bytes literal for the separator: identical to '\n' on Python 2, and it
    # keeps the concatenation valid for bytes input on Python 3.
    h = hashlib.sha256(json_data + b'\n')

    def fn(buf):
        h.update(buf)

    return h, fn
def compute_simple(fp, json_data):
    """Compute the ``sha256:`` checksum of ``json_data``, a newline and ``fp``.

    Args:
        fp: File-like object whose contents are hashed after the JSON data.
        json_data: Byte string hashed first, followed by a single newline.

    Returns:
        A string of the form ``'sha256:<hex digest>'``.
    """
    # Bytes literal for the separator: identical to '\n' on Python 2, and it
    # keeps the concatenation valid for bytes input on Python 3.
    seed = json_data + b'\n'
    return 'sha256:{0}'.format(sha256_file(fp, seed))
# Command-line helper: print the simple checksum and the tarsum of a layer.
if __name__ == '__main__':
    import sys
    if len(sys.argv) < 3:
        print('Usage: {0} json_file layer_file'.format(sys.argv[0]))
        sys.exit(1)
    with open(sys.argv[1]) as json_file:
        json_data = json_file.read()
    # The layer is a tar archive, so it is read in binary mode.
    with open(sys.argv[2], 'rb') as fp:
        print(compute_simple(fp, json_data))
        # compute_simple() reads the file to EOF; rewind so that
        # compute_tarsum() sees the archive instead of an empty stream.
        fp.seek(0)
        print(compute_tarsum(fp, json_data))

62
digest/digest_tools.py Normal file
View file

@ -0,0 +1,62 @@
import re
import os.path
import hashlib
from collections import namedtuple
# Parsed form of a digest string, as returned by parse_digest().
Digest = namedtuple('Digest', ['is_tarsum', 'tarsum_version', 'hash_alg', 'hash_bytes'])

# Optional 'tarsum.<version>+' prefix, then '<algorithm>:<lowercase hex>'.
DIGEST_PATTERN = r'(tarsum\.(v[\w]+)\+)?([\w]+):([0-9a-f]+)'
DIGEST_REGEX = re.compile(DIGEST_PATTERN)


class InvalidDigestException(RuntimeError):
    """ Raised by parse_digest when a string is not a well-formed digest. """


def parse_digest(digest):
    """ Returns the digest parsed out to its components.

        The result is a Digest tuple; tarsum_version is None when the digest has no
        'tarsum.<version>+' prefix. Raises InvalidDigestException if the string does not
        match DIGEST_PATTERN in its entirety.
    """
    match = DIGEST_REGEX.match(digest)
    # match() only anchors at the start, so also require that the whole string was consumed.
    if match is None or match.end() != len(digest):
        # Interpolate the digest into the message; passing it as a second argument would
        # leave the '%s' placeholder unformatted in the exception text.
        raise InvalidDigestException('Not a valid digest: %s' % digest)
    is_tarsum = match.group(1) is not None
    return Digest(is_tarsum, match.group(2), match.group(3), match.group(4))
def content_path(digest):
    """ Returns the relative path derived from the components of the given digest string. """
    info = parse_digest(digest)
    # Tarsum digests get a leading 'tarsum/<version>' pair of path components.
    parts = ['tarsum', info.tarsum_version] if info.is_tarsum else []
    # Two-character prefix of the hash, left-padded with '0' when the hash is shorter.
    shard = info.hash_bytes[:2].zfill(2)
    parts += [info.hash_alg, shard, info.hash_bytes]
    return os.path.join(*parts)
def sha256_digest(content):
    """ Returns the sha256 digest string for a single block of content bytes. """
    # Present the content as a one-element iterator of chunks.
    return sha256_digest_from_generator(iter((content,)))
def sha256_digest_from_generator(content_generator):
    """ Consumes every chunk produced by the iterator and returns the sha256 of all of the
        chunks, in order, formatted as 'sha256:<hex digest>'.
    """
    hasher = hashlib.sha256()
    feed = hasher.update
    for piece in content_generator:
        feed(piece)
    return 'sha256:{0}'.format(hasher.hexdigest())
def digests_equal(lhs_digest_string, rhs_digest_string):
    """ Parses both digest strings and returns True when their parsed components are equal,
        False otherwise.
    """
    lhs = parse_digest(lhs_digest_string)
    rhs = parse_digest(rhs_digest_string)
    return lhs == rhs