From 99e5429e8644396182e4023cfaa548386496c444 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Fri, 6 Nov 2015 17:47:28 -0500 Subject: [PATCH] Relax the digest specification to handle more formats --- digest/digest_tools.py | 30 ++++++++---------------------- test/test_digest_tools.py | 19 ++++++++++--------- 2 files changed, 18 insertions(+), 31 deletions(-) diff --git a/digest/digest_tools.py b/digest/digest_tools.py index 6c3f444dc..212088236 100644 --- a/digest/digest_tools.py +++ b/digest/digest_tools.py @@ -3,8 +3,9 @@ import os.path import hashlib -DIGEST_PATTERN = r'(tarsum\.(v[\w]+)\+)?([\w]+):([0-9a-f]+)' - +DIGEST_PATTERN = r'([A-Za-z0-9_+.-]+):([A-Fa-f0-9]+)' +REPLACE_WITH_PATH = re.compile(r'[+.]') +REPLACE_DOUBLE_SLASHES = re.compile(r'/+') class InvalidDigestException(RuntimeError): pass @@ -13,15 +14,11 @@ class InvalidDigestException(RuntimeError): class Digest(object): DIGEST_REGEX = re.compile(DIGEST_PATTERN) - def __init__(self, hash_alg, hash_bytes, is_tarsum=False, tarsum_version=None): + def __init__(self, hash_alg, hash_bytes): self._hash_alg = hash_alg self._hash_bytes = hash_bytes - self._is_tarsum = is_tarsum - self._tarsum_version = tarsum_version def __str__(self): - if self._is_tarsum: - return 'tarsum.{0}+{1}:{2}'.format(self._tarsum_version, self._hash_alg, self._hash_bytes) return '{0}:{1}'.format(self._hash_alg, self._hash_bytes) def __eq__(self, rhs): @@ -34,16 +31,7 @@ class Digest(object): if match is None or match.end() != len(digest): raise InvalidDigestException('Not a valid digest: %s', digest) - is_tarsum = match.group(1) is not None - return Digest(match.group(3), match.group(4), is_tarsum, match.group(2)) - - @property - def is_tarsum(self): - return self._is_tarsum - - @property - def tarsum_version(self): - return self._tarsum_version + return Digest(match.group(1), match.group(2)) @property def hash_alg(self): @@ -59,14 +47,12 @@ def content_path(digest): parsed = Digest.parse_digest(digest) components = [] - if parsed.is_tarsum: - components.extend(['tarsum', parsed.tarsum_version]) - # Generate a prefix which is always two characters, and which will be filled with leading zeros # if the input does not contain at least two characters. e.g. ABC -> AB, A -> 0A prefix = parsed.hash_bytes[0:2].zfill(2) - components.extend([parsed.hash_alg, prefix, parsed.hash_bytes]) - + pathish = REPLACE_WITH_PATH.sub('/', parsed.hash_alg) + normalized = REPLACE_DOUBLE_SLASHES.sub('/', pathish).lstrip('/') + components.extend([normalized, prefix, parsed.hash_bytes]) return os.path.join(*components) diff --git a/test/test_digest_tools.py b/test/test_digest_tools.py index 2ba2c6aec..8fcb71237 100644 --- a/test/test_digest_tools.py +++ b/test/test_digest_tools.py @@ -5,12 +5,13 @@ from digest.digest_tools import Digest, content_path, InvalidDigestException class TestParseDigest(unittest.TestCase): def test_parse_good(self): examples = [ - ('tarsum.v123123+sha1:123deadbeef', ('sha1', '123deadbeef', True, 'v123123')), - ('tarsum.v1+sha256:123123', ('sha256', '123123', True, 'v1')), - ('tarsum.v0+md5:abc', ('md5', 'abc', True, 'v0')), - ('sha1:123deadbeef', ('sha1', '123deadbeef', False, None)), - ('sha256:123123', ('sha256', '123123', False, None)), - ('md5:abc', ('md5', 'abc', False, None)), + ('tarsum.v123123+sha1:123deadbeef', ('tarsum.v123123+sha1', '123deadbeef')), + ('tarsum.v1+sha256:123123', ('tarsum.v1+sha256', '123123')), + ('tarsum.v0+md5:abc', ('tarsum.v0+md5', 'abc')), + ('tarsum+sha1:abc', ('tarsum+sha1', 'abc')), + ('sha1:123deadbeef', ('sha1', '123deadbeef')), + ('sha256:123123', ('sha256', '123123')), + ('md5:abc', ('md5', 'abc')), ] for digest, output_args in examples: @@ -21,9 +22,7 @@ class TestParseDigest(unittest.TestCase): def test_parse_fail(self): examples = [ - 'tarsum.v++sha1:123deadbeef', - '.v1+sha256:123123', - 'tarsum.v+md5:abc', + 'tarsum.v+md5:abc:', 'sha1:123deadbeefzxczxv', 'sha256123123', 'tarsum.v1+', @@ -45,6 +44,8 @@ class TestDigestPath(unittest.TestCase): ('sha256:123123', 'sha256/12/123123'), ('md5:abc', 'md5/ab/abc'), ('md5:1', 'md5/01/1'), + ('md5.....+++:1', 'md5/01/1'), + ('.md5.:1', 'md5/01/1'), ] for digest, path in examples: