diff --git a/image/docker/interfaces.py b/image/docker/interfaces.py index d34e1e140..008a7eb23 100644 --- a/image/docker/interfaces.py +++ b/image/docker/interfaces.py @@ -71,14 +71,13 @@ class ManifestInterface(object): """ @abstractmethod - def child_manifests(self, lookup_manifest_fn): + def child_manifests(self, content_retriever): """ Returns an iterator of all manifests that live under this manifest, if any or None if not - applicable. The lookup_manifest_fn is a function that, when given a blob content SHA, - returns the contents of that blob in storage if any or None if none. + applicable. """ @abstractmethod - def get_manifest_labels(self, lookup_config_fn): + def get_manifest_labels(self, content_retriever): """ Returns a dictionary of all the labels defined inside this manifest or None if this kind of manifest does not support labels. """ pass @@ -88,7 +87,7 @@ class ManifestInterface(object): """ Returns an unsigned version of this manifest. """ @abstractmethod - def generate_legacy_layers(self, images_map, lookup_config_fn): + def generate_legacy_layers(self, images_map, content_retriever): """ Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. @@ -100,7 +99,19 @@ class ManifestInterface(object): """ @abstractmethod - def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): + def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. If none, returns None. """ + + +@add_metaclass(ABCMeta) +class ContentRetriever(object): + """ Defines the interface for retrieval of various content referneced by a manifest. """ + @abstractmethod + def get_manifest_bytes_with_digest(self, digest): + """ Returns the bytes of the manifest with the given digest or None if none found. """ + + @abstractmethod + def get_blob_bytes_with_digest(self, digest): + """ Returns the bytes of the blob with the given digest or None if none found. """ diff --git a/image/docker/schema1.py b/image/docker/schema1.py index e5698151d..23abb3a84 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -314,10 +314,10 @@ class DockerSchema1Manifest(ManifestInterface): def local_blob_digests(self): return self.blob_digests - def child_manifests(self, lookup_manifest_fn): + def child_manifests(self, content_retriever): return None - def get_manifest_labels(self, lookup_config_fn): + def get_manifest_labels(self, content_retriever): return self.layers[-1].v1_metadata.labels def unsigned(self): @@ -374,10 +374,10 @@ class DockerSchema1Manifest(ManifestInterface): signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY])) return signed_content_head + signed_content_tail - def generate_legacy_layers(self, images_map, lookup_config_fn): + def generate_legacy_layers(self, images_map, content_retriever): return self.rewrite_invalid_image_ids(images_map) - def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): + def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. If none, returns None. """ diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py index 0b068f0c6..6658f74e9 100644 --- a/image/docker/schema2/list.py +++ b/image/docker/schema2/list.py @@ -40,9 +40,9 @@ class MalformedSchema2ManifestList(Exception): class LazyManifestLoader(object): - def __init__(self, manifest_data, lookup_manifest_fn): + def __init__(self, manifest_data, content_retriever): self._manifest_data = manifest_data - self._lookup_manifest_fn = lookup_manifest_fn + self._content_retriever = content_retriever self._loaded_manifest = None @property @@ -56,7 +56,7 @@ class LazyManifestLoader(object): def _load_manifest(self): digest = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY] size = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY] - manifest_bytes = self._lookup_manifest_fn(digest) + manifest_bytes = self._content_retriever.get_manifest_bytes_with_digest(digest) if manifest_bytes is None: raise MalformedSchema2ManifestList('Could not find child manifest with digest `%s`' % digest) @@ -237,24 +237,23 @@ class DockerSchema2ManifestList(ManifestInterface): return None @lru_cache(maxsize=1) - def manifests(self, lookup_manifest_fn): - """ Returns the manifests in the list. The `lookup_manifest_fn` is a function - that returns the manifest bytes for the specified digest. + def manifests(self, content_retriever): + """ Returns the manifests in the list. """ manifests = self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY] - return [LazyManifestLoader(m, lookup_manifest_fn) for m in manifests] + return [LazyManifestLoader(m, content_retriever) for m in manifests] - def child_manifests(self, lookup_manifest_fn): - return self.manifests(lookup_manifest_fn) + def child_manifests(self, content_retriever): + return self.manifests(content_retriever) - def get_manifest_labels(self, lookup_config_fn): + def get_manifest_labels(self, content_retriever): return None - def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): + def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. If none, returns None. """ - for manifest_ref in self.manifests(lookup_fn): + for manifest_ref in self.manifests(content_retriever): platform = manifest_ref._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY] architecture = platform[DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY] os = platform[DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY] @@ -263,21 +262,19 @@ class DockerSchema2ManifestList(ManifestInterface): try: manifest = manifest_ref.manifest_obj - except ManifestException: - logger.exception('Could not load child manifest') - return None - except IOError: + except (ManifestException, IOError): logger.exception('Could not load child manifest') return None - return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, lookup_fn) + return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, + content_retriever) return None def unsigned(self): return self - def generate_legacy_layers(self, images_map, lookup_config_fn): + def generate_legacy_layers(self, images_map, content_retriever): return None diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index b38f53ac4..9b9ff930a 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -214,11 +214,14 @@ class DockerSchema2Manifest(ManifestInterface): return ([str(layer.digest) for layer in self.layers if not layer.urls] + [str(self.config.digest)]) - def get_manifest_labels(self, lookup_config_fn): - return self._get_built_config(lookup_config_fn).labels + def get_manifest_labels(self, content_retriever): + return self._get_built_config(content_retriever).labels + + def _get_built_config(self, content_retriever): + config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest) + if config_bytes is None: + raise MalformedSchema2Manifest('Could not load config blob for manifest') - def _get_built_config(self, lookup_config_fn): - config_bytes = lookup_config_fn(self.config.digest) if len(config_bytes) != self.config.size: raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s', len(config_bytes), self.config.size) @@ -229,7 +232,7 @@ class DockerSchema2Manifest(ManifestInterface): def bytes(self): return self._payload - def child_manifests(self, lookup_manifest_fn): + def child_manifests(self, content_retriever): return None def _generate_layers(self): @@ -269,13 +272,12 @@ class DockerSchema2Manifest(ManifestInterface): v1_layer_id = digest_history.hexdigest() yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id) - def populate_schema1_builder(self, v1_builder, lookup_config_fn): + def populate_schema1_builder(self, v1_builder, content_retriever): """ Populates a DockerSchema1ManifestBuilder with the layers and config from - this schema. The `lookup_config_fn` is a function that, when given the config - digest SHA, returns the associated configuration JSON bytes for this schema. + this schema. """ assert not self.has_remote_layer - schema2_config = self._get_built_config(lookup_config_fn) + schema2_config = self._get_built_config(content_retriever) # Build the V1 IDs for the layers. layers = list(self.layers_with_v1_ids) @@ -287,22 +289,22 @@ class DockerSchema2Manifest(ManifestInterface): return v1_builder - def generate_legacy_layers(self, images_map, lookup_config_fn): + def generate_legacy_layers(self, images_map, content_retriever): assert not self.has_remote_layer # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains # the logic for generating the DockerV1Metadata. All of this will go away once we get # rid of legacy images in the database, so this is a temporary solution. v1_builder = DockerSchema1ManifestBuilder('', '', '') - self.populate_schema1_builder(v1_builder, lookup_config_fn) - return v1_builder.build().generate_legacy_layers(images_map, lookup_config_fn) + self.populate_schema1_builder(v1_builder, content_retriever) + return v1_builder.build().generate_legacy_layers(images_map, content_retriever) - def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): + def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): if self.has_remote_layer: return None v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name) - self.populate_schema1_builder(v1_builder, lookup_fn) + self.populate_schema1_builder(v1_builder, content_retriever) return v1_builder.build() def unsigned(self): diff --git a/image/docker/schema2/test/test_list.py b/image/docker/schema2/test/test_list.py index dc5ca5252..c4bfc16ac 100644 --- a/image/docker/schema2/test/test_list.py +++ b/image/docker/schema2/test/test_list.py @@ -6,6 +6,7 @@ from image.docker.schema2.manifest import DockerSchema2Manifest from image.docker.schema2.list import (MalformedSchema2ManifestList, DockerSchema2ManifestList, DockerSchema2ManifestListBuilder) from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as v22_bytes +from image.docker.schemautil import ContentRetrieverForTesting from image.docker.test.test_schema1 import MANIFEST_BYTES as v21_bytes @pytest.mark.parametrize('json_data', [ @@ -66,22 +67,21 @@ NO_AMD_MANIFESTLIST_BYTES = json.dumps({ ] }) -def test_valid_manifestlist(): - def _get_manifest(digest): - if digest == 'sha256:e6': - return v22_bytes - else: - return v21_bytes +retriever = ContentRetrieverForTesting({ + 'sha256:e6': v22_bytes, + 'sha256:5b': v21_bytes, +}) +def test_valid_manifestlist(): manifestlist = DockerSchema2ManifestList(MANIFESTLIST_BYTES) - assert len(manifestlist.manifests(_get_manifest)) == 2 + assert len(manifestlist.manifests(retriever)) == 2 assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' assert manifestlist.bytes == MANIFESTLIST_BYTES assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES) assert set(manifestlist.blob_digests) == {'sha256:e6', 'sha256:5b'} - for index, manifest in enumerate(manifestlist.manifests(_get_manifest)): + for index, manifest in enumerate(manifestlist.manifests(retriever)): if index == 0: assert isinstance(manifest.manifest_obj, DockerSchema2Manifest) assert manifest.manifest_obj.schema_version == 2 @@ -89,7 +89,7 @@ def test_valid_manifestlist(): assert isinstance(manifest.manifest_obj, DockerSchema1Manifest) assert manifest.manifest_obj.schema_version == 1 - compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', _get_manifest) + compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever) assert compatible_manifest.schema_version == 1 assert manifestlist.layers is None @@ -98,35 +98,21 @@ def test_valid_manifestlist(): def test_get_v1_compatible_manifest_no_matching_list(): - def _get_manifest(digest): - if digest == 'sha256:e6': - return v22_bytes - else: - return v21_bytes - manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES) - assert len(manifestlist.manifests(_get_manifest)) == 1 + assert len(manifestlist.manifests(retriever)) == 1 assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' assert manifestlist.bytes == NO_AMD_MANIFESTLIST_BYTES - compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', _get_manifest) + compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever) assert compatible_manifest is None def test_builder(): - def _get_manifest(digest): - if digest == 'sha256:e6': - return v22_bytes - else: - return v21_bytes - existing = DockerSchema2ManifestList(MANIFESTLIST_BYTES) - builder = DockerSchema2ManifestListBuilder() - for index, manifest in enumerate(existing.manifests(_get_manifest)): + for index, manifest in enumerate(existing.manifests(retriever)): builder.add_manifest(manifest.manifest_obj, "amd64", "os") built = builder.build() - assert len(built.manifests(_get_manifest)) == 2 - + assert len(built.manifests(retriever)) == 2 diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index 560c4bb26..1a515893f 100644 --- a/image/docker/schema2/test/test_manifest.py +++ b/image/docker/schema2/test/test_manifest.py @@ -8,6 +8,8 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder, from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest, DockerSchema2ManifestBuilder) from image.docker.schema2.test.test_config import CONFIG_BYTES +from image.docker.schemautil import ContentRetrieverForTesting + @pytest.mark.parametrize('json_data', [ '', @@ -156,27 +158,28 @@ def test_schema2_builder(): def test_get_manifest_labels(): labels = dict(foo='bar', baz='meh') - - def _lookup_config(digest): - config_str = json.dumps({ - "config": { - "Labels": labels, - }, - "rootfs": {"type": "layers", "diff_ids": []}, - "history": [], - }) - return config_str + ' ' * (1885 - len(config_str)) + retriever = ContentRetrieverForTesting.for_config({ + "config": { + "Labels": labels, + }, + "rootfs": {"type": "layers", "diff_ids": []}, + "history": [], + }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885) manifest = DockerSchema2Manifest(MANIFEST_BYTES) - assert manifest.get_manifest_labels(_lookup_config) == labels + assert manifest.get_manifest_labels(retriever) == labels def test_build_schema1(): manifest = DockerSchema2Manifest(MANIFEST_BYTES) assert not manifest.has_remote_layer + retriever = ContentRetrieverForTesting({ + 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7': CONFIG_BYTES, + }) + builder = DockerSchema1ManifestBuilder('somenamespace', 'somename', 'sometag') - manifest.populate_schema1_builder(builder, lambda digest: CONFIG_BYTES) + manifest.populate_schema1_builder(builder, retriever) schema1 = builder.build(docker_v2_signing_key) assert schema1.media_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE @@ -196,35 +199,33 @@ def test_build_schema1(): def test_get_v1_compatible_manifest(): - def _get_config(digest): - config_str = json.dumps({ - "config": { - "Labels": {}, + retriever = ContentRetrieverForTesting.for_config({ + "config": { + "Labels": {}, + }, + "rootfs": {"type": "layers", "diff_ids": []}, + "history": [ + { + "created": "2018-04-03T18:37:09.284840891Z", + "created_by": "foo" }, - "rootfs": {"type": "layers", "diff_ids": []}, - "history": [ - { - "created": "2018-04-03T18:37:09.284840891Z", - "created_by": "foo" - }, - { - "created": "2018-04-12T18:37:09.284840891Z", - "created_by": "bar" - }, - { - "created": "2018-04-03T18:37:09.284840891Z", - "created_by": "foo" - }, - { - "created": "2018-04-12T18:37:09.284840891Z", - "created_by": "bar" - }, - ], - }) - return config_str + ' ' * (1885 - len(config_str)) + { + "created": "2018-04-12T18:37:09.284840891Z", + "created_by": "bar" + }, + { + "created": "2018-04-03T18:37:09.284840891Z", + "created_by": "foo" + }, + { + "created": "2018-04-12T18:37:09.284840891Z", + "created_by": "bar" + }, + ], + }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885) manifest = DockerSchema2Manifest(MANIFEST_BYTES) - schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', _get_config) + schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', retriever) assert schema1 is not None assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE assert len(schema1.layers) == len(manifest.layers) @@ -240,25 +241,23 @@ def test_generate_legacy_layers(): builder.set_config_digest('sha256:def456', 2000) manifest = builder.build() - def _lookup_config(digest): - config_str = json.dumps({ - "config": { + retriever = ContentRetrieverForTesting.for_config({ + "config": { + }, + "rootfs": {"type": "layers", "diff_ids": []}, + "history": [ + { + "created": "2018-04-03T18:37:09.284840891Z", + "created_by": "foo" }, - "rootfs": {"type": "layers", "diff_ids": []}, - "history": [ - { - "created": "2018-04-03T18:37:09.284840891Z", - "created_by": "foo" - }, - { - "created": "2018-04-12T18:37:09.284840891Z", - "created_by": "bar" - }, - ], - }) - return config_str + ' ' * (2000 - len(config_str)) + { + "created": "2018-04-12T18:37:09.284840891Z", + "created_by": "bar" + }, + ], + }, 'sha256:def456', 2000) - legacy_layers = list(manifest.generate_legacy_layers({}, _lookup_config)) + legacy_layers = list(manifest.generate_legacy_layers({}, retriever)) assert len(legacy_layers) == 2 assert legacy_layers[0].content_checksum == 'sha256:abc123' assert legacy_layers[1].content_checksum == 'sha256:def456' diff --git a/image/docker/schemautil.py b/image/docker/schemautil.py new file mode 100644 index 000000000..1a231c62c --- /dev/null +++ b/image/docker/schemautil.py @@ -0,0 +1,24 @@ +import json + +from image.docker.interfaces import ContentRetriever + +class ContentRetrieverForTesting(ContentRetriever): + def __init__(self, digests=None): + self.digests = digests or {} + + def add_digest(self, digest, content): + self.digests[digest] = content + + def get_manifest_bytes_with_digest(self, digest): + return self.digests.get(digest) + + def get_blob_bytes_with_digest(self, digest): + return self.digests.get(digest) + + @classmethod + def for_config(cls, config_obj, digest, size): + config_str = json.dumps(config_obj) + padded_string = config_str + ' ' * (size - len(config_str)) + digests = {} + digests[digest] = padded_string + return ContentRetrieverForTesting(digests)