From e344d4a5cf291150e7dcb1da5c39eef715e589f6 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 12 Nov 2018 23:27:01 +0200 Subject: [PATCH] Enhancements for Docker schema implementations in preparing for supporting schema 2 in the OCI model This adds additional required properties and methods to the Docker schema interface to allow us to treat both schema1 and schema2 manifests and lists as logically equivalent from the OCI model perspective --- image/docker/interfaces.py | 35 +++++++- image/docker/schema1.py | 43 ++++++++-- image/docker/schema2/config.py | 19 +++++ image/docker/schema2/list.py | 95 ++++++++++++++++++++- image/docker/schema2/manifest.py | 98 ++++++++++++++++++++-- image/docker/schema2/test/test_config.py | 2 + image/docker/schema2/test/test_list.py | 32 ++++++- image/docker/schema2/test/test_manifest.py | 85 ++++++++++++++++++- image/docker/schemas.py | 22 +++++ image/docker/test/test_schema1.py | 8 ++ image/docker/test/test_schemas.py | 18 ++++ workers/manifestbackfillworker.py | 12 +++ 12 files changed, 447 insertions(+), 22 deletions(-) create mode 100644 image/docker/schemas.py create mode 100644 image/docker/test/test_schemas.py diff --git a/image/docker/interfaces.py b/image/docker/interfaces.py index 62dd4563c..a55b662b4 100644 --- a/image/docker/interfaces.py +++ b/image/docker/interfaces.py @@ -1,4 +1,4 @@ -from abc import ABCMeta, abstractproperty +from abc import ABCMeta, abstractproperty, abstractmethod from six import add_metaclass @add_metaclass(ABCMeta) @@ -26,7 +26,7 @@ class ManifestInterface(object): @abstractproperty def layers(self): - """ Returns the layers of this manifest, from base to leaf. """ + """ Returns the layers of this manifest, from base to leaf or None if none. """ pass @abstractproperty @@ -43,5 +43,34 @@ class ManifestInterface(object): @abstractproperty def blob_digests(self): """ Returns an iterator over all the blob digests referenced by this manifest, - from base to leaf. 
The blob digests are strings with prefixes. + from base to leaf. The blob digests are strings with prefixes. For manifests that reference + config as a blob, the blob will be included here. + """ + + @abstractmethod + def child_manifests(self, lookup_manifest_fn): + """ Returns an iterator of all manifests that live under this manifest, if any or None if none. + The lookup_manifest_fn is a function that, when given a blob content SHA, returns the + contents of that blob in storage if any or None if none. + """ + + @abstractmethod + def get_manifest_labels(self, lookup_config_fn): + """ Returns a dictionary of all the labels defined inside this manifest or None if none. """ + pass + + @abstractmethod + def unsigned(self): + """ Returns an unsigned version of this manifest. """ + + @abstractmethod + def generate_legacy_layers(self, images_map, lookup_config_fn): + """ + Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. + + If Docker gives us a layer with a v1 image ID that already points to existing + content, but the checksums don't match, then we need to rewrite the image ID + to something new in order to ensure consistency. + + Returns None if there are no legacy images associated with the manifest. 
""" diff --git a/image/docker/schema1.py b/image/docker/schema1.py index f7184c43b..bb15a561b 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -151,7 +151,7 @@ class DockerSchema1Manifest(ManifestInterface): }, }, }, - 'required': [DOCKER_SCHEMA1_SIGNATURES_KEY, DOCKER_SCHEMA1_REPO_TAG_KEY, + 'required': [DOCKER_SCHEMA1_REPO_TAG_KEY, DOCKER_SCHEMA1_REPO_NAME_KEY, DOCKER_SCHEMA1_FS_LAYERS_KEY, DOCKER_SCHEMA1_HISTORY_KEY], } @@ -170,8 +170,9 @@ class DockerSchema1Manifest(ManifestInterface): except ValidationError as ve: raise MalformedSchema1Manifest('manifest data does not match schema: %s' % ve) - self._signatures = self._parsed[DOCKER_SCHEMA1_SIGNATURES_KEY] + self._signatures = self._parsed.get(DOCKER_SCHEMA1_SIGNATURES_KEY) self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY] + self._architecture = self._parsed[DOCKER_SCHEMA1_ARCH_KEY] repo_name = self._parsed[DOCKER_SCHEMA1_REPO_NAME_KEY] repo_name_tuple = repo_name.split('/') @@ -191,6 +192,9 @@ class DockerSchema1Manifest(ManifestInterface): return DockerSchema1Manifest(encoded_bytes.encode('utf-8'), validate) def _validate(self): + if not self._signatures: + return + for signature in self._signatures: bytes_to_verify = '{0}.{1}'.format(signature['protected'], base64url_encode(self._payload)) @@ -208,11 +212,12 @@ class DockerSchema1Manifest(ManifestInterface): @property def content_type(self): - return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE + return (DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE + if self._signatures else DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE) @property def media_type(self): - return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE + return self.content_type @property def signatures(self): @@ -297,6 +302,24 @@ class DockerSchema1Manifest(ManifestInterface): def blob_digests(self): return [str(layer.digest) for layer in self.layers] + def child_manifests(self, lookup_manifest_fn): + return None + + def get_manifest_labels(self, lookup_config_fn): + return 
self.layers[-1].v1_metadata.labels + + def unsigned(self): + if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE: + return self + + # Create an unsigned version of the manifest. + builder = DockerSchema1ManifestBuilder(self._namespace, self._repo_name, self._tag, + self._architecture) + for layer in reversed(self.layers): + builder.add_layer(str(layer.digest), layer.raw_v1_metadata) + + return builder.build() + def _generate_layers(self): """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, @@ -330,12 +353,18 @@ class DockerSchema1Manifest(ManifestInterface): @property def _payload(self): + if self._signatures is None: + return self._bytes + protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY]) parsed_protected = json.loads(base64url_decode(protected)) signed_content_head = self._bytes[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]] signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY])) return signed_content_head + signed_content_tail + def generate_legacy_layers(self, images_map, lookup_config_fn): + return self.rewrite_invalid_image_ids(images_map) + def rewrite_invalid_image_ids(self, images_map): """ Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. @@ -428,9 +457,9 @@ class DockerSchema1ManifestBuilder(object): return self - def build(self, json_web_key): + def build(self, json_web_key=None): """ - Builds a DockerSchema1Manifest object complete with signature. + Builds a DockerSchema1Manifest object, with optional signature. 
""" payload = OrderedDict(self._base_payload) payload.update({ @@ -439,6 +468,8 @@ class DockerSchema1ManifestBuilder(object): }) payload_str = json.dumps(payload, indent=3) + if json_web_key is None: + return DockerSchema1Manifest(payload_str) split_point = payload_str.rfind('\n}') diff --git a/image/docker/schema2/config.py b/image/docker/schema2/config.py index faf09c26a..b78552430 100644 --- a/image/docker/schema2/config.py +++ b/image/docker/schema2/config.py @@ -99,6 +99,8 @@ from collections import namedtuple from jsonschema import validate as validate_schema, ValidationError from dateutil.parser import parse as parse_date +from digest import digest_tools + DOCKER_SCHEMA2_CONFIG_HISTORY_KEY = "history" DOCKER_SCHEMA2_CONFIG_ROOTFS_KEY = "rootfs" DOCKER_SCHEMA2_CONFIG_CREATED_KEY = "created" @@ -167,6 +169,8 @@ class DockerSchema2Config(object): } def __init__(self, config_bytes): + self._config_bytes = config_bytes + try: self._parsed = json.loads(config_bytes) except ValueError as ve: @@ -177,6 +181,21 @@ class DockerSchema2Config(object): except ValidationError as ve: raise MalformedSchema2Config('config data does not match schema: %s' % ve) + @property + def digest(self): + """ Returns the digest of this config object. """ + return digest_tools.sha256_digest(self._config_bytes) + + @property + def size(self): + """ Returns the size of this config object. """ + return len(self._config_bytes) + + @property + def labels(self): + """ Returns a dictionary of all the labels defined in this configuration. """ + return self._parsed.get('config', {}).get('Labels', {}) or {} + @property def history(self): """ Returns the history of the image, started at the base layer. 
""" diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py index 24092de41..f7358cbb3 100644 --- a/image/docker/schema2/list.py +++ b/image/docker/schema2/list.py @@ -3,6 +3,8 @@ import json from cachetools import lru_cache from jsonschema import validate as validate_schema, ValidationError +from digest import digest_tools +from image.docker.interfaces import ManifestInterface from image.docker.schema1 import DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE from image.docker.schema1 import DockerSchema1Manifest from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE, @@ -50,6 +52,9 @@ class LazyManifestLoader(object): digest = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY] size = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY] manifest_bytes = self._lookup_manifest_fn(digest) + if manifest_bytes is None: + raise MalformedSchema2ManifestList('Could not find child manifest with digest `%s`' % digest) + if len(manifest_bytes) != size: raise MalformedSchema2ManifestList('Size of manifest does not match that retrieved: %s vs %s', len(manifest_bytes), size) @@ -64,7 +69,7 @@ class LazyManifestLoader(object): raise MalformedSchema2ManifestList('Unknown manifest content type') -class DockerSchema2ManifestList(object): +class DockerSchema2ManifestList(ManifestInterface): METASCHEMA = { 'type': 'object', 'properties': { @@ -161,6 +166,7 @@ class DockerSchema2ManifestList(object): def __init__(self, manifest_bytes): self._layers = None + self._manifest_bytes = manifest_bytes try: self._parsed = json.loads(manifest_bytes) @@ -172,6 +178,42 @@ class DockerSchema2ManifestList(object): except ValidationError as ve: raise MalformedSchema2ManifestList('manifest data does not match schema: %s' % ve) + @property + def digest(self): + """ The digest of the manifest, including type prefix. """ + return digest_tools.sha256_digest(self._manifest_bytes) + + @property + def media_type(self): + """ The media type of the schema. 
""" + return self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY] + + @property + def manifest_dict(self): + """ Returns the manifest as a dictionary ready to be serialized to JSON. """ + return self._parsed + + @property + def bytes(self): + return self._manifest_bytes + + @property + def layers(self): + return None + + @property + def leaf_layer_v1_image_id(self): + return None + + @property + def legacy_image_ids(self): + return None + + @property + def blob_digests(self): + manifests = self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY] + return [m[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY] for m in manifests] + @lru_cache(maxsize=1) def manifests(self, lookup_manifest_fn): """ Returns the manifests in the list. The `lookup_manifest_fn` is a function @@ -180,6 +222,12 @@ class DockerSchema2ManifestList(object): manifests = self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY] return [LazyManifestLoader(m, lookup_manifest_fn) for m in manifests] + def child_manifests(self, lookup_manifest_fn): + return self.manifests(lookup_manifest_fn) + + def get_manifest_labels(self, lookup_config_fn): + return None + def get_v1_compatible_manifest(self, lookup_manifest_fn): """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. If none, returns None. @@ -192,3 +240,48 @@ class DockerSchema2ManifestList(object): return manifest return None + + def unsigned(self): + return self + + def generate_legacy_layers(self, images_map, lookup_config_fn): + return None + + +class DockerSchema2ManifestListBuilder(object): + """ + A convenient abstraction around creating new DockerSchema2ManifestList's. + """ + def __init__(self): + self.manifests = [] + + def add_manifest(self, manifest, architecture, os): + """ Adds a manifest to the list. """ + manifest = manifest.unsigned() # Make sure we add the unsigned version to the list. 
+ self.add_manifest_digest(manifest.digest, len(manifest.bytes), manifest.media_type, + architecture, os) + + def add_manifest_digest(self, manifest_digest, manifest_size, media_type, architecture, os): + """ Adds a manifest to the list. """ + self.manifests.append((manifest_digest, manifest_size, media_type, { + DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY: architecture, + DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY: os, + })) + + def build(self): + """ Builds and returns the DockerSchema2ManifestList. """ + assert self.manifests + + manifest_list_dict = { + DOCKER_SCHEMA2_MANIFESTLIST_VERSION_KEY: 2, + DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY: DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY: [ + { + DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY: manifest[2], + DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY: manifest[0], + DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY: manifest[1], + DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY: manifest[3], + } for manifest in self.manifests + ], + } + return DockerSchema2ManifestList(json.dumps(manifest_list_dict, indent=3)) diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index ceb07fa30..c74a6f714 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -12,6 +12,7 @@ from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE, DOCKER_SCHEMA2_LAYER_CONTENT_TYPE, DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE) +from image.docker.schema1 import DockerSchema1ManifestBuilder from image.docker.schema2.config import DockerSchema2Config # Keys. 
@@ -178,12 +179,26 @@ class DockerSchema2Manifest(ManifestInterface): @property def blob_digests(self): - return [str(layer.digest) for layer in self.layers] + return [str(layer.digest) for layer in self.layers] + [str(self.config.digest)] + + def get_manifest_labels(self, lookup_config_fn): + return self._get_built_config(lookup_config_fn).labels + + def _get_built_config(self, lookup_config_fn): + config_bytes = lookup_config_fn(self.config.digest) + if len(config_bytes) != self.config.size: + raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s', + len(config_bytes), self.config.size) + + return DockerSchema2Config(config_bytes) @property def bytes(self): return self._payload + def child_manifests(self, lookup_manifest_fn): + return None + def _generate_layers(self): for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]): content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY] @@ -225,12 +240,7 @@ class DockerSchema2Manifest(ManifestInterface): this schema. The `lookup_config_fn` is a function that, when given the config digest SHA, returns the associated configuration JSON bytes for this schema. """ - config_bytes = lookup_config_fn(self.config.digest) - if len(config_bytes) != self.config.size: - raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s', - len(config_bytes), self.config.size) - - schema2_config = DockerSchema2Config(config_bytes) + schema2_config = self._get_built_config(lookup_config_fn) # Build the V1 IDs for the layers. layers = list(self.layers_with_v1_ids) @@ -241,3 +251,77 @@ class DockerSchema2Manifest(ManifestInterface): v1_builder.add_layer(str(layer_with_ids.layer.digest), json.dumps(v1_compatibility)) return v1_builder + + def generate_legacy_layers(self, images_map, lookup_config_fn): + # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains + # the logic for generating the DockerV1Metadata. 
All of this will go away once we get + # rid of legacy images in the database, so this is a temporary solution. + v1_builder = DockerSchema1ManifestBuilder('', '', '') + self.populate_schema1_builder(v1_builder, lookup_config_fn) + return v1_builder.build().generate_legacy_layers(images_map, lookup_config_fn) + + def unsigned(self): + return self + + +class DockerSchema2ManifestBuilder(object): + """ + A convenient abstraction around creating new DockerSchema2Manifests. + """ + def __init__(self): + self.config = None + self.layers = [] + + def set_config(self, schema2_config): + """ Sets the configuration for the manifest being built. """ + self.set_config_digest(schema2_config.digest, schema2_config.size) + + def set_config_digest(self, config_digest, config_size): + """ Sets the digest and size of the configuration layer. """ + self.config = DockerV2ManifestConfig(size=config_size, digest=config_digest) + + def add_layer(self, digest, size, urls=None): + """ Adds a layer to the manifest. """ + self.layers.append(DockerV2ManifestLayer(index=len(self.layers), + digest=digest, + compressed_size=size, + urls=urls, + is_remote=bool(urls))) + + def build(self): + """ Builds and returns the DockerSchema2Manifest. 
""" + assert self.layers + assert self.config + + def _build_layer(layer): + if layer.urls: + return { + DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: layer.compressed_size, + DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(layer.digest), + DOCKER_SCHEMA2_MANIFEST_URLS_KEY: layer.urls, + } + + return { + DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_LAYER_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: layer.compressed_size, + DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(layer.digest), + } + + manifest_dict = { + DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: 2, + DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, + + # Config + DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: { + DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: self.config.size, + DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(self.config.digest), + }, + + # Layers + DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: [ + _build_layer(layer) for layer in self.layers + ], + } + return DockerSchema2Manifest(json.dumps(manifest_dict, indent=3)) diff --git a/image/docker/schema2/test/test_config.py b/image/docker/schema2/test/test_config.py index 345e63fd6..2c2b2f6d5 100644 --- a/image/docker/schema2/test/test_config.py +++ b/image/docker/schema2/test/test_config.py @@ -127,3 +127,5 @@ def test_valid_config(): else: assert 'Hostname' not in v1_compat['container_config'] assert v1_compat['container_config']['Cmd'] == history_entry.command + + assert config.labels == {} diff --git a/image/docker/schema2/test/test_list.py b/image/docker/schema2/test/test_list.py index c2f98ff7d..7b7bf3ff5 100644 --- a/image/docker/schema2/test/test_list.py +++ b/image/docker/schema2/test/test_list.py @@ -3,7 +3,8 @@ import pytest from image.docker.schema1 import DockerSchema1Manifest from image.docker.schema2.manifest import DockerSchema2Manifest -from image.docker.schema2.list import 
MalformedSchema2ManifestList, DockerSchema2ManifestList +from image.docker.schema2.list import (MalformedSchema2ManifestList, DockerSchema2ManifestList, + DockerSchema2ManifestListBuilder) from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as v22_bytes from image.docker.test.test_schema1 import MANIFEST_BYTES as v21_bytes @@ -58,6 +59,13 @@ def test_valid_manifestlist(): manifestlist = DockerSchema2ManifestList(MANIFESTLIST_BYTES) assert len(manifestlist.manifests(_get_manifest)) == 2 + assert (manifestlist.digest == + 'sha256:7e22fdbe49736329786c9b4fdc154cc9251b190ca6b4cf33aed00efc0fc3df25') + + assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' + assert manifestlist.bytes == MANIFESTLIST_BYTES + assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES) + assert set(manifestlist.blob_digests) == {'sha256:e6', 'sha256:5b'} for index, manifest in enumerate(manifestlist.manifests(_get_manifest)): if index == 0: @@ -68,3 +76,25 @@ def test_valid_manifestlist(): assert manifest.manifest_obj.schema_version == 1 assert manifestlist.get_v1_compatible_manifest(_get_manifest).manifest_obj.schema_version == 1 + + assert manifestlist.layers is None + assert manifestlist.leaf_layer_v1_image_id is None + assert manifestlist.legacy_image_ids is None + + +def test_builder(): + def _get_manifest(digest): + if digest == 'sha256:e6': + return v22_bytes + else: + return v21_bytes + + existing = DockerSchema2ManifestList(MANIFESTLIST_BYTES) + + builder = DockerSchema2ManifestListBuilder() + for index, manifest in enumerate(existing.manifests(_get_manifest)): + builder.add_manifest(manifest.manifest_obj, "amd64", "os") + + built = builder.build() + assert len(built.manifests(_get_manifest)) == 2 + diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index 03b847d7c..476df4d0b 100644 --- a/image/docker/schema2/test/test_manifest.py +++ 
b/image/docker/schema2/test/test_manifest.py @@ -4,7 +4,8 @@ import pytest from app import docker_v2_signing_key from image.docker.schema1 import (DockerSchema1ManifestBuilder, DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE) -from image.docker.schema2.manifest import MalformedSchema2Manifest, DockerSchema2Manifest +from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest, + DockerSchema2ManifestBuilder) from image.docker.schema2.test.test_config import CONFIG_BYTES @pytest.mark.parametrize('json_data', [ @@ -71,8 +72,41 @@ def test_valid_manifest(): assert manifest.leaf_layer.compressed_size == 73109 blob_digests = list(manifest.blob_digests) - assert len(blob_digests) == len(manifest.layers) - assert blob_digests == [str(layer.digest) for layer in manifest.layers] + assert len(blob_digests) == len(manifest.layers) + 1 + + expected = [str(layer.digest) for layer in manifest.layers] + [manifest.config.digest] + assert blob_digests == expected + + +def test_schema2_builder(): + manifest = DockerSchema2Manifest(MANIFEST_BYTES) + + builder = DockerSchema2ManifestBuilder() + builder.set_config_digest(manifest.config.digest, manifest.config.size) + + for layer in manifest.layers: + builder.add_layer(layer.digest, layer.compressed_size, urls=layer.urls) + + built = builder.build() + assert built.layers == manifest.layers + assert built.config == manifest.config + + +def test_get_manifest_labels(): + labels = dict(foo='bar', baz='meh') + + def _lookup_config(digest): + config_str = json.dumps({ + "config": { + "Labels": labels, + }, + "rootfs": {"type": "layers", "diff_ids": []}, + "history": [], + }) + return config_str + ' ' * (1885 - len(config_str)) + + manifest = DockerSchema2Manifest(MANIFEST_BYTES) + assert manifest.get_manifest_labels(_lookup_config) == labels def test_build_schema1(): @@ -85,7 +119,8 @@ def test_build_schema1(): assert schema1.media_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE assert len(schema1.layers) == 
len(manifest.layers) assert set(schema1.image_ids) == set([l.v1_id for l in manifest.layers_with_v1_ids]) - assert set(schema1.parent_image_ids) == set([l.v1_parent_id for l in manifest.layers_with_v1_ids if l.v1_parent_id]) + assert set(schema1.parent_image_ids) == set([l.v1_parent_id for l in + manifest.layers_with_v1_ids if l.v1_parent_id]) manifest_layers = list(manifest.layers_with_v1_ids) for index, layer in enumerate(schema1.layers): @@ -95,3 +130,45 @@ def test_build_schema1(): for index, digest in enumerate(schema1.blob_digests): assert digest == str(list(manifest.blob_digests)[index]) + + +def test_generate_legacy_layers(): + builder = DockerSchema2ManifestBuilder() + builder.add_layer('sha256:abc123', 123) + builder.add_layer('sha256:def456', 789) + builder.set_config_digest('sha256:def456', 2000) + manifest = builder.build() + + def _lookup_config(digest): + config_str = json.dumps({ + "config": { + }, + "rootfs": {"type": "layers", "diff_ids": []}, + "history": [ + { + "created": "2018-04-03T18:37:09.284840891Z", + "created_by": "foo" + }, + { + "created": "2018-04-12T18:37:09.284840891Z", + "created_by": "bar" + }, + ], + }) + return config_str + ' ' * (2000 - len(config_str)) + + legacy_layers = list(manifest.generate_legacy_layers({}, _lookup_config)) + assert len(legacy_layers) == 2 + assert legacy_layers[0].content_checksum == 'sha256:abc123' + assert legacy_layers[1].content_checksum == 'sha256:def456' + + assert legacy_layers[0].created == "2018-04-03T18:37:09.284840891Z" + assert legacy_layers[1].created == "2018-04-12T18:37:09.284840891Z" + + assert legacy_layers[0].command == '"foo"' + assert legacy_layers[1].command == '"bar"' + + assert legacy_layers[1].parent_image_id == legacy_layers[0].image_id + assert legacy_layers[0].parent_image_id is None + + assert legacy_layers[0].image_id != legacy_layers[1] diff --git a/image/docker/schemas.py b/image/docker/schemas.py new file mode 100644 index 000000000..8bc46051f --- /dev/null +++ 
b/image/docker/schemas.py @@ -0,0 +1,22 @@ +from image.docker import ManifestException +from image.docker.schema1 import DockerSchema1Manifest, DOCKER_SCHEMA1_CONTENT_TYPES +from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE) +from image.docker.schema2.manifest import DockerSchema2Manifest +from image.docker.schema2.list import DockerSchema2ManifestList + + +def parse_manifest_from_bytes(manifest_bytes, media_type, validate=True): + """ Parses and returns a manifest from the given bytes, for the given media type. + Raises a ManifestException if the parse fails for some reason. + """ + if media_type == DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE: + return DockerSchema2Manifest(manifest_bytes) + + if media_type == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE: + return DockerSchema2ManifestList(manifest_bytes) + + if media_type in DOCKER_SCHEMA1_CONTENT_TYPES: + return DockerSchema1Manifest(manifest_bytes, validate=validate) + + raise ManifestException('Unknown or unsupported manifest media type `%s`' % media_type) diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index 6fc0d5719..414da365b 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -83,6 +83,14 @@ def test_valid_manifest(): assert manifest.leaf_layer == manifest.layers[1] assert manifest.created_datetime is None + unsigned = manifest.unsigned() + assert unsigned.namespace == manifest.namespace + assert unsigned.repo_name == manifest.repo_name + assert unsigned.tag == manifest.tag + assert unsigned.layers == manifest.layers + assert unsigned.blob_digests == manifest.blob_digests + assert unsigned.digest != manifest.digest + def test_validate_manifest(): test_dir = os.path.dirname(os.path.abspath(__file__)) diff --git a/image/docker/test/test_schemas.py b/image/docker/test/test_schemas.py new file mode 100644 index 000000000..143323fd1 --- /dev/null +++ b/image/docker/test/test_schemas.py 
@@ -0,0 +1,18 @@ +import pytest + +from image.docker.schemas import parse_manifest_from_bytes +from image.docker.schema1 import DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE +from image.docker.schema2 import DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE +from image.docker.schema2 import DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE +from image.docker.test.test_schema1 import MANIFEST_BYTES as SCHEMA1_BYTES +from image.docker.schema2.test.test_list import MANIFESTLIST_BYTES +from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as SCHEMA2_BYTES + + +@pytest.mark.parametrize('media_type, manifest_bytes', [ + (DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, SCHEMA1_BYTES), + (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, SCHEMA2_BYTES), + (DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE, MANIFESTLIST_BYTES), +]) +def test_parse_manifest_from_bytes(media_type, manifest_bytes): + assert parse_manifest_from_bytes(manifest_bytes, media_type, validate=False) diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py index decb9dd44..3dd4d2153 100644 --- a/workers/manifestbackfillworker.py +++ b/workers/manifestbackfillworker.py @@ -62,6 +62,18 @@ class BrokenManifest(ManifestInterface): @property def blob_digests(self): return [] + + def child_manifests(self, lookup_manifest_fn): + return None + + def get_manifest_labels(self, lookup_config_fn): + return {} + + def unsigned(self): + return self + + def generate_legacy_layers(self, images_map, lookup_config_fn): + return None class ManifestBackfillWorker(Worker):