diff --git a/image/docker/interfaces.py b/image/docker/interfaces.py index 008a7eb23..3fd498a79 100644 --- a/image/docker/interfaces.py +++ b/image/docker/interfaces.py @@ -38,24 +38,24 @@ class ManifestInterface(object): does not support layers. """ pass - @abstractproperty - def leaf_layer_v1_image_id(self): - """ Returns the Docker V1 image ID for the leaf (top) layer, if any, or None if - not applicable. """ - pass - - @abstractproperty - def legacy_image_ids(self): - """ Returns the Docker V1 image IDs for the layers of this manifest or None if not applicable. - """ - pass - @abstractproperty def layers_compressed_size(self): """ Returns the total compressed size of all the layers in this manifest. Returns None if this cannot be computed locally. """ + @abstractmethod + def get_leaf_layer_v1_image_id(self, content_retriever): + """ Returns the Docker V1 image ID for the leaf (top) layer, if any, or None if + not applicable. """ + pass + + @abstractmethod + def get_legacy_image_ids(self, content_retriever): + """ Returns the Docker V1 image IDs for the layers of this manifest or None if not applicable. + """ + pass + @abstractproperty def blob_digests(self): """ Returns an iterator over all the blob digests referenced by this manifest, @@ -86,10 +86,15 @@ class ManifestInterface(object): def unsigned(self): """ Returns an unsigned version of this manifest. """ + @abstractproperty + def has_legacy_image(self): + """ Returns True if this manifest has a legacy V1 image, or False if not. """ + @abstractmethod def generate_legacy_layers(self, images_map, content_retriever): """ - Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. + Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata, starting + at the base layer and working towards the leaf. 
If Docker gives us a layer with a v1 image ID that already points to existing content, but the checksums don't match, then we need to rewrite the image ID @@ -99,15 +104,16 @@ class ManifestInterface(object): """ @abstractmethod - def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): - """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. + def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever): + """ Returns a schema1 version of the manifest. If this is a manifest list, should return the + manifest that is compatible with V1, by virtue of being `amd64` and `linux`. If none, returns None. """ @add_metaclass(ABCMeta) class ContentRetriever(object): - """ Defines the interface for retrieval of various content referneced by a manifest. """ + """ Defines the interface for retrieval of various content referenced by a manifest. """ @abstractmethod def get_manifest_bytes_with_digest(self, digest): """ Returns the bytes of the manifest with the given digest or None if none found. 
""" diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 23abb3a84..7c5f1a3a2 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -267,10 +267,6 @@ class DockerSchema1Manifest(ManifestInterface): def image_ids(self): return {mdata.v1_metadata.image_id for mdata in self.layers} - @property - def legacy_image_ids(self): - return {mdata.v1_metadata.image_id for mdata in self.layers} - @property def parent_image_ids(self): return {mdata.v1_metadata.parent_image_id for mdata in self.layers @@ -280,10 +276,6 @@ class DockerSchema1Manifest(ManifestInterface): def checksums(self): return list({str(mdata.digest) for mdata in self.layers}) - @property - def leaf_layer_v1_image_id(self): - return self.layers[-1].v1_metadata.image_id - @property def leaf_layer(self): return self.layers[-1] @@ -377,7 +369,25 @@ class DockerSchema1Manifest(ManifestInterface): def generate_legacy_layers(self, images_map, content_retriever): return self.rewrite_invalid_image_ids(images_map) - def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): + def get_legacy_image_ids(self, content_retriever): + return self.legacy_image_ids + + @property + def legacy_image_ids(self): + return {mdata.v1_metadata.image_id for mdata in self.layers} + + @property + def has_legacy_image(self): + return True + + @property + def leaf_layer_v1_image_id(self): + return self.layers[-1].v1_metadata.image_id + + def get_leaf_layer_v1_image_id(self, content_retriever): + return self.layers[-1].v1_metadata.image_id + + def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever): """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. If none, returns None. 
""" diff --git a/image/docker/schema2/config.py b/image/docker/schema2/config.py index a54adffdc..ea8551f38 100644 --- a/image/docker/schema2/config.py +++ b/image/docker/schema2/config.py @@ -94,6 +94,7 @@ Example: import copy import json +import hashlib from collections import namedtuple from jsonschema import validate as validate_schema, ValidationError @@ -111,7 +112,8 @@ DOCKER_SCHEMA2_CONFIG_EMPTY_LAYER_KEY = "empty_layer" DOCKER_SCHEMA2_CONFIG_TYPE_KEY = "type" -LayerHistory = namedtuple('LayerHistory', ['created', 'created_datetime', 'command', 'is_empty']) +LayerHistory = namedtuple('LayerHistory', ['created', 'created_datetime', 'command', 'is_empty', + 'raw_entry']) class MalformedSchema2Config(ManifestException): @@ -211,30 +213,25 @@ class DockerSchema2Config(object): yield LayerHistory(created_datetime=created_datetime, created=history_entry[DOCKER_SCHEMA2_CONFIG_CREATED_KEY], command=history_entry[DOCKER_SCHEMA2_CONFIG_CREATED_BY_KEY], - is_empty=history_entry.get(DOCKER_SCHEMA2_CONFIG_EMPTY_LAYER_KEY, False)) + is_empty=history_entry.get(DOCKER_SCHEMA2_CONFIG_EMPTY_LAYER_KEY, False), + raw_entry=history_entry) - def build_v1_compatibility(self, layer_index, v1_id, v1_parent_id, compressed_size=None): + def build_v1_compatibility(self, history, v1_id, v1_parent_id, is_leaf, compressed_size=None): """ Builds the V1 compatibility block for the given layer. - - Note that the layer_index is 0-indexed, with the *base* layer being 0, and the leaf - layer being last. """ - history = list(self.history) - assert layer_index < len(history) - # If the layer is the leaf, it gets the full config (minus 2 fields). Otherwise, it gets only # IDs. 
- v1_compatibility = copy.deepcopy(self._parsed) if layer_index == len(history) - 1 else {} + v1_compatibility = copy.deepcopy(self._parsed) if is_leaf else {} v1_compatibility['id'] = v1_id if v1_parent_id is not None: v1_compatibility['parent'] = v1_parent_id if 'created' not in v1_compatibility: - v1_compatibility['created'] = history[layer_index].created + v1_compatibility['created'] = history.created if 'container_config' not in v1_compatibility: v1_compatibility['container_config'] = { - 'Cmd': history[layer_index].command, + 'Cmd': [history.command], } if compressed_size is not None: diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py index 65a18ab8f..9f153ddbb 100644 --- a/image/docker/schema2/list.py +++ b/image/docker/schema2/list.py @@ -215,14 +215,6 @@ class DockerSchema2ManifestList(ManifestInterface): def layers(self): return None - @property - def leaf_layer_v1_image_id(self): - return None - - @property - def legacy_image_ids(self): - return None - @property def blob_digests(self): # Manifest lists have no blob digests, since everything is stored as a manifest. @@ -253,7 +245,17 @@ class DockerSchema2ManifestList(ManifestInterface): def get_manifest_labels(self, content_retriever): return None - def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): + def get_leaf_layer_v1_image_id(self, content_retriever): + return None + + def get_legacy_image_ids(self, content_retriever): + return None + + @property + def has_legacy_image(self): + return False + + def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever): """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. If none, returns None. 
""" @@ -270,8 +272,7 @@ class DockerSchema2ManifestList(ManifestInterface): logger.exception('Could not load child manifest') return None - return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, - content_retriever) + return manifest.get_schema1_manifest(namespace_name, repo_name, tag_name, content_retriever) return None diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index db24e776b..3278b0c95 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -30,7 +30,11 @@ DockerV2ManifestLayer = namedtuple('DockerV2ManifestLayer', ['index', 'digest', 'is_remote', 'urls', 'compressed_size']) -LayerWithV1ID = namedtuple('LayerWithV1ID', ['layer', 'v1_id', 'v1_parent_id', 'compressed_size']) +ManifestImageLayer = namedtuple('ManifestImageLayer', ['history', 'blob_layer', 'v1_id', + 'v1_parent_id', 'compressed_size', + 'blob_digest']) + +EMPTY_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4' logger = logging.getLogger(__name__) @@ -174,12 +178,13 @@ class DockerSchema2Manifest(ManifestInterface): return self._layers @property - def layers_compressed_size(self): - return sum(layer.compressed_size for layer in self.layers) + def leaf_layer(self): + """ Returns the leaf layer for this manifest. """ + return self.layers[-1] @property - def leaf_layer(self): - return self.layers[-1] + def layers_compressed_size(self): + return sum(layer.compressed_size for layer in self.layers) @property def has_remote_layer(self): @@ -189,22 +194,6 @@ class DockerSchema2Manifest(ManifestInterface): return False - @property - def leaf_layer_v1_image_id(self): - # NOTE: If there exists a layer with remote content, then we consider this manifest - # to not support legacy images. 
- if self.has_remote_layer: - return None - - return list(self.layers_with_v1_ids)[-1].v1_id - - @property - def legacy_image_ids(self): - if self.has_remote_layer: - return None - - return [l.v1_id for l in self.layers_with_v1_ids] - @property def blob_digests(self): return [str(layer.digest) for layer in self.layers] + [str(self.config.digest)] @@ -217,6 +206,112 @@ class DockerSchema2Manifest(ManifestInterface): def get_manifest_labels(self, content_retriever): return self._get_built_config(content_retriever).labels + @property + def bytes(self): + return self._payload + + def child_manifests(self, content_retriever): + return None + + def _manifest_image_layers(self, content_retriever, schema2_config=None): + assert not self.has_remote_layer + + # Retrieve the configuration for the manifest. + config = schema2_config or self._get_built_config(content_retriever) + history = list(config.history) + if len(history) < len(self.layers): + raise MalformedSchema2Manifest('Found less history than layer blobs') + + digest_history = hashlib.sha256() + v1_layer_parent_id = None + v1_layer_id = None + blob_index = 0 + + for history_index, history_entry in enumerate(history): + if blob_index >= len(self.layers): + raise MalformedSchema2Manifest('Missing history entry #%s' % blob_index) + + v1_layer_parent_id = v1_layer_id + blob_layer = None if history_entry.is_empty else self.layers[blob_index] + blob_digest = EMPTY_BLOB_DIGEST if blob_layer is None else str(blob_layer.digest) + compressed_size = 0 if blob_layer is None else blob_layer.compressed_size + + # Create a new synthesized V1 ID for the history layer by hashing its content and + # the blob associated with it. 
+ digest_history.update(json.dumps(history_entry.raw_entry)) + digest_history.update("|") + digest_history.update(str(history_index)) + digest_history.update("|") + digest_history.update(blob_digest) + digest_history.update("||") + + v1_layer_id = digest_history.hexdigest() + yield ManifestImageLayer(history=history_entry, + blob_layer=blob_layer, + blob_digest=blob_digest, + v1_id=v1_layer_id, + v1_parent_id=v1_layer_parent_id, + compressed_size=compressed_size) + + if not history_entry.is_empty: + blob_index += 1 + + @property + def has_legacy_image(self): + return not self.has_remote_layer + + def generate_legacy_layers(self, images_map, content_retriever): + assert not self.has_remote_layer + + # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains + # the logic for generating the DockerV1Metadata. All of this will go away once we get + # rid of legacy images in the database, so this is a temporary solution. + v1_builder = DockerSchema1ManifestBuilder('', '', '') + self._populate_schema1_builder(v1_builder, content_retriever) + return v1_builder.build().generate_legacy_layers(images_map, content_retriever) + + def get_leaf_layer_v1_image_id(self, content_retriever): + # NOTE: If there exists a layer with remote content, then we consider this manifest + # to not support legacy images. 
+ if self.has_remote_layer: + return None + + return list(self._manifest_image_layers(content_retriever))[-1].v1_id + + def get_legacy_image_ids(self, content_retriever): + if self.has_remote_layer: + return None + + return [l.v1_id for l in self._manifest_image_layers(content_retriever)] + + def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever): + if self.has_remote_layer: + return None + + v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name) + self._populate_schema1_builder(v1_builder, content_retriever) + return v1_builder.build() + + def unsigned(self): + return self + + def _populate_schema1_builder(self, v1_builder, content_retriever): + """ Populates a DockerSchema1ManifestBuilder with the layers and config from + this schema. + """ + assert not self.has_remote_layer + schema2_config = self._get_built_config(content_retriever) + layers = list(self._manifest_image_layers(content_retriever, schema2_config)) + for index, layer in enumerate(reversed(layers)): # Schema 1 layers are in reverse order + v1_compatibility = schema2_config.build_v1_compatibility(layer.history, + layer.v1_id, + layer.v1_parent_id, + index == 0, + layer.compressed_size) + v1_builder.add_layer(str(layer.blob_digest), json.dumps(v1_compatibility)) + + return v1_builder + def _get_built_config(self, content_retriever): config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest) if config_bytes is None: @@ -228,13 +323,6 @@ class DockerSchema2Manifest(ManifestInterface): return DockerSchema2Config(config_bytes) - @property - def bytes(self): - return self._payload - - def child_manifests(self, content_retriever): - return None - def _generate_layers(self): for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]): content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY] @@ -252,66 +340,6 @@ class DockerSchema2Manifest(ManifestInterface): is_remote=is_remote, 
urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY)) - @property - def layers_with_v1_ids(self): - assert not self.has_remote_layer - digest_history = hashlib.sha256() - v1_layer_parent_id = None - v1_layer_id = None - - for layer in self.layers: - v1_layer_parent_id = v1_layer_id - - # Create a new synthesized V1 ID for the layer by adding its digest and index to the - # existing digest history hash builder. This will ensure unique V1s across *all* schemas in - # a repository. - digest_history.update(str(layer.digest)) - digest_history.update("#") - digest_history.update(str(layer.index)) - digest_history.update("|") - v1_layer_id = digest_history.hexdigest() - yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id, - compressed_size=layer.compressed_size) - - def populate_schema1_builder(self, v1_builder, content_retriever): - """ Populates a DockerSchema1ManifestBuilder with the layers and config from - this schema. - """ - assert not self.has_remote_layer - schema2_config = self._get_built_config(content_retriever) - - # Build the V1 IDs for the layers. - layers = list(self.layers_with_v1_ids) - for layer_with_ids in reversed(layers): # Schema1 has layers in reverse order - v1_compatibility = schema2_config.build_v1_compatibility(layer_with_ids.layer.index, - layer_with_ids.v1_id, - layer_with_ids.v1_parent_id, - layer_with_ids.compressed_size) - v1_builder.add_layer(str(layer_with_ids.layer.digest), json.dumps(v1_compatibility)) - - return v1_builder - - def generate_legacy_layers(self, images_map, content_retriever): - assert not self.has_remote_layer - - # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains - # the logic for generating the DockerV1Metadata. All of this will go away once we get - # rid of legacy images in the database, so this is a temporary solution. 
- v1_builder = DockerSchema1ManifestBuilder('', '', '') - self.populate_schema1_builder(v1_builder, content_retriever) - return v1_builder.build().generate_legacy_layers(images_map, content_retriever) - - def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): - if self.has_remote_layer: - return None - - v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name) - self.populate_schema1_builder(v1_builder, content_retriever) - return v1_builder.build() - - def unsigned(self): - return self - class DockerSchema2ManifestBuilder(object): """ diff --git a/image/docker/schema2/test/conversion_data.py b/image/docker/schema2/test/conversion_data.py new file mode 100644 index 000000000..e369fbbc3 --- /dev/null +++ b/image/docker/schema2/test/conversion_data.py @@ -0,0 +1,142 @@ +SCHEMA1_BYTES = r"""{ + "schemaVersion": 1, + "name": "devtable/somerepo", + "tag": "latest", + "architecture": "amd64", + "fsLayers": [ + { + "blobSum": "sha256:28b98663b93a1c984379691300f284ee1536db1b6ecd8a1d59222528f80cee89" + }, + { + "blobSum": "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4" + }, + { + "blobSum": "sha256:90e01955edcd85dac7985b72a8374545eac617ccdddcc992b732e43cd42534af" + } + ], + "history": [ + { + "v1Compatibility": 
"{\"architecture\":\"amd64\",\"config\":{\"Hostname\":\"\",\"Domainname\":\"\",\"User\":\"\",\"AttachStdin\":false,\"AttachStdout\":false,\"AttachStderr\":false,\"Tty\":false,\"OpenStdin\":false,\"StdinOnce\":false,\"Env\":[\"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"],\"Cmd\":[\"sh\"],\"Image\":\"\",\"Volumes\":null,\"WorkingDir\":\"\",\"Entrypoint\":null,\"OnBuild\":null,\"Labels\":{}},\"container\":\"86fff20ea922659929a4716850cc9b3a2cca6c197f7a7ece7da5b6d9d8ac4954\",\"container_config\":{\"Hostname\":\"86fff20ea922\",\"Domainname\":\"\",\"User\":\"\",\"AttachStdin\":true,\"AttachStdout\":true,\"AttachStderr\":true,\"Tty\":true,\"OpenStdin\":true,\"StdinOnce\":true,\"Env\":[\"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"],\"Cmd\":[\"sh\"],\"Image\":\"busybox\",\"Volumes\":null,\"WorkingDir\":\"\",\"Entrypoint\":null,\"OnBuild\":null,\"Labels\":{}},\"created\":\"2018-11-20T21:15:01.569237Z\",\"docker_version\":\"17.09.0-ce\",\"id\":\"692854afd8718d5285bf99cecfc9d6385f41122d3cea70fc9961b3f23ae0d768\",\"os\":\"linux\",\"parent\":\"61b2663f44edc9a6af340b9bfd46d17d8ed2574ffe289e0d95c0476da3c6faac\"}" + }, + { + "v1Compatibility": "{\"id\":\"61b2663f44edc9a6af340b9bfd46d17d8ed2574ffe289e0d95c0476da3c6faac\",\"parent\":\"5327db1e651c0f49157ace3ffd8569c7361b1f2e61d0b49ff617e83a42bf78d6\",\"created\":\"2018-10-02T17:19:34.239926273Z\",\"container_config\":{\"Cmd\":[\"/bin/sh -c #(nop) CMD [\\\"sh\\\"]\"]},\"throwaway\":true}" + }, + { + "v1Compatibility": "{\"id\":\"5327db1e651c0f49157ace3ffd8569c7361b1f2e61d0b49ff617e83a42bf78d6\",\"created\":\"2018-10-02T17:19:34.03981888Z\",\"container_config\":{\"Cmd\":[\"/bin/sh -c #(nop) ADD file:63eebd629a5f7558c361be0305df5f16baac1d3bbec014b7c486e28812441969 in / \"]}}" + } + ], + "signatures": [ + { + "header": { + "jwk": { + "crv": "P-256", + "kid": "AARA:PFUD:3V54:7F2S:2P7E:WMCU:WRE7:KUYD:CFKH:UHZ7:AZ4I:UQEX", + "kty": "EC", + "x": "34N4h_uM7FedPw4k3_VabKlt7qoBWpHgpko7zE0RkeY", + 
"y": "LhxxtCYh_b1EwUbl3-tQFTbg1mTu34vMxj4UaKjWZk8" + }, + "alg": "ES256" + }, + "signature": "4-nlo2R9Dn3PIGHuhvPkamCzLgFYURziihwZYAnmw5eMKLRj4ir-VeEJI30mDh8ArTeDo-PnMLRNZGRX2NwXHw", + "protected": "eyJmb3JtYXRMZW5ndGgiOjIzNDEsImZvcm1hdFRhaWwiOiJDbjAiLCJ0aW1lIjoiMjAxOC0xMS0yMFQyMToxNzozMVoifQ" + } + ] +}""" + +SCHEMA2_MANIFEST_BYTES = r"""{ + "schemaVersion": 2, + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "config": { + "mediaType": "application/vnd.docker.container.image.v1+json", + "size": 1829, + "digest": "sha256:e7a06c2e5b7afb1bbfa9124812e87f1138c4c10d77e0a217f0b8c8c9694dc5cf" + }, + "layers": [ + { + "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", + "size": 727978, + "digest": "sha256:90e01955edcd85dac7985b72a8374545eac617ccdddcc992b732e43cd42534af" + }, + { + "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", + "size": 190, + "digest": "sha256:28b98663b93a1c984379691300f284ee1536db1b6ecd8a1d59222528f80cee89" + } + ] +}""" + +SCHEMA2_CONFIG_BYTES = r"""{ + "architecture": "amd64", + "config": { + "Hostname": "", + "Domainname": "", + "User": "", + "AttachStdin": false, + "AttachStdout": false, + "AttachStderr": false, + "Tty": false, + "OpenStdin": false, + "StdinOnce": false, + "Env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + ], + "Cmd": [ + "sh" + ], + "Image": "", + "Volumes": null, + "WorkingDir": "", + "Entrypoint": null, + "OnBuild": null, + "Labels": {} + }, + "container": "86fff20ea922659929a4716850cc9b3a2cca6c197f7a7ece7da5b6d9d8ac4954", + "container_config": { + "Hostname": "86fff20ea922", + "Domainname": "", + "User": "", + "AttachStdin": true, + "AttachStdout": true, + "AttachStderr": true, + "Tty": true, + "OpenStdin": true, + "StdinOnce": true, + "Env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + ], + "Cmd": [ + "sh" + ], + "Image": "busybox", + "Volumes": null, + "WorkingDir": "", + "Entrypoint": null, + "OnBuild": null, + 
"Labels": {} + }, + "created": "2018-11-20T21:15:01.569237Z", + "docker_version": "17.09.0-ce", + "history": [ + { + "created": "2018-10-02T17:19:34.03981888Z", + "created_by": "/bin/sh -c #(nop) ADD file:63eebd629a5f7558c361be0305df5f16baac1d3bbec014b7c486e28812441969 in / " + }, + { + "created": "2018-10-02T17:19:34.239926273Z", + "created_by": "/bin/sh -c #(nop) CMD [\"sh\"]", + "empty_layer": true + }, + { + "created": "2018-11-20T21:15:01.569237Z", + "created_by": "sh" + } + ], + "os": "linux", + "rootfs": { + "type": "layers", + "diff_ids": [ + "sha256:8a788232037eaf17794408ff3df6b922a1aedf9ef8de36afdae3ed0b0381907b", + "sha256:70d967d052ce14cd372b12663d84046ade5712c3a4ece6078cdb63e75bbfcfa1" + ] + } +}""" \ No newline at end of file diff --git a/image/docker/schema2/test/test_config.py b/image/docker/schema2/test/test_config.py index 2c2b2f6d5..c0e86c05f 100644 --- a/image/docker/schema2/test/test_config.py +++ b/image/docker/schema2/test/test_config.py @@ -118,7 +118,8 @@ def test_valid_config(): assert history[2].command == 'sh' for index, history_entry in enumerate(history): - v1_compat = config.build_v1_compatibility(index, 'somev1id', 'someparentid') + v1_compat = config.build_v1_compatibility(history_entry, 'somev1id', 'someparentid', + index == 3) assert v1_compat['id'] == 'somev1id' assert v1_compat['parent'] == 'someparentid' @@ -126,6 +127,6 @@ def test_valid_config(): assert v1_compat['container_config'] == config._parsed['container_config'] else: assert 'Hostname' not in v1_compat['container_config'] - assert v1_compat['container_config']['Cmd'] == history_entry.command + assert v1_compat['container_config']['Cmd'] == [history_entry.command] assert config.labels == {} diff --git a/image/docker/schema2/test/test_conversion.py b/image/docker/schema2/test/test_conversion.py new file mode 100644 index 000000000..deb1621b4 --- /dev/null +++ b/image/docker/schema2/test/test_conversion.py @@ -0,0 +1,65 @@ +import json + +from image.docker.schema1 import 
DockerSchema1Manifest +from image.docker.schema2.manifest import DockerSchema2Manifest +from image.docker.schema2.test.conversion_data import (SCHEMA1_BYTES, SCHEMA2_MANIFEST_BYTES, + SCHEMA2_CONFIG_BYTES) +from image.docker.schemautil import ContentRetrieverForTesting + +def test_legacy_layers(): + retriever = ContentRetrieverForTesting({ + 'sha256:e7a06c2e5b7afb1bbfa9124812e87f1138c4c10d77e0a217f0b8c8c9694dc5cf': SCHEMA2_CONFIG_BYTES, + }) + + schema2 = DockerSchema2Manifest(SCHEMA2_MANIFEST_BYTES) + schema1 = DockerSchema1Manifest(SCHEMA1_BYTES, validate=False) + + # Check legacy layers + schema2_legacy_layers = list(schema2.generate_legacy_layers({}, retriever)) + schema1_legacy_layers = list(schema1.generate_legacy_layers({}, retriever)) + assert len(schema1_legacy_layers) == len(schema2_legacy_layers) + + for index in range(0, len(schema1_legacy_layers)): + schema1_legacy_layer = schema1_legacy_layers[index] + schema2_legacy_layer = schema2_legacy_layers[index] + assert schema1_legacy_layer.content_checksum == schema2_legacy_layer.content_checksum + assert schema1_legacy_layer.comment == schema2_legacy_layer.comment + assert schema1_legacy_layer.command == schema2_legacy_layer.command + + +def test_conversion(): + retriever = ContentRetrieverForTesting({ + 'sha256:e7a06c2e5b7afb1bbfa9124812e87f1138c4c10d77e0a217f0b8c8c9694dc5cf': SCHEMA2_CONFIG_BYTES, + }) + + schema2 = DockerSchema2Manifest(SCHEMA2_MANIFEST_BYTES) + schema1 = DockerSchema1Manifest(SCHEMA1_BYTES, validate=False) + + converted = schema2.get_schema1_manifest('devtable', 'somerepo', 'latest', retriever) + assert len(converted.layers) == len(schema1.layers) + + image_id_map = {} + for index in range(0, len(converted.layers)): + converted_layer = converted.layers[index] + schema1_layer = schema1.layers[index] + + image_id_map[schema1_layer.v1_metadata.image_id] = converted_layer.v1_metadata.image_id + + assert str(schema1_layer.digest) == str(converted_layer.digest) + + schema1_parent_id = 
schema1_layer.v1_metadata.parent_image_id + converted_parent_id = converted_layer.v1_metadata.parent_image_id + assert (schema1_parent_id is None) == (converted_parent_id is None) + + if schema1_parent_id is not None: + assert image_id_map[schema1_parent_id] == converted_parent_id + + assert schema1_layer.v1_metadata.created == converted_layer.v1_metadata.created + assert schema1_layer.v1_metadata.comment == converted_layer.v1_metadata.comment + assert schema1_layer.v1_metadata.command == converted_layer.v1_metadata.command + assert schema1_layer.v1_metadata.labels == converted_layer.v1_metadata.labels + + schema1_container_config = json.loads(schema1_layer.raw_v1_metadata)['container_config'] + converted_container_config = json.loads(converted_layer.raw_v1_metadata)['container_config'] + + assert schema1_container_config == converted_container_config diff --git a/image/docker/schema2/test/test_list.py b/image/docker/schema2/test/test_list.py index 8fbdfbe98..0960e2b44 100644 --- a/image/docker/schema2/test/test_list.py +++ b/image/docker/schema2/test/test_list.py @@ -79,6 +79,7 @@ def test_valid_manifestlist(): assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' assert manifestlist.bytes == MANIFESTLIST_BYTES assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES) + assert manifestlist.layers is None assert not manifestlist.blob_digests for index, manifest in enumerate(manifestlist.manifests(retriever)): @@ -89,22 +90,18 @@ def test_valid_manifestlist(): assert isinstance(manifest.manifest_obj, DockerSchema1Manifest) assert manifest.manifest_obj.schema_version == 1 - compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever) + compatible_manifest = manifestlist.get_schema1_manifest('foo', 'bar', 'baz', retriever) assert compatible_manifest.schema_version == 1 - assert manifestlist.layers is None - assert manifestlist.leaf_layer_v1_image_id is None - assert 
manifestlist.legacy_image_ids is None - -def test_get_v1_compatible_manifest_no_matching_list(): +def test_get_schema1_manifest_no_matching_list(): manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES) assert len(manifestlist.manifests(retriever)) == 1 assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' assert manifestlist.bytes == NO_AMD_MANIFESTLIST_BYTES - compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever) + compatible_manifest = manifestlist.get_schema1_manifest('foo', 'bar', 'baz', retriever) assert compatible_manifest is None diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index 1a515893f..4a875c794 100644 --- a/image/docker/schema2/test/test_manifest.py +++ b/image/docker/schema2/test/test_manifest.py @@ -6,7 +6,7 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder, DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE) from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest, - DockerSchema2ManifestBuilder) + DockerSchema2ManifestBuilder, EMPTY_BLOB_DIGEST) from image.docker.schema2.test.test_config import CONFIG_BYTES from image.docker.schemautil import ContentRetrieverForTesting @@ -96,6 +96,7 @@ def test_valid_manifest(): assert str(manifest.config.digest) == 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7' assert manifest.media_type == "application/vnd.docker.distribution.manifest.v2+json" assert not manifest.has_remote_layer + assert manifest.has_legacy_image assert len(manifest.layers) == 4 assert manifest.layers[0].compressed_size == 1234 @@ -111,6 +112,36 @@ def test_valid_manifest(): assert blob_digests == expected assert list(manifest.local_blob_digests) == expected + retriever = ContentRetrieverForTesting.for_config({ + "config": { + "Labels": {}, + }, + "rootfs": {"type": "layers", "diff_ids": 
[]}, + "history": [ + { + "created": "2018-04-03T18:37:09.284840891Z", + "created_by": "foo" + }, + { + "created": "2018-04-12T18:37:09.284840891Z", + "created_by": "bar" + }, + { + "created": "2018-04-03T18:37:09.284840891Z", + "created_by": "foo" + }, + { + "created": "2018-04-12T18:37:09.284840891Z", + "created_by": "bar" + }, + ], + }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885) + + manifest_image_layers = list(manifest._manifest_image_layers(retriever)) + assert len(manifest_image_layers) == len(list(manifest.layers)) + for index in range(0, 4): + assert manifest_image_layers[index].blob_digest == str(manifest.layers[index].digest) + def test_valid_remote_manifest(): manifest = DockerSchema2Manifest(REMOTE_MANIFEST_BYTES) @@ -138,8 +169,8 @@ def test_valid_remote_manifest(): assert local_digests == (expected - {manifest.layers[0].digest}) assert manifest.has_remote_layer - assert manifest.leaf_layer_v1_image_id is None - assert manifest.legacy_image_ids is None + assert manifest.get_leaf_layer_v1_image_id(None) is None + assert manifest.get_legacy_image_ids(None) is None def test_schema2_builder(): @@ -179,26 +210,13 @@ def test_build_schema1(): }) builder = DockerSchema1ManifestBuilder('somenamespace', 'somename', 'sometag') - manifest.populate_schema1_builder(builder, retriever) + manifest._populate_schema1_builder(builder, retriever) schema1 = builder.build(docker_v2_signing_key) assert schema1.media_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE - assert len(schema1.layers) == len(manifest.layers) - assert set(schema1.image_ids) == set([l.v1_id for l in manifest.layers_with_v1_ids]) - assert set(schema1.parent_image_ids) == set([l.v1_parent_id for l in - manifest.layers_with_v1_ids if l.v1_parent_id]) - - manifest_layers = list(manifest.layers_with_v1_ids) - for index, layer in enumerate(schema1.layers): - assert layer.digest == manifest_layers[index].layer.digest - assert layer.v1_metadata.image_id == 
manifest_layers[index].v1_id - assert layer.v1_metadata.parent_image_id == manifest_layers[index].v1_parent_id - - for index, digest in enumerate(schema1.blob_digests): - assert digest == str(list(manifest.blob_digests)[index]) -def test_get_v1_compatible_manifest(): +def test_get_schema1_manifest(): retriever = ContentRetrieverForTesting.for_config({ "config": { "Labels": {}, @@ -225,13 +243,9 @@ def test_get_v1_compatible_manifest(): }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885) manifest = DockerSchema2Manifest(MANIFEST_BYTES) - schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', retriever) + schema1 = manifest.get_schema1_manifest('somenamespace', 'somename', 'sometag', retriever) assert schema1 is not None assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE - assert len(schema1.layers) == len(manifest.layers) - assert set(schema1.image_ids) == set([l.v1_id for l in manifest.layers_with_v1_ids]) - assert set(schema1.parent_image_ids) == set([l.v1_parent_id for l in - manifest.layers_with_v1_ids if l.v1_parent_id]) def test_generate_legacy_layers(): @@ -248,29 +262,39 @@ def test_generate_legacy_layers(): "history": [ { "created": "2018-04-03T18:37:09.284840891Z", - "created_by": "foo" + "created_by": "base" + }, + { + "created": "2018-04-06T18:37:09.284840891Z", + "created_by": "middle", + "empty_layer": True, }, { "created": "2018-04-12T18:37:09.284840891Z", - "created_by": "bar" + "created_by": "leaf" }, ], }, 'sha256:def456', 2000) legacy_layers = list(manifest.generate_legacy_layers({}, retriever)) - assert len(legacy_layers) == 2 + assert len(legacy_layers) == 3 assert legacy_layers[0].content_checksum == 'sha256:abc123' - assert legacy_layers[1].content_checksum == 'sha256:def456' + assert legacy_layers[1].content_checksum == EMPTY_BLOB_DIGEST + assert legacy_layers[2].content_checksum == 'sha256:def456' assert legacy_layers[0].created == "2018-04-03T18:37:09.284840891Z" 
- assert legacy_layers[1].created == "2018-04-12T18:37:09.284840891Z" + assert legacy_layers[1].created == "2018-04-06T18:37:09.284840891Z" + assert legacy_layers[2].created == "2018-04-12T18:37:09.284840891Z" - assert legacy_layers[0].command == '"foo"' - assert legacy_layers[1].command == '"bar"' + assert legacy_layers[0].command == '["base"]' + assert legacy_layers[1].command == '["middle"]' + assert legacy_layers[2].command == '["leaf"]' + assert legacy_layers[2].parent_image_id == legacy_layers[1].image_id assert legacy_layers[1].parent_image_id == legacy_layers[0].image_id assert legacy_layers[0].parent_image_id is None + assert legacy_layers[1].image_id != legacy_layers[2].image_id assert legacy_layers[0].image_id != legacy_layers[1] @@ -283,10 +307,11 @@ def test_remote_layer_manifest(): manifest = builder.build() assert manifest.has_remote_layer - assert manifest.leaf_layer_v1_image_id is None - assert manifest.legacy_image_ids is None + assert manifest.get_leaf_layer_v1_image_id(None) is None + assert manifest.get_legacy_image_ids(None) is None + assert not manifest.has_legacy_image - schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', None) + schema1 = manifest.get_schema1_manifest('somenamespace', 'somename', 'sometag', None) assert schema1 is None assert set(manifest.blob_digests) == {'sha256:adef', 'sha256:abcd', 'sha256:1352', 'sha256:1353'} diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py index 28715bf21..bed90c6a6 100644 --- a/workers/manifestbackfillworker.py +++ b/workers/manifestbackfillworker.py @@ -51,12 +51,10 @@ class BrokenManifest(ManifestInterface): def layers(self): return [] - @property - def legacy_image_ids(self): + def get_legacy_image_ids(self, cr): return [] - @property - def leaf_layer_v1_image_id(self): + def get_leaf_layer_v1_image_id(self, cr): return None @property @@ -79,7 +77,7 @@ class BrokenManifest(ManifestInterface): def generate_legacy_layers(self, images_map,
lookup_config_fn): return None - def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): + def get_schema1_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): return self @property @@ -94,6 +92,10 @@ class BrokenManifest(ManifestInterface): def is_manifest_list(self): return False + @property + def has_legacy_image(self): + return False + class ManifestBackfillWorker(Worker): def __init__(self):