Fix conversion of schema 2 manifests to schema 1 manifests

Also adds a number of conversion tests and clarifies the interfaces a bit more
This commit is contained in:
Joseph Schorr 2018-11-21 17:23:52 +02:00
parent bd79eaa38f
commit c233760007
11 changed files with 457 additions and 183 deletions

View file

@ -38,24 +38,24 @@ class ManifestInterface(object):
does not support layers. """ does not support layers. """
pass pass
@abstractproperty
def leaf_layer_v1_image_id(self):
""" Returns the Docker V1 image ID for the leaf (top) layer, if any, or None if
not applicable. """
pass
@abstractproperty
def legacy_image_ids(self):
""" Returns the Docker V1 image IDs for the layers of this manifest or None if not applicable.
"""
pass
@abstractproperty @abstractproperty
def layers_compressed_size(self): def layers_compressed_size(self):
""" Returns the total compressed size of all the layers in this manifest. Returns None if this """ Returns the total compressed size of all the layers in this manifest. Returns None if this
cannot be computed locally. cannot be computed locally.
""" """
@abstractmethod
def get_leaf_layer_v1_image_id(self, content_retriever):
    """ Returns the Docker V1 image ID of the leaf (top-most) layer of this manifest, or None
        when no such ID applies. Implementations receive a content retriever so they can load
        any additional content they need to compute the ID.
    """
@abstractmethod
def get_legacy_image_ids(self, content_retriever):
    """ Returns the Docker V1 image IDs of the layers in this manifest, or None when legacy
        image IDs do not apply to it. Implementations receive a content retriever so they can
        load any additional content they need to compute the IDs.
    """
@abstractproperty @abstractproperty
def blob_digests(self): def blob_digests(self):
""" Returns an iterator over all the blob digests referenced by this manifest, """ Returns an iterator over all the blob digests referenced by this manifest,
@ -86,10 +86,15 @@ class ManifestInterface(object):
def unsigned(self): def unsigned(self):
""" Returns an unsigned version of this manifest. """ """ Returns an unsigned version of this manifest. """
@abstractproperty
def has_legacy_image(self):
    """ True when a legacy (Docker V1) image exists for this manifest; False otherwise. """
@abstractmethod @abstractmethod
def generate_legacy_layers(self, images_map, content_retriever): def generate_legacy_layers(self, images_map, content_retriever):
""" """
Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata, starting
at the base layer and working towards the leaf.
If Docker gives us a layer with a v1 image ID that already points to existing If Docker gives us a layer with a v1 image ID that already points to existing
content, but the checksums don't match, then we need to rewrite the image ID content, but the checksums don't match, then we need to rewrite the image ID
@ -99,15 +104,16 @@ class ManifestInterface(object):
""" """
@abstractmethod @abstractmethod
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. """ Returns a schema1 version of the manifest. If this is a mainfest list, should return the
manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None. If none, returns None.
""" """
@add_metaclass(ABCMeta) @add_metaclass(ABCMeta)
class ContentRetriever(object): class ContentRetriever(object):
""" Defines the interface for retrieval of various content referneced by a manifest. """ """ Defines the interface for retrieval of various content referenced by a manifest. """
@abstractmethod @abstractmethod
def get_manifest_bytes_with_digest(self, digest): def get_manifest_bytes_with_digest(self, digest):
""" Returns the bytes of the manifest with the given digest or None if none found. """ """ Returns the bytes of the manifest with the given digest or None if none found. """

View file

@ -267,10 +267,6 @@ class DockerSchema1Manifest(ManifestInterface):
def image_ids(self): def image_ids(self):
return {mdata.v1_metadata.image_id for mdata in self.layers} return {mdata.v1_metadata.image_id for mdata in self.layers}
@property
def legacy_image_ids(self):
return {mdata.v1_metadata.image_id for mdata in self.layers}
@property @property
def parent_image_ids(self): def parent_image_ids(self):
return {mdata.v1_metadata.parent_image_id for mdata in self.layers return {mdata.v1_metadata.parent_image_id for mdata in self.layers
@ -280,10 +276,6 @@ class DockerSchema1Manifest(ManifestInterface):
def checksums(self): def checksums(self):
return list({str(mdata.digest) for mdata in self.layers}) return list({str(mdata.digest) for mdata in self.layers})
@property
def leaf_layer_v1_image_id(self):
return self.layers[-1].v1_metadata.image_id
@property @property
def leaf_layer(self): def leaf_layer(self):
return self.layers[-1] return self.layers[-1]
@ -377,7 +369,25 @@ class DockerSchema1Manifest(ManifestInterface):
def generate_legacy_layers(self, images_map, content_retriever): def generate_legacy_layers(self, images_map, content_retriever):
return self.rewrite_invalid_image_ids(images_map) return self.rewrite_invalid_image_ids(images_map)
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): def get_legacy_image_ids(self, content_retriever):
return self.legacy_image_ids
@property
def legacy_image_ids(self):
    """ Returns the set of Docker V1 image IDs recorded in the V1 metadata of this manifest's
        layers.
    """
    collected_ids = set()
    for layer in self.layers:
        collected_ids.add(layer.v1_metadata.image_id)
    return collected_ids
@property
def has_legacy_image(self):
    """ Always True for this manifest type. """
    legacy_image_present = True
    return legacy_image_present
@property
def leaf_layer_v1_image_id(self):
    """ Returns the Docker V1 image ID stored in the V1 metadata of the last entry of
        `self.layers`.
    """
    last_layer = self.layers[-1]
    return last_layer.v1_metadata.image_id
def get_leaf_layer_v1_image_id(self, content_retriever):
    """ Returns the Docker V1 image ID stored in the V1 metadata of the last entry of
        `self.layers`. The content retriever is not consulted.
    """
    last_layer = self.layers[-1]
    return last_layer.v1_metadata.image_id
def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None. If none, returns None.
""" """

View file

@ -94,6 +94,7 @@ Example:
import copy import copy
import json import json
import hashlib
from collections import namedtuple from collections import namedtuple
from jsonschema import validate as validate_schema, ValidationError from jsonschema import validate as validate_schema, ValidationError
@ -111,7 +112,8 @@ DOCKER_SCHEMA2_CONFIG_EMPTY_LAYER_KEY = "empty_layer"
DOCKER_SCHEMA2_CONFIG_TYPE_KEY = "type" DOCKER_SCHEMA2_CONFIG_TYPE_KEY = "type"
LayerHistory = namedtuple('LayerHistory', ['created', 'created_datetime', 'command', 'is_empty']) LayerHistory = namedtuple('LayerHistory', ['created', 'created_datetime', 'command', 'is_empty',
'raw_entry'])
class MalformedSchema2Config(ManifestException): class MalformedSchema2Config(ManifestException):
@ -211,30 +213,25 @@ class DockerSchema2Config(object):
yield LayerHistory(created_datetime=created_datetime, yield LayerHistory(created_datetime=created_datetime,
created=history_entry[DOCKER_SCHEMA2_CONFIG_CREATED_KEY], created=history_entry[DOCKER_SCHEMA2_CONFIG_CREATED_KEY],
command=history_entry[DOCKER_SCHEMA2_CONFIG_CREATED_BY_KEY], command=history_entry[DOCKER_SCHEMA2_CONFIG_CREATED_BY_KEY],
is_empty=history_entry.get(DOCKER_SCHEMA2_CONFIG_EMPTY_LAYER_KEY, False)) is_empty=history_entry.get(DOCKER_SCHEMA2_CONFIG_EMPTY_LAYER_KEY, False),
raw_entry=history_entry)
def build_v1_compatibility(self, layer_index, v1_id, v1_parent_id, compressed_size=None): def build_v1_compatibility(self, history, v1_id, v1_parent_id, is_leaf, compressed_size=None):
""" Builds the V1 compatibility block for the given layer. """ Builds the V1 compatibility block for the given layer.
Note that the layer_index is 0-indexed, with the *base* layer being 0, and the leaf
layer being last.
""" """
history = list(self.history)
assert layer_index < len(history)
# If the layer is the leaf, it gets the full config (minus 2 fields). Otherwise, it gets only # If the layer is the leaf, it gets the full config (minus 2 fields). Otherwise, it gets only
# IDs. # IDs.
v1_compatibility = copy.deepcopy(self._parsed) if layer_index == len(history) - 1 else {} v1_compatibility = copy.deepcopy(self._parsed) if is_leaf else {}
v1_compatibility['id'] = v1_id v1_compatibility['id'] = v1_id
if v1_parent_id is not None: if v1_parent_id is not None:
v1_compatibility['parent'] = v1_parent_id v1_compatibility['parent'] = v1_parent_id
if 'created' not in v1_compatibility: if 'created' not in v1_compatibility:
v1_compatibility['created'] = history[layer_index].created v1_compatibility['created'] = history.created
if 'container_config' not in v1_compatibility: if 'container_config' not in v1_compatibility:
v1_compatibility['container_config'] = { v1_compatibility['container_config'] = {
'Cmd': history[layer_index].command, 'Cmd': [history.command],
} }
if compressed_size is not None: if compressed_size is not None:

View file

@ -215,14 +215,6 @@ class DockerSchema2ManifestList(ManifestInterface):
def layers(self): def layers(self):
return None return None
@property
def leaf_layer_v1_image_id(self):
return None
@property
def legacy_image_ids(self):
return None
@property @property
def blob_digests(self): def blob_digests(self):
# Manifest lists have no blob digests, since everything is stored as a manifest. # Manifest lists have no blob digests, since everything is stored as a manifest.
@ -253,7 +245,17 @@ class DockerSchema2ManifestList(ManifestInterface):
def get_manifest_labels(self, content_retriever): def get_manifest_labels(self, content_retriever):
return None return None
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever): def get_leaf_layer_v1_image_id(self, content_retriever):
return None
def get_legacy_image_ids(self, content_retriever):
    """ Always returns None for this manifest type; the content retriever is not consulted. """
    legacy_ids = None
    return legacy_ids
@property
def has_legacy_image(self):
    """ Always False for this manifest type. """
    legacy_image_present = False
    return legacy_image_present
def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None. If none, returns None.
""" """
@ -270,8 +272,7 @@ class DockerSchema2ManifestList(ManifestInterface):
logger.exception('Could not load child manifest') logger.exception('Could not load child manifest')
return None return None
return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, return manifest.get_schema1_manifest(namespace_name, repo_name, tag_name, content_retriever)
content_retriever)
return None return None

View file

@ -30,7 +30,11 @@ DockerV2ManifestLayer = namedtuple('DockerV2ManifestLayer', ['index', 'digest',
'is_remote', 'urls', 'is_remote', 'urls',
'compressed_size']) 'compressed_size'])
LayerWithV1ID = namedtuple('LayerWithV1ID', ['layer', 'v1_id', 'v1_parent_id', 'compressed_size']) ManifestImageLayer = namedtuple('ManifestImageLayer', ['history', 'blob_layer', 'v1_id',
'v1_parent_id', 'compressed_size',
'blob_digest'])
EMPTY_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4'
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -174,12 +178,13 @@ class DockerSchema2Manifest(ManifestInterface):
return self._layers return self._layers
@property @property
def layers_compressed_size(self): def leaf_layer(self):
return sum(layer.compressed_size for layer in self.layers) """ Returns the leaf layer for this manifest. """
return self.layers[-1]
@property @property
def leaf_layer(self): def layers_compressed_size(self):
return self.layers[-1] return sum(layer.compressed_size for layer in self.layers)
@property @property
def has_remote_layer(self): def has_remote_layer(self):
@ -189,22 +194,6 @@ class DockerSchema2Manifest(ManifestInterface):
return False return False
@property
def leaf_layer_v1_image_id(self):
# NOTE: If there exists a layer with remote content, then we consider this manifest
# to not support legacy images.
if self.has_remote_layer:
return None
return list(self.layers_with_v1_ids)[-1].v1_id
@property
def legacy_image_ids(self):
if self.has_remote_layer:
return None
return [l.v1_id for l in self.layers_with_v1_ids]
@property @property
def blob_digests(self): def blob_digests(self):
return [str(layer.digest) for layer in self.layers] + [str(self.config.digest)] return [str(layer.digest) for layer in self.layers] + [str(self.config.digest)]
@ -217,6 +206,112 @@ class DockerSchema2Manifest(ManifestInterface):
def get_manifest_labels(self, content_retriever): def get_manifest_labels(self, content_retriever):
return self._get_built_config(content_retriever).labels return self._get_built_config(content_retriever).labels
@property
def bytes(self):
    """ Returns the payload stored on this manifest (`self._payload`). """
    payload = self._payload
    return payload
def child_manifests(self, content_retriever):
    """ Always returns None for this manifest type; the content retriever is not consulted. """
    children = None
    return children
def _manifest_image_layers(self, content_retriever, schema2_config=None):
    """ Yields a ManifestImageLayer for each history entry of this manifest's config, in the
        order the entries appear in the config. Each yielded layer's `v1_parent_id` is the
        `v1_id` of the previously yielded layer (None for the first one).

        The config is `schema2_config` when given; otherwise it is loaded through the content
        retriever. History entries marked as empty consume no blob: they are reported with a
        `blob_layer` of None, the EMPTY_BLOB_DIGEST digest and a compressed size of 0. Every
        other entry is paired with the next unused entry of `self.layers`.

        Must only be called when the manifest has no remote layers (asserted).

        Raises MalformedSchema2Manifest if the config has fewer history entries than the
        manifest has layers, or if a non-empty history entry has no blob layer left to pair
        with. Since this is a generator, these errors surface when it is iterated.
    """
    assert not self.has_remote_layer

    # Retrieve the configuration for the manifest.
    config = schema2_config or self._get_built_config(content_retriever)
    history = list(config.history)
    if len(history) < len(self.layers):
        raise MalformedSchema2Manifest('Found less history than layer blobs')

    # A single running hash is used, so each synthesized ID depends on every entry before it.
    digest_history = hashlib.sha256()
    v1_layer_id = None
    blob_index = 0

    for history_index, history_entry in enumerate(history):
        # Only an entry that actually consumes a blob can run out of blobs. Empty entries
        # (including ones that trail the final blob, such as a closing CMD or ENV step) are
        # valid even after every blob layer has been paired.
        if not history_entry.is_empty and blob_index >= len(self.layers):
            raise MalformedSchema2Manifest('Missing history entry #%s' % blob_index)

        v1_layer_parent_id = v1_layer_id

        if history_entry.is_empty:
            blob_layer = None
            blob_digest = EMPTY_BLOB_DIGEST
            compressed_size = 0
        else:
            blob_layer = self.layers[blob_index]
            blob_digest = str(blob_layer.digest)
            compressed_size = blob_layer.compressed_size
            blob_index += 1

        # Create a new synthesized V1 ID for the history layer by hashing its content and
        # the blob associated with it.
        digest_history.update(json.dumps(history_entry.raw_entry))
        digest_history.update("|")
        digest_history.update(str(history_index))
        digest_history.update("|")
        digest_history.update(blob_digest)
        digest_history.update("||")

        v1_layer_id = digest_history.hexdigest()
        yield ManifestImageLayer(history=history_entry,
                                 blob_layer=blob_layer,
                                 blob_digest=blob_digest,
                                 v1_id=v1_layer_id,
                                 v1_parent_id=v1_layer_parent_id,
                                 compressed_size=compressed_size)
@property
def has_legacy_image(self):
    """ True unless this manifest has a remote layer, in which case it is False. """
    if self.has_remote_layer:
        return False
    return True
def generate_legacy_layers(self, images_map, content_retriever):
    """ Builds a schema 1 manifest from this manifest and returns the result of that manifest's
        own generate_legacy_layers for the given images map and content retriever.

        Must only be called when the manifest has no remote layers (asserted).
    """
    assert not self.has_remote_layer

    # NOTE: The DockerSchema1ManifestBuilder is reused because it already knows how to produce
    # the DockerV1Metadata. This is a temporary solution that goes away once legacy images are
    # removed from the database. The namespace, repository and tag are left blank.
    schema1_builder = DockerSchema1ManifestBuilder('', '', '')
    self._populate_schema1_builder(schema1_builder, content_retriever)
    schema1_manifest = schema1_builder.build()
    return schema1_manifest.generate_legacy_layers(images_map, content_retriever)
def get_leaf_layer_v1_image_id(self, content_retriever):
    """ Returns the synthesized Docker V1 image ID of the last image layer generated for this
        manifest. Returns None if the manifest has a remote layer or if no image layers are
        generated at all.
    """
    # NOTE: If there exists a layer with remote content, then we consider this manifest
    # to not support legacy images.
    if self.has_remote_layer:
        return None

    # Walk the generator and remember only the final ID, rather than building a full list and
    # indexing [-1] (which raised IndexError when there were no layers).
    leaf_v1_id = None
    for image_layer in self._manifest_image_layers(content_retriever):
        leaf_v1_id = image_layer.v1_id

    return leaf_v1_id
def get_legacy_image_ids(self, content_retriever):
    """ Returns a list with the synthesized Docker V1 image ID of every image layer generated
        for this manifest, in generation order, or None if the manifest has a remote layer.
    """
    if not self.has_remote_layer:
        legacy_ids = []
        for image_layer in self._manifest_image_layers(content_retriever):
            legacy_ids.append(image_layer.v1_id)
        return legacy_ids

    return None
def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
    """ Returns a schema 1 manifest built from this manifest for the given namespace, repository
        and tag, or None if this manifest has a remote layer.
    """
    if not self.has_remote_layer:
        schema1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
        self._populate_schema1_builder(schema1_builder, content_retriever)
        return schema1_builder.build()

    return None
def unsigned(self):
    """ Returns this same manifest object as its own unsigned version. """
    unsigned_manifest = self
    return unsigned_manifest
def _populate_schema1_builder(self, v1_builder, content_retriever):
    """ Adds one layer to the given DockerSchema1ManifestBuilder for every image layer generated
        for this manifest, together with a V1 compatibility block built from this manifest's
        config, and returns the builder.

        Must only be called when the manifest has no remote layers (asserted).
    """
    assert not self.has_remote_layer

    schema2_config = self._get_built_config(content_retriever)
    image_layers = list(self._manifest_image_layers(content_retriever, schema2_config))

    # Schema 1 layers are in reverse order, so the generated layers are added last-to-first.
    # The first layer added is therefore the leaf.
    last_to_first = image_layers[::-1]
    for position, image_layer in enumerate(last_to_first):
        is_leaf = position == 0
        compatibility = schema2_config.build_v1_compatibility(image_layer.history,
                                                              image_layer.v1_id,
                                                              image_layer.v1_parent_id,
                                                              is_leaf,
                                                              image_layer.compressed_size)
        serialized = json.dumps(compatibility)
        v1_builder.add_layer(str(image_layer.blob_digest), serialized)

    return v1_builder
def _get_built_config(self, content_retriever): def _get_built_config(self, content_retriever):
config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest) config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest)
if config_bytes is None: if config_bytes is None:
@ -228,13 +323,6 @@ class DockerSchema2Manifest(ManifestInterface):
return DockerSchema2Config(config_bytes) return DockerSchema2Config(config_bytes)
@property
def bytes(self):
return self._payload
def child_manifests(self, content_retriever):
return None
def _generate_layers(self): def _generate_layers(self):
for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]): for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]):
content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY] content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]
@ -252,66 +340,6 @@ class DockerSchema2Manifest(ManifestInterface):
is_remote=is_remote, is_remote=is_remote,
urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY)) urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY))
@property
def layers_with_v1_ids(self):
assert not self.has_remote_layer
digest_history = hashlib.sha256()
v1_layer_parent_id = None
v1_layer_id = None
for layer in self.layers:
v1_layer_parent_id = v1_layer_id
# Create a new synthesized V1 ID for the layer by adding its digest and index to the
# existing digest history hash builder. This will ensure unique V1s across *all* schemas in
# a repository.
digest_history.update(str(layer.digest))
digest_history.update("#")
digest_history.update(str(layer.index))
digest_history.update("|")
v1_layer_id = digest_history.hexdigest()
yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id,
compressed_size=layer.compressed_size)
def populate_schema1_builder(self, v1_builder, content_retriever):
""" Populates a DockerSchema1ManifestBuilder with the layers and config from
this schema.
"""
assert not self.has_remote_layer
schema2_config = self._get_built_config(content_retriever)
# Build the V1 IDs for the layers.
layers = list(self.layers_with_v1_ids)
for layer_with_ids in reversed(layers): # Schema1 has layers in reverse order
v1_compatibility = schema2_config.build_v1_compatibility(layer_with_ids.layer.index,
layer_with_ids.v1_id,
layer_with_ids.v1_parent_id,
layer_with_ids.compressed_size)
v1_builder.add_layer(str(layer_with_ids.layer.digest), json.dumps(v1_compatibility))
return v1_builder
def generate_legacy_layers(self, images_map, content_retriever):
assert not self.has_remote_layer
# NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
# the logic for generating the DockerV1Metadata. All of this will go away once we get
# rid of legacy images in the database, so this is a temporary solution.
v1_builder = DockerSchema1ManifestBuilder('', '', '')
self.populate_schema1_builder(v1_builder, content_retriever)
return v1_builder.build().generate_legacy_layers(images_map, content_retriever)
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
if self.has_remote_layer:
return None
v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
self.populate_schema1_builder(v1_builder, content_retriever)
return v1_builder.build()
def unsigned(self):
return self
class DockerSchema2ManifestBuilder(object): class DockerSchema2ManifestBuilder(object):
""" """

View file

@ -0,0 +1,142 @@
SCHEMA1_BYTES = r"""{
"schemaVersion": 1,
"name": "devtable/somerepo",
"tag": "latest",
"architecture": "amd64",
"fsLayers": [
{
"blobSum": "sha256:28b98663b93a1c984379691300f284ee1536db1b6ecd8a1d59222528f80cee89"
},
{
"blobSum": "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4"
},
{
"blobSum": "sha256:90e01955edcd85dac7985b72a8374545eac617ccdddcc992b732e43cd42534af"
}
],
"history": [
{
"v1Compatibility": "{\"architecture\":\"amd64\",\"config\":{\"Hostname\":\"\",\"Domainname\":\"\",\"User\":\"\",\"AttachStdin\":false,\"AttachStdout\":false,\"AttachStderr\":false,\"Tty\":false,\"OpenStdin\":false,\"StdinOnce\":false,\"Env\":[\"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"],\"Cmd\":[\"sh\"],\"Image\":\"\",\"Volumes\":null,\"WorkingDir\":\"\",\"Entrypoint\":null,\"OnBuild\":null,\"Labels\":{}},\"container\":\"86fff20ea922659929a4716850cc9b3a2cca6c197f7a7ece7da5b6d9d8ac4954\",\"container_config\":{\"Hostname\":\"86fff20ea922\",\"Domainname\":\"\",\"User\":\"\",\"AttachStdin\":true,\"AttachStdout\":true,\"AttachStderr\":true,\"Tty\":true,\"OpenStdin\":true,\"StdinOnce\":true,\"Env\":[\"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"],\"Cmd\":[\"sh\"],\"Image\":\"busybox\",\"Volumes\":null,\"WorkingDir\":\"\",\"Entrypoint\":null,\"OnBuild\":null,\"Labels\":{}},\"created\":\"2018-11-20T21:15:01.569237Z\",\"docker_version\":\"17.09.0-ce\",\"id\":\"692854afd8718d5285bf99cecfc9d6385f41122d3cea70fc9961b3f23ae0d768\",\"os\":\"linux\",\"parent\":\"61b2663f44edc9a6af340b9bfd46d17d8ed2574ffe289e0d95c0476da3c6faac\"}"
},
{
"v1Compatibility": "{\"id\":\"61b2663f44edc9a6af340b9bfd46d17d8ed2574ffe289e0d95c0476da3c6faac\",\"parent\":\"5327db1e651c0f49157ace3ffd8569c7361b1f2e61d0b49ff617e83a42bf78d6\",\"created\":\"2018-10-02T17:19:34.239926273Z\",\"container_config\":{\"Cmd\":[\"/bin/sh -c #(nop) CMD [\\\"sh\\\"]\"]},\"throwaway\":true}"
},
{
"v1Compatibility": "{\"id\":\"5327db1e651c0f49157ace3ffd8569c7361b1f2e61d0b49ff617e83a42bf78d6\",\"created\":\"2018-10-02T17:19:34.03981888Z\",\"container_config\":{\"Cmd\":[\"/bin/sh -c #(nop) ADD file:63eebd629a5f7558c361be0305df5f16baac1d3bbec014b7c486e28812441969 in / \"]}}"
}
],
"signatures": [
{
"header": {
"jwk": {
"crv": "P-256",
"kid": "AARA:PFUD:3V54:7F2S:2P7E:WMCU:WRE7:KUYD:CFKH:UHZ7:AZ4I:UQEX",
"kty": "EC",
"x": "34N4h_uM7FedPw4k3_VabKlt7qoBWpHgpko7zE0RkeY",
"y": "LhxxtCYh_b1EwUbl3-tQFTbg1mTu34vMxj4UaKjWZk8"
},
"alg": "ES256"
},
"signature": "4-nlo2R9Dn3PIGHuhvPkamCzLgFYURziihwZYAnmw5eMKLRj4ir-VeEJI30mDh8ArTeDo-PnMLRNZGRX2NwXHw",
"protected": "eyJmb3JtYXRMZW5ndGgiOjIzNDEsImZvcm1hdFRhaWwiOiJDbjAiLCJ0aW1lIjoiMjAxOC0xMS0yMFQyMToxNzozMVoifQ"
}
]
}"""
SCHEMA2_MANIFEST_BYTES = r"""{
"schemaVersion": 2,
"mediaType": "application/vnd.docker.distribution.manifest.v2+json",
"config": {
"mediaType": "application/vnd.docker.container.image.v1+json",
"size": 1829,
"digest": "sha256:e7a06c2e5b7afb1bbfa9124812e87f1138c4c10d77e0a217f0b8c8c9694dc5cf"
},
"layers": [
{
"mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip",
"size": 727978,
"digest": "sha256:90e01955edcd85dac7985b72a8374545eac617ccdddcc992b732e43cd42534af"
},
{
"mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip",
"size": 190,
"digest": "sha256:28b98663b93a1c984379691300f284ee1536db1b6ecd8a1d59222528f80cee89"
}
]
}"""
SCHEMA2_CONFIG_BYTES = r"""{
"architecture": "amd64",
"config": {
"Hostname": "",
"Domainname": "",
"User": "",
"AttachStdin": false,
"AttachStdout": false,
"AttachStderr": false,
"Tty": false,
"OpenStdin": false,
"StdinOnce": false,
"Env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
],
"Cmd": [
"sh"
],
"Image": "",
"Volumes": null,
"WorkingDir": "",
"Entrypoint": null,
"OnBuild": null,
"Labels": {}
},
"container": "86fff20ea922659929a4716850cc9b3a2cca6c197f7a7ece7da5b6d9d8ac4954",
"container_config": {
"Hostname": "86fff20ea922",
"Domainname": "",
"User": "",
"AttachStdin": true,
"AttachStdout": true,
"AttachStderr": true,
"Tty": true,
"OpenStdin": true,
"StdinOnce": true,
"Env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
],
"Cmd": [
"sh"
],
"Image": "busybox",
"Volumes": null,
"WorkingDir": "",
"Entrypoint": null,
"OnBuild": null,
"Labels": {}
},
"created": "2018-11-20T21:15:01.569237Z",
"docker_version": "17.09.0-ce",
"history": [
{
"created": "2018-10-02T17:19:34.03981888Z",
"created_by": "/bin/sh -c #(nop) ADD file:63eebd629a5f7558c361be0305df5f16baac1d3bbec014b7c486e28812441969 in / "
},
{
"created": "2018-10-02T17:19:34.239926273Z",
"created_by": "/bin/sh -c #(nop) CMD [\"sh\"]",
"empty_layer": true
},
{
"created": "2018-11-20T21:15:01.569237Z",
"created_by": "sh"
}
],
"os": "linux",
"rootfs": {
"type": "layers",
"diff_ids": [
"sha256:8a788232037eaf17794408ff3df6b922a1aedf9ef8de36afdae3ed0b0381907b",
"sha256:70d967d052ce14cd372b12663d84046ade5712c3a4ece6078cdb63e75bbfcfa1"
]
}
}"""

View file

@ -118,7 +118,8 @@ def test_valid_config():
assert history[2].command == 'sh' assert history[2].command == 'sh'
for index, history_entry in enumerate(history): for index, history_entry in enumerate(history):
v1_compat = config.build_v1_compatibility(index, 'somev1id', 'someparentid') v1_compat = config.build_v1_compatibility(history_entry, 'somev1id', 'someparentid',
index == 3)
assert v1_compat['id'] == 'somev1id' assert v1_compat['id'] == 'somev1id'
assert v1_compat['parent'] == 'someparentid' assert v1_compat['parent'] == 'someparentid'
@ -126,6 +127,6 @@ def test_valid_config():
assert v1_compat['container_config'] == config._parsed['container_config'] assert v1_compat['container_config'] == config._parsed['container_config']
else: else:
assert 'Hostname' not in v1_compat['container_config'] assert 'Hostname' not in v1_compat['container_config']
assert v1_compat['container_config']['Cmd'] == history_entry.command assert v1_compat['container_config']['Cmd'] == [history_entry.command]
assert config.labels == {} assert config.labels == {}

View file

@ -0,0 +1,65 @@
import json
from image.docker.schema1 import DockerSchema1Manifest
from image.docker.schema2.manifest import DockerSchema2Manifest
from image.docker.schema2.test.conversion_data import (SCHEMA1_BYTES, SCHEMA2_MANIFEST_BYTES,
SCHEMA2_CONFIG_BYTES)
from image.docker.schemautil import ContentRetrieverForTesting
def test_legacy_layers():
    """ Checks that the legacy layers generated from the schema 2 manifest agree with those
        generated from the schema 1 manifest on content checksum, comment and command.
    """
    config_digest = 'sha256:e7a06c2e5b7afb1bbfa9124812e87f1138c4c10d77e0a217f0b8c8c9694dc5cf'
    retriever = ContentRetrieverForTesting({config_digest: SCHEMA2_CONFIG_BYTES})

    schema2 = DockerSchema2Manifest(SCHEMA2_MANIFEST_BYTES)
    schema1 = DockerSchema1Manifest(SCHEMA1_BYTES, validate=False)

    # Check legacy layers
    from_schema2 = list(schema2.generate_legacy_layers({}, retriever))
    from_schema1 = list(schema1.generate_legacy_layers({}, retriever))
    assert len(from_schema1) == len(from_schema2)

    # The lengths were just asserted equal, so zip pairs up every layer.
    for schema1_legacy_layer, schema2_legacy_layer in zip(from_schema1, from_schema2):
        assert schema1_legacy_layer.content_checksum == schema2_legacy_layer.content_checksum
        assert schema1_legacy_layer.comment == schema2_legacy_layer.comment
        assert schema1_legacy_layer.command == schema2_legacy_layer.command
def test_conversion():
    """ Converts the schema 2 manifest to schema 1 and checks it, layer by layer, against the
        reference schema 1 manifest: blob digest, parent linkage, created, comment, command,
        labels and the container config of the raw V1 metadata.
    """
    config_digest = 'sha256:e7a06c2e5b7afb1bbfa9124812e87f1138c4c10d77e0a217f0b8c8c9694dc5cf'
    retriever = ContentRetrieverForTesting({config_digest: SCHEMA2_CONFIG_BYTES})

    schema2 = DockerSchema2Manifest(SCHEMA2_MANIFEST_BYTES)
    schema1 = DockerSchema1Manifest(SCHEMA1_BYTES, validate=False)

    converted = schema2.get_schema1_manifest('devtable', 'somerepo', 'latest', retriever)
    assert len(converted.layers) == len(schema1.layers)

    # The image IDs differ between the two manifests, so parent links are compared through a
    # mapping from each reference image ID to the converted image ID at the same position.
    image_id_map = {}
    for converted_layer, schema1_layer in zip(converted.layers, schema1.layers):
        expected_meta = schema1_layer.v1_metadata
        actual_meta = converted_layer.v1_metadata

        image_id_map[expected_meta.image_id] = actual_meta.image_id
        assert str(schema1_layer.digest) == str(converted_layer.digest)

        schema1_parent_id = expected_meta.parent_image_id
        converted_parent_id = actual_meta.parent_image_id
        assert (schema1_parent_id is None) == (converted_parent_id is None)

        if schema1_parent_id is not None:
            assert image_id_map[schema1_parent_id] == converted_parent_id

        assert expected_meta.created == actual_meta.created
        assert expected_meta.comment == actual_meta.comment
        assert expected_meta.command == actual_meta.command
        assert expected_meta.labels == actual_meta.labels

        schema1_container_config = json.loads(schema1_layer.raw_v1_metadata)['container_config']
        converted_container_config = json.loads(converted_layer.raw_v1_metadata)['container_config']
        assert schema1_container_config == converted_container_config

View file

@ -79,6 +79,7 @@ def test_valid_manifestlist():
assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json'
assert manifestlist.bytes == MANIFESTLIST_BYTES assert manifestlist.bytes == MANIFESTLIST_BYTES
assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES) assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES)
assert manifestlist.layers is None
assert not manifestlist.blob_digests assert not manifestlist.blob_digests
for index, manifest in enumerate(manifestlist.manifests(retriever)): for index, manifest in enumerate(manifestlist.manifests(retriever)):
@ -89,22 +90,18 @@ def test_valid_manifestlist():
assert isinstance(manifest.manifest_obj, DockerSchema1Manifest) assert isinstance(manifest.manifest_obj, DockerSchema1Manifest)
assert manifest.manifest_obj.schema_version == 1 assert manifest.manifest_obj.schema_version == 1
compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever) compatible_manifest = manifestlist.get_schema1_manifest('foo', 'bar', 'baz', retriever)
assert compatible_manifest.schema_version == 1 assert compatible_manifest.schema_version == 1
assert manifestlist.layers is None
assert manifestlist.leaf_layer_v1_image_id is None
assert manifestlist.legacy_image_ids is None
def test_get_schema1_manifest_no_matching_list():
def test_get_v1_compatible_manifest_no_matching_list():
manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES) manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES)
assert len(manifestlist.manifests(retriever)) == 1 assert len(manifestlist.manifests(retriever)) == 1
assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json'
assert manifestlist.bytes == NO_AMD_MANIFESTLIST_BYTES assert manifestlist.bytes == NO_AMD_MANIFESTLIST_BYTES
compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever) compatible_manifest = manifestlist.get_schema1_manifest('foo', 'bar', 'baz', retriever)
assert compatible_manifest is None assert compatible_manifest is None

View file

@ -6,7 +6,7 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder,
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE,
DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE) DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE)
from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest, from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest,
DockerSchema2ManifestBuilder) DockerSchema2ManifestBuilder, EMPTY_BLOB_DIGEST)
from image.docker.schema2.test.test_config import CONFIG_BYTES from image.docker.schema2.test.test_config import CONFIG_BYTES
from image.docker.schemautil import ContentRetrieverForTesting from image.docker.schemautil import ContentRetrieverForTesting
@ -96,6 +96,7 @@ def test_valid_manifest():
assert str(manifest.config.digest) == 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7' assert str(manifest.config.digest) == 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7'
assert manifest.media_type == "application/vnd.docker.distribution.manifest.v2+json" assert manifest.media_type == "application/vnd.docker.distribution.manifest.v2+json"
assert not manifest.has_remote_layer assert not manifest.has_remote_layer
assert manifest.has_legacy_image
assert len(manifest.layers) == 4 assert len(manifest.layers) == 4
assert manifest.layers[0].compressed_size == 1234 assert manifest.layers[0].compressed_size == 1234
@ -111,6 +112,36 @@ def test_valid_manifest():
assert blob_digests == expected assert blob_digests == expected
assert list(manifest.local_blob_digests) == expected assert list(manifest.local_blob_digests) == expected
retriever = ContentRetrieverForTesting.for_config({
"config": {
"Labels": {},
},
"rootfs": {"type": "layers", "diff_ids": []},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "foo"
},
{
"created": "2018-04-12T18:37:09.284840891Z",
"created_by": "bar"
},
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "foo"
},
{
"created": "2018-04-12T18:37:09.284840891Z",
"created_by": "bar"
},
],
}, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885)
manifest_image_layers = list(manifest._manifest_image_layers(retriever))
assert len(manifest_image_layers) == len(list(manifest.layers))
for index in range(0, 4):
assert manifest_image_layers[index].blob_digest == str(manifest.layers[index].digest)
def test_valid_remote_manifest(): def test_valid_remote_manifest():
manifest = DockerSchema2Manifest(REMOTE_MANIFEST_BYTES) manifest = DockerSchema2Manifest(REMOTE_MANIFEST_BYTES)
@ -138,8 +169,8 @@ def test_valid_remote_manifest():
assert local_digests == (expected - {manifest.layers[0].digest}) assert local_digests == (expected - {manifest.layers[0].digest})
assert manifest.has_remote_layer assert manifest.has_remote_layer
assert manifest.leaf_layer_v1_image_id is None assert manifest.get_leaf_layer_v1_image_id(None) is None
assert manifest.legacy_image_ids is None assert manifest.get_legacy_image_ids(None) is None
def test_schema2_builder(): def test_schema2_builder():
@ -179,26 +210,13 @@ def test_build_schema1():
}) })
builder = DockerSchema1ManifestBuilder('somenamespace', 'somename', 'sometag') builder = DockerSchema1ManifestBuilder('somenamespace', 'somename', 'sometag')
manifest.populate_schema1_builder(builder, retriever) manifest._populate_schema1_builder(builder, retriever)
schema1 = builder.build(docker_v2_signing_key) schema1 = builder.build(docker_v2_signing_key)
assert schema1.media_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE assert schema1.media_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
assert len(schema1.layers) == len(manifest.layers)
assert set(schema1.image_ids) == set([l.v1_id for l in manifest.layers_with_v1_ids])
assert set(schema1.parent_image_ids) == set([l.v1_parent_id for l in
manifest.layers_with_v1_ids if l.v1_parent_id])
manifest_layers = list(manifest.layers_with_v1_ids)
for index, layer in enumerate(schema1.layers):
assert layer.digest == manifest_layers[index].layer.digest
assert layer.v1_metadata.image_id == manifest_layers[index].v1_id
assert layer.v1_metadata.parent_image_id == manifest_layers[index].v1_parent_id
for index, digest in enumerate(schema1.blob_digests):
assert digest == str(list(manifest.blob_digests)[index])
def test_get_v1_compatible_manifest(): def test_get_schema1_manifest():
retriever = ContentRetrieverForTesting.for_config({ retriever = ContentRetrieverForTesting.for_config({
"config": { "config": {
"Labels": {}, "Labels": {},
@ -225,13 +243,9 @@ def test_get_v1_compatible_manifest():
}, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885) }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885)
manifest = DockerSchema2Manifest(MANIFEST_BYTES) manifest = DockerSchema2Manifest(MANIFEST_BYTES)
schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', retriever) schema1 = manifest.get_schema1_manifest('somenamespace', 'somename', 'sometag', retriever)
assert schema1 is not None assert schema1 is not None
assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
assert len(schema1.layers) == len(manifest.layers)
assert set(schema1.image_ids) == set([l.v1_id for l in manifest.layers_with_v1_ids])
assert set(schema1.parent_image_ids) == set([l.v1_parent_id for l in
manifest.layers_with_v1_ids if l.v1_parent_id])
def test_generate_legacy_layers(): def test_generate_legacy_layers():
@ -248,29 +262,39 @@ def test_generate_legacy_layers():
"history": [ "history": [
{ {
"created": "2018-04-03T18:37:09.284840891Z", "created": "2018-04-03T18:37:09.284840891Z",
"created_by": "foo" "created_by": "base"
},
{
"created": "2018-04-06T18:37:09.284840891Z",
"created_by": "middle",
"empty_layer": True,
}, },
{ {
"created": "2018-04-12T18:37:09.284840891Z", "created": "2018-04-12T18:37:09.284840891Z",
"created_by": "bar" "created_by": "leaf"
}, },
], ],
}, 'sha256:def456', 2000) }, 'sha256:def456', 2000)
legacy_layers = list(manifest.generate_legacy_layers({}, retriever)) legacy_layers = list(manifest.generate_legacy_layers({}, retriever))
assert len(legacy_layers) == 2 assert len(legacy_layers) == 3
assert legacy_layers[0].content_checksum == 'sha256:abc123' assert legacy_layers[0].content_checksum == 'sha256:abc123'
assert legacy_layers[1].content_checksum == 'sha256:def456' assert legacy_layers[1].content_checksum == EMPTY_BLOB_DIGEST
assert legacy_layers[2].content_checksum == 'sha256:def456'
assert legacy_layers[0].created == "2018-04-03T18:37:09.284840891Z" assert legacy_layers[0].created == "2018-04-03T18:37:09.284840891Z"
assert legacy_layers[1].created == "2018-04-12T18:37:09.284840891Z" assert legacy_layers[1].created == "2018-04-06T18:37:09.284840891Z"
assert legacy_layers[2].created == "2018-04-12T18:37:09.284840891Z"
assert legacy_layers[0].command == '"foo"' assert legacy_layers[0].command == '["base"]'
assert legacy_layers[1].command == '"bar"' assert legacy_layers[1].command == '["middle"]'
assert legacy_layers[2].command == '["leaf"]'
assert legacy_layers[2].parent_image_id == legacy_layers[1].image_id
assert legacy_layers[1].parent_image_id == legacy_layers[0].image_id assert legacy_layers[1].parent_image_id == legacy_layers[0].image_id
assert legacy_layers[0].parent_image_id is None assert legacy_layers[0].parent_image_id is None
assert legacy_layers[1].image_id != legacy_layers[2]
assert legacy_layers[0].image_id != legacy_layers[1] assert legacy_layers[0].image_id != legacy_layers[1]
@ -283,10 +307,11 @@ def test_remote_layer_manifest():
manifest = builder.build() manifest = builder.build()
assert manifest.has_remote_layer assert manifest.has_remote_layer
assert manifest.leaf_layer_v1_image_id is None assert manifest.get_leaf_layer_v1_image_id(None) is None
assert manifest.legacy_image_ids is None assert manifest.get_legacy_image_ids(None) is None
assert not manifest.has_legacy_image
schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', None) schema1 = manifest.get_schema1_manifest('somenamespace', 'somename', 'sometag', None)
assert schema1 is None assert schema1 is None
assert set(manifest.blob_digests) == {'sha256:adef', 'sha256:abcd', 'sha256:1352', 'sha256:1353'} assert set(manifest.blob_digests) == {'sha256:adef', 'sha256:abcd', 'sha256:1352', 'sha256:1353'}

View file

@ -51,12 +51,10 @@ class BrokenManifest(ManifestInterface):
def layers(self): def layers(self):
return [] return []
@property def get_legacy_image_ids(self, cr):
def legacy_image_ids(self):
return [] return []
@property def get_leaf_layer_v1_image_id(self, cr):
def leaf_layer_v1_image_id(self):
return None return None
@property @property
@ -79,7 +77,7 @@ class BrokenManifest(ManifestInterface):
def generate_legacy_layers(self, images_map, lookup_config_fn): def generate_legacy_layers(self, images_map, lookup_config_fn):
return None return None
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): def get_schema1_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
return self return self
@property @property
@ -94,6 +92,10 @@ class BrokenManifest(ManifestInterface):
def is_manifest_list(self): def is_manifest_list(self):
return False return False
@property
def has_legacy_image(self):
return False
class ManifestBackfillWorker(Worker): class ManifestBackfillWorker(Worker):
def __init__(self): def __init__(self):