Fix conversion of schema 2 manifests to schema 1 manifests

Also adds a number of conversion tests and clarifies the interfaces a bit more.
This commit is contained in:
Joseph Schorr 2018-11-21 17:23:52 +02:00
parent bd79eaa38f
commit c233760007
11 changed files with 457 additions and 183 deletions

View file

@ -30,7 +30,11 @@ DockerV2ManifestLayer = namedtuple('DockerV2ManifestLayer', ['index', 'digest',
'is_remote', 'urls',
'compressed_size'])
# A layer paired with its synthesized V1 ID, the V1 ID of its parent and its compressed size.
LayerWithV1ID = namedtuple(
    'LayerWithV1ID',
    ('layer', 'v1_id', 'v1_parent_id', 'compressed_size'),
)

# A config history entry paired with its blob layer (if any), the digest of that blob, its
# synthesized V1 ID, the V1 ID of its parent and its compressed size.
ManifestImageLayer = namedtuple(
    'ManifestImageLayer',
    ('history', 'blob_layer', 'v1_id', 'v1_parent_id', 'compressed_size', 'blob_digest'),
)

# Digest used in place of a blob digest for history entries that have no blob layer.
EMPTY_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4'

logger = logging.getLogger(__name__)
@ -174,12 +178,13 @@ class DockerSchema2Manifest(ManifestInterface):
return self._layers
@property
def layers_compressed_size(self):
    """ Returns the sum of the compressed sizes of all the layers in this manifest. """
    total_size = 0
    for entry in self.layers:
        total_size += entry.compressed_size
    return total_size
def leaf_layer(self):
    """ Returns the last of this manifest's layers, which is its leaf layer. """
    all_layers = self.layers
    return all_layers[-1]
@property
def leaf_layer(self):
    """ Returns the last entry in this manifest's list of layers. """
    all_layers = self.layers
    return all_layers[-1]
def layers_compressed_size(self):
    """ Returns the sum of the compressed sizes of all the layers in this manifest. """
    total_size = 0
    for entry in self.layers:
        total_size += entry.compressed_size
    return total_size
@property
def has_remote_layer(self):
@ -189,22 +194,6 @@ class DockerSchema2Manifest(ManifestInterface):
return False
@property
def leaf_layer_v1_image_id(self):
    """ Returns the synthesized V1 ID of the last layer, or None if the manifest has
        a remote layer.
    """
    # NOTE: If there exists a layer with remote content, then we consider this manifest
    # to not support legacy images.
    if not self.has_remote_layer:
        layers_with_ids = list(self.layers_with_v1_ids)
        return layers_with_ids[-1].v1_id

    return None
@property
def legacy_image_ids(self):
    """ Returns the synthesized V1 IDs of all layers, in layer order, or None if the
        manifest has a remote layer.
    """
    if self.has_remote_layer:
        return None

    v1_ids = []
    for entry in self.layers_with_v1_ids:
        v1_ids.append(entry.v1_id)
    return v1_ids
@property
def blob_digests(self):
    """ Returns the digests of all layers followed by the digest of the config, each
        converted to a string.
    """
    digests = [str(entry.digest) for entry in self.layers]
    digests.append(str(self.config.digest))
    return digests
@ -217,6 +206,112 @@ class DockerSchema2Manifest(ManifestInterface):
def get_manifest_labels(self, content_retriever):
    """ Returns the labels of this manifest's config, which is loaded through the given
        content retriever.
    """
    built_config = self._get_built_config(content_retriever)
    return built_config.labels
@property
def bytes(self):
    """ Returns the payload held by this manifest. """
    payload = self._payload
    return payload
def child_manifests(self, content_retriever):
    """ Always returns None; the content retriever is not used. """
    return None
def _manifest_image_layers(self, content_retriever, schema2_config=None):
    """ Yields a ManifestImageLayer for each entry in the config's history, in history order.

        Each non-empty history entry is paired with the next unconsumed layer blob. An entry
        marked as empty has no blob layer, and is given EMPTY_BLOB_DIGEST and a compressed
        size of 0. The synthesized V1 ID of an entry is the running SHA256 over every entry
        seen so far, and its V1 parent ID is the ID of the previous entry (None for the first).

        If `schema2_config` is not given, the config is loaded via `content_retriever`.

        Raises MalformedSchema2Manifest if there are fewer history entries than layer blobs,
        or if a non-empty history entry has no layer blob left to be paired with.
    """
    assert not self.has_remote_layer

    # Retrieve the configuration for the manifest.
    config = schema2_config or self._get_built_config(content_retriever)
    history = list(config.history)
    if len(history) < len(self.layers):
        raise MalformedSchema2Manifest('Found less history than layer blobs')

    digest_history = hashlib.sha256()
    v1_layer_id = None
    blob_index = 0

    for history_index, history_entry in enumerate(history):
        # Only a non-empty entry consumes a blob. Empty entries that follow the final blob
        # are valid and must not be reported as malformed.
        if not history_entry.is_empty and blob_index >= len(self.layers):
            raise MalformedSchema2Manifest('Missing history entry #%s' % blob_index)

        v1_layer_parent_id = v1_layer_id
        blob_layer = None if history_entry.is_empty else self.layers[blob_index]
        blob_digest = EMPTY_BLOB_DIGEST if blob_layer is None else str(blob_layer.digest)
        compressed_size = 0 if blob_layer is None else blob_layer.compressed_size

        # Create a new synthesized V1 ID for the history layer by hashing its content and
        # the blob associated with it.
        digest_history.update(json.dumps(history_entry.raw_entry))
        digest_history.update("|")
        digest_history.update(str(history_index))
        digest_history.update("|")
        digest_history.update(blob_digest)
        digest_history.update("||")

        v1_layer_id = digest_history.hexdigest()
        yield ManifestImageLayer(history=history_entry,
                                 blob_layer=blob_layer,
                                 blob_digest=blob_digest,
                                 v1_id=v1_layer_id,
                                 v1_parent_id=v1_layer_parent_id,
                                 compressed_size=compressed_size)

        if not history_entry.is_empty:
            blob_index += 1
@property
def has_legacy_image(self):
    """ Returns whether this manifest has no remote layer. """
    if self.has_remote_layer:
        return False

    return True
def generate_legacy_layers(self, images_map, content_retriever):
    """ Builds a schema 1 manifest from this manifest and returns the result of its
        generate_legacy_layers call. Must not be called if the manifest has a remote layer.
    """
    assert not self.has_remote_layer

    # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
    # the logic for generating the DockerV1Metadata. All of this will go away once we get
    # rid of legacy images in the database, so this is a temporary solution.
    builder = DockerSchema1ManifestBuilder('', '', '')
    self._populate_schema1_builder(builder, content_retriever)
    schema1_manifest = builder.build()
    return schema1_manifest.generate_legacy_layers(images_map, content_retriever)
def get_leaf_layer_v1_image_id(self, content_retriever):
    """ Returns the synthesized V1 ID of the last manifest image layer, or None if the
        manifest has a remote layer.
    """
    # NOTE: If there exists a layer with remote content, then we consider this manifest
    # to not support legacy images.
    if self.has_remote_layer:
        return None

    image_layers = list(self._manifest_image_layers(content_retriever))
    return image_layers[-1].v1_id
def get_legacy_image_ids(self, content_retriever):
    """ Returns the synthesized V1 IDs of all manifest image layers, in order, or None if
        the manifest has a remote layer.
    """
    if self.has_remote_layer:
        return None

    v1_ids = []
    for image_layer in self._manifest_image_layers(content_retriever):
        v1_ids.append(image_layer.v1_id)
    return v1_ids
def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
    """ Returns a schema 1 manifest built from this manifest under the given namespace,
        repository and tag names, or None if the manifest has a remote layer.
    """
    if not self.has_remote_layer:
        builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
        self._populate_schema1_builder(builder, content_retriever)
        return builder.build()

    return None
def unsigned(self):
    """ Returns this manifest itself. """
    return self
def _populate_schema1_builder(self, v1_builder, content_retriever):
""" Populates a DockerSchema1ManifestBuilder with the layers and config from
this schema.
"""
assert not self.has_remote_layer
schema2_config = self._get_built_config(content_retriever)
layers = list(self._manifest_image_layers(content_retriever, schema2_config))
for index, layer in enumerate(reversed(layers)): # Schema 1 layers are in reverse order
v1_compatibility = schema2_config.build_v1_compatibility(layer.history,
layer.v1_id,
layer.v1_parent_id,
index == 0,
layer.compressed_size)
v1_builder.add_layer(str(layer.blob_digest), json.dumps(v1_compatibility))
return v1_builder
def _get_built_config(self, content_retriever):
config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest)
if config_bytes is None:
@ -228,13 +323,6 @@ class DockerSchema2Manifest(ManifestInterface):
return DockerSchema2Config(config_bytes)
@property
def bytes(self):
    """ Returns the payload held by this manifest. """
    payload = self._payload
    return payload
def child_manifests(self, content_retriever):
    """ Always returns None; the content retriever is not used. """
    return None
def _generate_layers(self):
for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]):
content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]
@ -252,66 +340,6 @@ class DockerSchema2Manifest(ManifestInterface):
is_remote=is_remote,
urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY))
@property
def layers_with_v1_ids(self):
    """ Yields a LayerWithV1ID for each layer, in layer order. The V1 ID of a layer is the
        running SHA256 over the digests and indexes of all layers up to and including it,
        and its V1 parent ID is the ID of the previous layer (None for the first layer).
        Must not be used if the manifest has a remote layer.
    """
    assert not self.has_remote_layer

    running_hash = hashlib.sha256()
    previous_id = None

    for entry in self.layers:
        # Create a new synthesized V1 ID for the layer by adding its digest and index to the
        # existing digest history hash builder. This will ensure unique V1s across *all*
        # schemas in a repository.
        running_hash.update(str(entry.digest) + "#" + str(entry.index) + "|")
        current_id = running_hash.hexdigest()

        yield LayerWithV1ID(layer=entry, v1_id=current_id, v1_parent_id=previous_id,
                            compressed_size=entry.compressed_size)
        previous_id = current_id
def populate_schema1_builder(self, v1_builder, content_retriever):
    """ Adds the layers and config of this schema to the given DockerSchema1ManifestBuilder
        and returns the builder. Must not be called if the manifest has a remote layer.
    """
    assert not self.has_remote_layer
    config = self._get_built_config(content_retriever)

    # Build the V1 IDs for the layers.
    entries = list(self.layers_with_v1_ids)
    entries.reverse()  # Schema1 has layers in reverse order

    for entry in entries:
        compatibility = config.build_v1_compatibility(entry.layer.index,
                                                      entry.v1_id,
                                                      entry.v1_parent_id,
                                                      entry.compressed_size)
        v1_builder.add_layer(str(entry.layer.digest), json.dumps(compatibility))

    return v1_builder
def generate_legacy_layers(self, images_map, content_retriever):
    """ Builds a schema 1 manifest from this manifest and returns the result of its
        generate_legacy_layers call. Must not be called if the manifest has a remote layer.
    """
    assert not self.has_remote_layer

    # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
    # the logic for generating the DockerV1Metadata. All of this will go away once we get
    # rid of legacy images in the database, so this is a temporary solution.
    builder = DockerSchema1ManifestBuilder('', '', '')
    self.populate_schema1_builder(builder, content_retriever)
    schema1_manifest = builder.build()
    return schema1_manifest.generate_legacy_layers(images_map, content_retriever)
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
    """ Returns a schema 1 manifest built from this manifest under the given namespace,
        repository and tag names, or None if the manifest has a remote layer.
    """
    if not self.has_remote_layer:
        builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
        self.populate_schema1_builder(builder, content_retriever)
        return builder.build()

    return None
def unsigned(self):
    """ Returns this manifest itself. """
    return self
class DockerSchema2ManifestBuilder(object):
"""