From bacf0742192cbac41dfbd9f786907400d2c5264f Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 20 Feb 2019 12:08:20 -0500 Subject: [PATCH] Fix backfills of super large manifests by stripping metadata from all but the final layer This is semantically valid because Docker only uses the leaf layer as the image config when reading a V2_1 manifest Fixes https://jira.coreos.com/browse/QUAY-1351 --- data/registry_model/registry_pre_oci_model.py | 2 - data/registry_model/shared.py | 16 +++- image/docker/schema1.py | 63 ++++++++++-- image/docker/test/test_schema1.py | 96 +++++++++++++++++++ 4 files changed, 166 insertions(+), 11 deletions(-) diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index ce40f12ea..b73085efa 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -473,8 +473,6 @@ class PreOCIModel(SharedModel, RegistryDataInterface): assert not tag_obj.hidden repo = tag_obj.repository - namespace_name = repo.namespace_user.username - repo_name = repo.name # Write the manifest to the DB. manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image) diff --git a/data/registry_model/shared.py b/data/registry_model/shared.py index 586adcd96..561f73cd4 100644 --- a/data/registry_model/shared.py +++ b/data/registry_model/shared.py @@ -17,6 +17,8 @@ from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST logger = logging.getLogger(__name__) +# The maximum size for generated manifest after which we remove extra metadata. +MAXIMUM_GENERATED_MANIFEST_SIZE = 3 * 1024 * 1024 # 3 MB class SharedModel: """ @@ -445,9 +447,19 @@ class SharedModel: builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata) - # Sign the manifest with our signing key. try: - return builder.build(docker_v2_signing_key) + built_manifest = builder.build(docker_v2_signing_key) + + # If the generated manifest is greater than the maximum size, regenerate it with + # intermediate metadata layers stripped down to their bare essentials. + if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE: + built_manifest = builder.with_metadata_removed().build(docker_v2_signing_key) + + if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE: + logger.error('Legacy image is too large to generate manifest') + return None + + return built_manifest except ManifestException as me: logger.exception('Got exception when trying to build manifest for legacy image %s', legacy_image_row) diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 337c9547d..7ace40128 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -59,7 +59,6 @@ _ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ' # The algorithm we use to sign the JWS. _JWS_SIGNING_ALGORITHM = 'RS256' - class MalformedSchema1Manifest(ManifestException): """ Raised when a manifest fails an assertion that should be true according to the Docker Manifest @@ -340,17 +339,20 @@ class DockerSchema1Manifest(ManifestInterface): def get_requires_empty_layer_blob(self, content_retriever): return False - def unsigned(self): - if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE: - return self - - # Create an unsigned version of the manifest. + def _unsigned_builder(self): builder = DockerSchema1ManifestBuilder(self._namespace, self._repo_name, self._tag, self._architecture) for layer in reversed(self.layers): builder.add_layer(str(layer.digest), layer.raw_v1_metadata) - return builder.build() + return builder + + def unsigned(self): + if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE: + return self + + # Create an unsigned version of the manifest. + return self._unsigned_builder().build() def with_tag_name(self, tag_name, json_web_key=None): """ Returns a copy of this manifest, with the tag changed to the given tag name. """ @@ -534,6 +536,10 @@ class DockerSchema1ManifestBuilder(object): self._fs_layer_digests = [] self._history = [] + self._namespace_name = namespace_name + self._repo_name = repo_name + self._tag = tag + self._architecture = architecture def add_layer(self, layer_digest, v1_json_metadata): self._fs_layer_digests.append({ @@ -544,6 +550,49 @@ class DockerSchema1ManifestBuilder(object): }) return self + def with_metadata_removed(self): + """ Returns a copy of the builder where every layer but the leaf layer has + its metadata stripped down to the bare essentials. + """ + builder = DockerSchema1ManifestBuilder(self._namespace_name, self._repo_name, self._tag, + self._architecture) + + for index, fs_layer in enumerate(self._fs_layer_digests): + try: + metadata = json.loads(self._history[index][DOCKER_SCHEMA1_V1_COMPAT_KEY]) + except (ValueError, TypeError): + logger.exception('Could not parse existing builder') + raise MalformedSchema1Manifest + + fixed_metadata = {} + if index == 0: # Leaf layer is at index 0 in schema 1. + fixed_metadata = metadata + else: + # Remove all container config from the metadata. + fixed_metadata['id'] = metadata['id'] + if 'parent' in metadata: + fixed_metadata['parent'] = metadata['parent'] + + if 'created' in metadata: + fixed_metadata['created'] = metadata['created'] + + if 'author' in metadata: + fixed_metadata['author'] = metadata['author'] + + if 'comment' in metadata: + fixed_metadata['comment'] = metadata['comment'] + + if 'Size' in metadata: + fixed_metadata['Size'] = metadata['Size'] + + if 'Cmd' in metadata.get('container_config', {}): + fixed_metadata['container_config'] = { + 'Cmd': metadata['container_config']['Cmd'], + } + + builder.add_layer(fs_layer[DOCKER_SCHEMA1_BLOB_SUM_KEY], json.dumps(fixed_metadata)) + + return builder def build(self, json_web_key=None, ensure_ascii=True): """ diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index 0fd473b6a..2766cdaac 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -213,3 +213,99 @@ def test_validate_manifest_with_none_metadata_layer(with_key): # Ensure the manifest can be reloaded. built_bytes = built.bytes.as_encoded_str() DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes)) + + +def test_build_with_metadata_removed(): + builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag') + builder.add_layer('sha256:abcde', json.dumps({ + 'id': 'someid', + 'parent': 'someid', + 'author': u'😱', + 'comment': 'hello world!', + 'created': '1975-01-02 12:34', + 'Size': 5678, + 'container_config': { + 'Cmd': 'foobar', + 'more': 'stuff', + 'goes': 'here', + }, + })) + builder.add_layer('sha256:abcde', json.dumps({ + 'id': 'anotherid', + 'author': u'😱', + 'created': '1985-02-03 12:34', + 'Size': 1234, + 'container_config': { + 'Cmd': 'barbaz', + 'more': 'stuff', + 'goes': 'here', + }, + })) + + built = builder.build(None) + built._validate() + + assert built.leaf_layer_v1_image_id == 'someid' + + with_metadata_removed = builder.with_metadata_removed().build() + with_metadata_removed._validate() + + built_layers = list(built.get_layers(None)) + with_metadata_removed_layers = list(with_metadata_removed.get_layers(None)) + + assert len(built_layers) == len(with_metadata_removed_layers) + for index, built_layer in enumerate(built_layers): + with_metadata_removed_layer = with_metadata_removed_layers[index] + + assert built_layer.layer_id == with_metadata_removed_layer.layer_id + assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size + assert built_layer.command == with_metadata_removed_layer.command + assert built_layer.comment == with_metadata_removed_layer.comment + assert built_layer.author == with_metadata_removed_layer.author + assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest + assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime + + assert built.leaf_layer_v1_image_id == with_metadata_removed.leaf_layer_v1_image_id + assert built_layers[-1].layer_id == built.leaf_layer_v1_image_id + + assert (json.loads(built_layers[-1].internal_layer.raw_v1_metadata) == + json.loads(with_metadata_removed_layers[-1].internal_layer.raw_v1_metadata)) + + +def test_validate_manifest_without_metadata(): + test_dir = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(test_dir, 'validated_manifest.json'), 'r') as f: + manifest_bytes = f.read() + + manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=True) + digest = manifest.digest + assert digest == 'sha256:b5dc4f63fdbd64f34f2314c0747ef81008f9fcddce4edfc3fd0e8ec8b358d571' + assert manifest.created_datetime + + with_metadata_removed = manifest._unsigned_builder().with_metadata_removed().build() + assert with_metadata_removed.leaf_layer_v1_image_id == manifest.leaf_layer_v1_image_id + + manifest_layers = list(manifest.get_layers(None)) + with_metadata_removed_layers = list(with_metadata_removed.get_layers(None)) + + assert len(manifest_layers) == len(with_metadata_removed_layers) + for index, built_layer in enumerate(manifest_layers): + with_metadata_removed_layer = with_metadata_removed_layers[index] + + assert built_layer.layer_id == with_metadata_removed_layer.layer_id + assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size + assert built_layer.command == with_metadata_removed_layer.command + assert built_layer.comment == with_metadata_removed_layer.comment + assert built_layer.author == with_metadata_removed_layer.author + assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest + assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime + + assert with_metadata_removed.digest != manifest.digest + + assert with_metadata_removed.namespace == manifest.namespace + assert with_metadata_removed.repo_name == manifest.repo_name + assert with_metadata_removed.tag == manifest.tag + assert with_metadata_removed.created_datetime == manifest.created_datetime + assert with_metadata_removed.checksums == manifest.checksums + assert with_metadata_removed.image_ids == manifest.image_ids + assert with_metadata_removed.parent_image_ids == manifest.parent_image_ids