diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index ce40f12ea..b73085efa 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -473,8 +473,6 @@ class PreOCIModel(SharedModel, RegistryDataInterface): assert not tag_obj.hidden repo = tag_obj.repository - namespace_name = repo.namespace_user.username - repo_name = repo.name # Write the manifest to the DB. manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image) diff --git a/data/registry_model/shared.py b/data/registry_model/shared.py index 586adcd96..561f73cd4 100644 --- a/data/registry_model/shared.py +++ b/data/registry_model/shared.py @@ -17,6 +17,8 @@ from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST logger = logging.getLogger(__name__) +# The maximum size for generated manifest after which we remove extra metadata. +MAXIMUM_GENERATED_MANIFEST_SIZE = 3 * 1024 * 1024 # 3 MB class SharedModel: """ @@ -445,9 +447,19 @@ class SharedModel: builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata) - # Sign the manifest with our signing key. try: - return builder.build(docker_v2_signing_key) + built_manifest = builder.build(docker_v2_signing_key) + + # If the generated manifest is greater than the maximum size, regenerate it with + # intermediate metadata layers stripped down to their bare essentials. + if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE: + built_manifest = builder.with_metadata_removed().build(docker_v2_signing_key) + + if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE: + logger.error('Legacy image is too large to generate manifest') + return None + + return built_manifest except ManifestException as me: logger.exception('Got exception when trying to build manifest for legacy image %s', legacy_image_row) diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 337c9547d..7ace40128 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -59,7 +59,6 @@ _ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ' # The algorithm we use to sign the JWS. _JWS_SIGNING_ALGORITHM = 'RS256' - class MalformedSchema1Manifest(ManifestException): """ Raised when a manifest fails an assertion that should be true according to the Docker Manifest @@ -340,17 +339,20 @@ class DockerSchema1Manifest(ManifestInterface): def get_requires_empty_layer_blob(self, content_retriever): return False - def unsigned(self): - if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE: - return self - - # Create an unsigned version of the manifest. + def _unsigned_builder(self): builder = DockerSchema1ManifestBuilder(self._namespace, self._repo_name, self._tag, self._architecture) for layer in reversed(self.layers): builder.add_layer(str(layer.digest), layer.raw_v1_metadata) - return builder.build() + return builder + + def unsigned(self): + if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE: + return self + + # Create an unsigned version of the manifest. + return self._unsigned_builder().build() def with_tag_name(self, tag_name, json_web_key=None): """ Returns a copy of this manifest, with the tag changed to the given tag name. """ @@ -534,6 +536,10 @@ class DockerSchema1ManifestBuilder(object): self._fs_layer_digests = [] self._history = [] + self._namespace_name = namespace_name + self._repo_name = repo_name + self._tag = tag + self._architecture = architecture def add_layer(self, layer_digest, v1_json_metadata): self._fs_layer_digests.append({ @@ -544,6 +550,49 @@ class DockerSchema1ManifestBuilder(object): }) return self + def with_metadata_removed(self): + """ Returns a copy of the builder where every layer but the leaf layer has + its metadata stripped down to the bare essentials. + """ + builder = DockerSchema1ManifestBuilder(self._namespace_name, self._repo_name, self._tag, + self._architecture) + + for index, fs_layer in enumerate(self._fs_layer_digests): + try: + metadata = json.loads(self._history[index][DOCKER_SCHEMA1_V1_COMPAT_KEY]) + except (ValueError, TypeError): + logger.exception('Could not parse existing builder') + raise MalformedSchema1Manifest + + fixed_metadata = {} + if index == 0: # Leaf layer is at index 0 in schema 1. + fixed_metadata = metadata + else: + # Remove all container config from the metadata. + fixed_metadata['id'] = metadata['id'] + if 'parent' in metadata: + fixed_metadata['parent'] = metadata['parent'] + + if 'created' in metadata: + fixed_metadata['created'] = metadata['created'] + + if 'author' in metadata: + fixed_metadata['author'] = metadata['author'] + + if 'comment' in metadata: + fixed_metadata['comment'] = metadata['comment'] + + if 'Size' in metadata: + fixed_metadata['Size'] = metadata['Size'] + + if 'Cmd' in metadata.get('container_config', {}): + fixed_metadata['container_config'] = { + 'Cmd': metadata['container_config']['Cmd'], + } + + builder.add_layer(fs_layer[DOCKER_SCHEMA1_BLOB_SUM_KEY], json.dumps(fixed_metadata)) + + return builder def build(self, json_web_key=None, ensure_ascii=True): """ diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index 0fd473b6a..2766cdaac 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -213,3 +213,99 @@ def test_validate_manifest_with_none_metadata_layer(with_key): # Ensure the manifest can be reloaded. built_bytes = built.bytes.as_encoded_str() DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes)) + + +def test_build_with_metadata_removed(): + builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag') + builder.add_layer('sha256:abcde', json.dumps({ + 'id': 'someid', + 'parent': 'someid', + 'author': u'😱', + 'comment': 'hello world!', + 'created': '1975-01-02 12:34', + 'Size': 5678, + 'container_config': { + 'Cmd': 'foobar', + 'more': 'stuff', + 'goes': 'here', + }, + })) + builder.add_layer('sha256:abcde', json.dumps({ + 'id': 'anotherid', + 'author': u'😱', + 'created': '1985-02-03 12:34', + 'Size': 1234, + 'container_config': { + 'Cmd': 'barbaz', + 'more': 'stuff', + 'goes': 'here', + }, + })) + + built = builder.build(None) + built._validate() + + assert built.leaf_layer_v1_image_id == 'someid' + + with_metadata_removed = builder.with_metadata_removed().build() + with_metadata_removed._validate() + + built_layers = list(built.get_layers(None)) + with_metadata_removed_layers = list(with_metadata_removed.get_layers(None)) + + assert len(built_layers) == len(with_metadata_removed_layers) + for index, built_layer in enumerate(built_layers): + with_metadata_removed_layer = with_metadata_removed_layers[index] + + assert built_layer.layer_id == with_metadata_removed_layer.layer_id + assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size + assert built_layer.command == with_metadata_removed_layer.command + assert built_layer.comment == with_metadata_removed_layer.comment + assert built_layer.author == with_metadata_removed_layer.author + assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest + assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime + + assert built.leaf_layer_v1_image_id == with_metadata_removed.leaf_layer_v1_image_id + assert built_layers[-1].layer_id == built.leaf_layer_v1_image_id + + assert (json.loads(built_layers[-1].internal_layer.raw_v1_metadata) == + json.loads(with_metadata_removed_layers[-1].internal_layer.raw_v1_metadata)) + + +def test_validate_manifest_without_metadata(): + test_dir = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(test_dir, 'validated_manifest.json'), 'r') as f: + manifest_bytes = f.read() + + manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=True) + digest = manifest.digest + assert digest == 'sha256:b5dc4f63fdbd64f34f2314c0747ef81008f9fcddce4edfc3fd0e8ec8b358d571' + assert manifest.created_datetime + + with_metadata_removed = manifest._unsigned_builder().with_metadata_removed().build() + assert with_metadata_removed.leaf_layer_v1_image_id == manifest.leaf_layer_v1_image_id + + manifest_layers = list(manifest.get_layers(None)) + with_metadata_removed_layers = list(with_metadata_removed.get_layers(None)) + + assert len(manifest_layers) == len(with_metadata_removed_layers) + for index, built_layer in enumerate(manifest_layers): + with_metadata_removed_layer = with_metadata_removed_layers[index] + + assert built_layer.layer_id == with_metadata_removed_layer.layer_id + assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size + assert built_layer.command == with_metadata_removed_layer.command + assert built_layer.comment == with_metadata_removed_layer.comment + assert built_layer.author == with_metadata_removed_layer.author + assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest + assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime + + assert with_metadata_removed.digest != manifest.digest + + assert with_metadata_removed.namespace == manifest.namespace + assert with_metadata_removed.repo_name == manifest.repo_name + assert with_metadata_removed.tag == manifest.tag + assert with_metadata_removed.created_datetime == manifest.created_datetime + assert with_metadata_removed.checksums == manifest.checksums + assert with_metadata_removed.image_ids == manifest.image_ids + assert with_metadata_removed.parent_image_ids == manifest.parent_image_ids