Merge pull request #3378 from quay/joseph.schorr/QUAY-1351/fix-super-large-manifest-backfill

Fix backfills of super large manifests by stripping metadata from all but the final layer
This commit is contained in:
Joseph Schorr 2019-02-20 15:00:13 -05:00 committed by GitHub
commit d763361069
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 166 additions and 11 deletions

View file

@ -473,8 +473,6 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
assert not tag_obj.hidden
repo = tag_obj.repository
namespace_name = repo.namespace_user.username
repo_name = repo.name
# Write the manifest to the DB.
manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image)

View file

@ -17,6 +17,8 @@ from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST
logger = logging.getLogger(__name__)
# The maximum size for generated manifest after which we remove extra metadata.
MAXIMUM_GENERATED_MANIFEST_SIZE = 3 * 1024 * 1024 # 3 MB
class SharedModel:
"""
@ -445,9 +447,19 @@ class SharedModel:
builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata)
# Sign the manifest with our signing key.
try:
return builder.build(docker_v2_signing_key)
built_manifest = builder.build(docker_v2_signing_key)
# If the generated manifest is greater than the maximum size, regenerate it with
# intermediate metadata layers stripped down to their bare essentials.
if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE:
built_manifest = builder.with_metadata_removed().build(docker_v2_signing_key)
if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE:
logger.error('Legacy image is too large to generate manifest')
return None
return built_manifest
except ManifestException as me:
logger.exception('Got exception when trying to build manifest for legacy image %s',
legacy_image_row)

View file

@ -59,7 +59,6 @@ _ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
# The algorithm we use to sign the JWS.
_JWS_SIGNING_ALGORITHM = 'RS256'
class MalformedSchema1Manifest(ManifestException):
"""
Raised when a manifest fails an assertion that should be true according to the Docker Manifest
@ -340,17 +339,20 @@ class DockerSchema1Manifest(ManifestInterface):
def get_requires_empty_layer_blob(self, content_retriever):
return False
def unsigned(self):
if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
return self
# Create an unsigned version of the manifest.
def _unsigned_builder(self):
builder = DockerSchema1ManifestBuilder(self._namespace, self._repo_name, self._tag,
self._architecture)
for layer in reversed(self.layers):
builder.add_layer(str(layer.digest), layer.raw_v1_metadata)
return builder.build()
return builder
def unsigned(self):
if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
return self
# Create an unsigned version of the manifest.
return self._unsigned_builder().build()
def with_tag_name(self, tag_name, json_web_key=None):
""" Returns a copy of this manifest, with the tag changed to the given tag name. """
@ -534,6 +536,10 @@ class DockerSchema1ManifestBuilder(object):
self._fs_layer_digests = []
self._history = []
self._namespace_name = namespace_name
self._repo_name = repo_name
self._tag = tag
self._architecture = architecture
def add_layer(self, layer_digest, v1_json_metadata):
self._fs_layer_digests.append({
@ -544,6 +550,49 @@ class DockerSchema1ManifestBuilder(object):
})
return self
def with_metadata_removed(self):
""" Returns a copy of the builder where every layer but the leaf layer has
its metadata stripped down to the bare essentials.
"""
builder = DockerSchema1ManifestBuilder(self._namespace_name, self._repo_name, self._tag,
self._architecture)
for index, fs_layer in enumerate(self._fs_layer_digests):
try:
metadata = json.loads(self._history[index][DOCKER_SCHEMA1_V1_COMPAT_KEY])
except (ValueError, TypeError):
logger.exception('Could not parse existing builder')
raise MalformedSchema1Manifest
fixed_metadata = {}
if index == 0: # Leaf layer is at index 0 in schema 1.
fixed_metadata = metadata
else:
# Remove all container config from the metadata.
fixed_metadata['id'] = metadata['id']
if 'parent' in metadata:
fixed_metadata['parent'] = metadata['parent']
if 'created' in metadata:
fixed_metadata['created'] = metadata['created']
if 'author' in metadata:
fixed_metadata['author'] = metadata['author']
if 'comment' in metadata:
fixed_metadata['comment'] = metadata['comment']
if 'Size' in metadata:
fixed_metadata['Size'] = metadata['Size']
if 'Cmd' in metadata.get('container_config', {}):
fixed_metadata['container_config'] = {
'Cmd': metadata['container_config']['Cmd'],
}
builder.add_layer(fs_layer[DOCKER_SCHEMA1_BLOB_SUM_KEY], json.dumps(fixed_metadata))
return builder
def build(self, json_web_key=None, ensure_ascii=True):
"""

View file

@ -213,3 +213,99 @@ def test_validate_manifest_with_none_metadata_layer(with_key):
# Ensure the manifest can be reloaded.
built_bytes = built.bytes.as_encoded_str()
DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes))
def test_build_with_metadata_removed():
builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag')
builder.add_layer('sha256:abcde', json.dumps({
'id': 'someid',
'parent': 'someid',
'author': u'😱',
'comment': 'hello world!',
'created': '1975-01-02 12:34',
'Size': 5678,
'container_config': {
'Cmd': 'foobar',
'more': 'stuff',
'goes': 'here',
},
}))
builder.add_layer('sha256:abcde', json.dumps({
'id': 'anotherid',
'author': u'😱',
'created': '1985-02-03 12:34',
'Size': 1234,
'container_config': {
'Cmd': 'barbaz',
'more': 'stuff',
'goes': 'here',
},
}))
built = builder.build(None)
built._validate()
assert built.leaf_layer_v1_image_id == 'someid'
with_metadata_removed = builder.with_metadata_removed().build()
with_metadata_removed._validate()
built_layers = list(built.get_layers(None))
with_metadata_removed_layers = list(with_metadata_removed.get_layers(None))
assert len(built_layers) == len(with_metadata_removed_layers)
for index, built_layer in enumerate(built_layers):
with_metadata_removed_layer = with_metadata_removed_layers[index]
assert built_layer.layer_id == with_metadata_removed_layer.layer_id
assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size
assert built_layer.command == with_metadata_removed_layer.command
assert built_layer.comment == with_metadata_removed_layer.comment
assert built_layer.author == with_metadata_removed_layer.author
assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest
assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime
assert built.leaf_layer_v1_image_id == with_metadata_removed.leaf_layer_v1_image_id
assert built_layers[-1].layer_id == built.leaf_layer_v1_image_id
assert (json.loads(built_layers[-1].internal_layer.raw_v1_metadata) ==
json.loads(with_metadata_removed_layers[-1].internal_layer.raw_v1_metadata))
def test_validate_manifest_without_metadata():
test_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(test_dir, 'validated_manifest.json'), 'r') as f:
manifest_bytes = f.read()
manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=True)
digest = manifest.digest
assert digest == 'sha256:b5dc4f63fdbd64f34f2314c0747ef81008f9fcddce4edfc3fd0e8ec8b358d571'
assert manifest.created_datetime
with_metadata_removed = manifest._unsigned_builder().with_metadata_removed().build()
assert with_metadata_removed.leaf_layer_v1_image_id == manifest.leaf_layer_v1_image_id
manifest_layers = list(manifest.get_layers(None))
with_metadata_removed_layers = list(with_metadata_removed.get_layers(None))
assert len(manifest_layers) == len(with_metadata_removed_layers)
for index, built_layer in enumerate(manifest_layers):
with_metadata_removed_layer = with_metadata_removed_layers[index]
assert built_layer.layer_id == with_metadata_removed_layer.layer_id
assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size
assert built_layer.command == with_metadata_removed_layer.command
assert built_layer.comment == with_metadata_removed_layer.comment
assert built_layer.author == with_metadata_removed_layer.author
assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest
assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime
assert with_metadata_removed.digest != manifest.digest
assert with_metadata_removed.namespace == manifest.namespace
assert with_metadata_removed.repo_name == manifest.repo_name
assert with_metadata_removed.tag == manifest.tag
assert with_metadata_removed.created_datetime == manifest.created_datetime
assert with_metadata_removed.checksums == manifest.checksums
assert with_metadata_removed.image_ids == manifest.image_ids
assert with_metadata_removed.parent_image_ids == manifest.parent_image_ids