Merge pull request #3378 from quay/joseph.schorr/QUAY-1351/fix-super-large-manifest-backfill
Fix backfills of super large manifests by stripping metadata from all but the final layer
This commit is contained in:
commit
d763361069
4 changed files with 166 additions and 11 deletions
|
@ -473,8 +473,6 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
|
|||
assert not tag_obj.hidden
|
||||
|
||||
repo = tag_obj.repository
|
||||
namespace_name = repo.namespace_user.username
|
||||
repo_name = repo.name
|
||||
|
||||
# Write the manifest to the DB.
|
||||
manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image)
|
||||
|
|
|
@ -17,6 +17,8 @@ from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# The maximum size for generated manifest after which we remove extra metadata.
|
||||
MAXIMUM_GENERATED_MANIFEST_SIZE = 3 * 1024 * 1024 # 3 MB
|
||||
|
||||
class SharedModel:
|
||||
"""
|
||||
|
@ -445,9 +447,19 @@ class SharedModel:
|
|||
|
||||
builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata)
|
||||
|
||||
# Sign the manifest with our signing key.
|
||||
try:
|
||||
return builder.build(docker_v2_signing_key)
|
||||
built_manifest = builder.build(docker_v2_signing_key)
|
||||
|
||||
# If the generated manifest is greater than the maximum size, regenerate it with
|
||||
# intermediate metadata layers stripped down to their bare essentials.
|
||||
if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE:
|
||||
built_manifest = builder.with_metadata_removed().build(docker_v2_signing_key)
|
||||
|
||||
if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE:
|
||||
logger.error('Legacy image is too large to generate manifest')
|
||||
return None
|
||||
|
||||
return built_manifest
|
||||
except ManifestException as me:
|
||||
logger.exception('Got exception when trying to build manifest for legacy image %s',
|
||||
legacy_image_row)
|
||||
|
|
|
@ -59,7 +59,6 @@ _ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
|
|||
# The algorithm we use to sign the JWS.
|
||||
_JWS_SIGNING_ALGORITHM = 'RS256'
|
||||
|
||||
|
||||
class MalformedSchema1Manifest(ManifestException):
|
||||
"""
|
||||
Raised when a manifest fails an assertion that should be true according to the Docker Manifest
|
||||
|
@ -340,17 +339,20 @@ class DockerSchema1Manifest(ManifestInterface):
|
|||
def get_requires_empty_layer_blob(self, content_retriever):
|
||||
return False
|
||||
|
||||
def unsigned(self):
|
||||
if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
|
||||
return self
|
||||
|
||||
# Create an unsigned version of the manifest.
|
||||
def _unsigned_builder(self):
|
||||
builder = DockerSchema1ManifestBuilder(self._namespace, self._repo_name, self._tag,
|
||||
self._architecture)
|
||||
for layer in reversed(self.layers):
|
||||
builder.add_layer(str(layer.digest), layer.raw_v1_metadata)
|
||||
|
||||
return builder.build()
|
||||
return builder
|
||||
|
||||
def unsigned(self):
|
||||
if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
|
||||
return self
|
||||
|
||||
# Create an unsigned version of the manifest.
|
||||
return self._unsigned_builder().build()
|
||||
|
||||
def with_tag_name(self, tag_name, json_web_key=None):
|
||||
""" Returns a copy of this manifest, with the tag changed to the given tag name. """
|
||||
|
@ -534,6 +536,10 @@ class DockerSchema1ManifestBuilder(object):
|
|||
|
||||
self._fs_layer_digests = []
|
||||
self._history = []
|
||||
self._namespace_name = namespace_name
|
||||
self._repo_name = repo_name
|
||||
self._tag = tag
|
||||
self._architecture = architecture
|
||||
|
||||
def add_layer(self, layer_digest, v1_json_metadata):
|
||||
self._fs_layer_digests.append({
|
||||
|
@ -544,6 +550,49 @@ class DockerSchema1ManifestBuilder(object):
|
|||
})
|
||||
return self
|
||||
|
||||
def with_metadata_removed(self):
|
||||
""" Returns a copy of the builder where every layer but the leaf layer has
|
||||
its metadata stripped down to the bare essentials.
|
||||
"""
|
||||
builder = DockerSchema1ManifestBuilder(self._namespace_name, self._repo_name, self._tag,
|
||||
self._architecture)
|
||||
|
||||
for index, fs_layer in enumerate(self._fs_layer_digests):
|
||||
try:
|
||||
metadata = json.loads(self._history[index][DOCKER_SCHEMA1_V1_COMPAT_KEY])
|
||||
except (ValueError, TypeError):
|
||||
logger.exception('Could not parse existing builder')
|
||||
raise MalformedSchema1Manifest
|
||||
|
||||
fixed_metadata = {}
|
||||
if index == 0: # Leaf layer is at index 0 in schema 1.
|
||||
fixed_metadata = metadata
|
||||
else:
|
||||
# Remove all container config from the metadata.
|
||||
fixed_metadata['id'] = metadata['id']
|
||||
if 'parent' in metadata:
|
||||
fixed_metadata['parent'] = metadata['parent']
|
||||
|
||||
if 'created' in metadata:
|
||||
fixed_metadata['created'] = metadata['created']
|
||||
|
||||
if 'author' in metadata:
|
||||
fixed_metadata['author'] = metadata['author']
|
||||
|
||||
if 'comment' in metadata:
|
||||
fixed_metadata['comment'] = metadata['comment']
|
||||
|
||||
if 'Size' in metadata:
|
||||
fixed_metadata['Size'] = metadata['Size']
|
||||
|
||||
if 'Cmd' in metadata.get('container_config', {}):
|
||||
fixed_metadata['container_config'] = {
|
||||
'Cmd': metadata['container_config']['Cmd'],
|
||||
}
|
||||
|
||||
builder.add_layer(fs_layer[DOCKER_SCHEMA1_BLOB_SUM_KEY], json.dumps(fixed_metadata))
|
||||
|
||||
return builder
|
||||
|
||||
def build(self, json_web_key=None, ensure_ascii=True):
|
||||
"""
|
||||
|
|
|
@ -213,3 +213,99 @@ def test_validate_manifest_with_none_metadata_layer(with_key):
|
|||
# Ensure the manifest can be reloaded.
|
||||
built_bytes = built.bytes.as_encoded_str()
|
||||
DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes))
|
||||
|
||||
|
||||
def test_build_with_metadata_removed():
|
||||
builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag')
|
||||
builder.add_layer('sha256:abcde', json.dumps({
|
||||
'id': 'someid',
|
||||
'parent': 'someid',
|
||||
'author': u'😱',
|
||||
'comment': 'hello world!',
|
||||
'created': '1975-01-02 12:34',
|
||||
'Size': 5678,
|
||||
'container_config': {
|
||||
'Cmd': 'foobar',
|
||||
'more': 'stuff',
|
||||
'goes': 'here',
|
||||
},
|
||||
}))
|
||||
builder.add_layer('sha256:abcde', json.dumps({
|
||||
'id': 'anotherid',
|
||||
'author': u'😱',
|
||||
'created': '1985-02-03 12:34',
|
||||
'Size': 1234,
|
||||
'container_config': {
|
||||
'Cmd': 'barbaz',
|
||||
'more': 'stuff',
|
||||
'goes': 'here',
|
||||
},
|
||||
}))
|
||||
|
||||
built = builder.build(None)
|
||||
built._validate()
|
||||
|
||||
assert built.leaf_layer_v1_image_id == 'someid'
|
||||
|
||||
with_metadata_removed = builder.with_metadata_removed().build()
|
||||
with_metadata_removed._validate()
|
||||
|
||||
built_layers = list(built.get_layers(None))
|
||||
with_metadata_removed_layers = list(with_metadata_removed.get_layers(None))
|
||||
|
||||
assert len(built_layers) == len(with_metadata_removed_layers)
|
||||
for index, built_layer in enumerate(built_layers):
|
||||
with_metadata_removed_layer = with_metadata_removed_layers[index]
|
||||
|
||||
assert built_layer.layer_id == with_metadata_removed_layer.layer_id
|
||||
assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size
|
||||
assert built_layer.command == with_metadata_removed_layer.command
|
||||
assert built_layer.comment == with_metadata_removed_layer.comment
|
||||
assert built_layer.author == with_metadata_removed_layer.author
|
||||
assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest
|
||||
assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime
|
||||
|
||||
assert built.leaf_layer_v1_image_id == with_metadata_removed.leaf_layer_v1_image_id
|
||||
assert built_layers[-1].layer_id == built.leaf_layer_v1_image_id
|
||||
|
||||
assert (json.loads(built_layers[-1].internal_layer.raw_v1_metadata) ==
|
||||
json.loads(with_metadata_removed_layers[-1].internal_layer.raw_v1_metadata))
|
||||
|
||||
|
||||
def test_validate_manifest_without_metadata():
|
||||
test_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
with open(os.path.join(test_dir, 'validated_manifest.json'), 'r') as f:
|
||||
manifest_bytes = f.read()
|
||||
|
||||
manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=True)
|
||||
digest = manifest.digest
|
||||
assert digest == 'sha256:b5dc4f63fdbd64f34f2314c0747ef81008f9fcddce4edfc3fd0e8ec8b358d571'
|
||||
assert manifest.created_datetime
|
||||
|
||||
with_metadata_removed = manifest._unsigned_builder().with_metadata_removed().build()
|
||||
assert with_metadata_removed.leaf_layer_v1_image_id == manifest.leaf_layer_v1_image_id
|
||||
|
||||
manifest_layers = list(manifest.get_layers(None))
|
||||
with_metadata_removed_layers = list(with_metadata_removed.get_layers(None))
|
||||
|
||||
assert len(manifest_layers) == len(with_metadata_removed_layers)
|
||||
for index, built_layer in enumerate(manifest_layers):
|
||||
with_metadata_removed_layer = with_metadata_removed_layers[index]
|
||||
|
||||
assert built_layer.layer_id == with_metadata_removed_layer.layer_id
|
||||
assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size
|
||||
assert built_layer.command == with_metadata_removed_layer.command
|
||||
assert built_layer.comment == with_metadata_removed_layer.comment
|
||||
assert built_layer.author == with_metadata_removed_layer.author
|
||||
assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest
|
||||
assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime
|
||||
|
||||
assert with_metadata_removed.digest != manifest.digest
|
||||
|
||||
assert with_metadata_removed.namespace == manifest.namespace
|
||||
assert with_metadata_removed.repo_name == manifest.repo_name
|
||||
assert with_metadata_removed.tag == manifest.tag
|
||||
assert with_metadata_removed.created_datetime == manifest.created_datetime
|
||||
assert with_metadata_removed.checksums == manifest.checksums
|
||||
assert with_metadata_removed.image_ids == manifest.image_ids
|
||||
assert with_metadata_removed.parent_image_ids == manifest.parent_image_ids
|
||||
|
|
Reference in a new issue