Fix backfills of super large manifests by stripping metadata from all but the final layer

This is semantically valid because Docker only uses the leaf layer as the image config when reading a V2_1 manifest

Fixes https://jira.coreos.com/browse/QUAY-1351
This commit is contained in:
Joseph Schorr 2019-02-20 12:08:20 -05:00
parent 6b30702699
commit bacf074219
4 changed files with 166 additions and 11 deletions

View file

@ -473,8 +473,6 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
assert not tag_obj.hidden
repo = tag_obj.repository
namespace_name = repo.namespace_user.username
repo_name = repo.name
# Write the manifest to the DB.
manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image)

View file

@ -17,6 +17,8 @@ from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST
logger = logging.getLogger(__name__)
# The maximum size for generated manifest after which we remove extra metadata.
MAXIMUM_GENERATED_MANIFEST_SIZE = 3 * 1024 * 1024 # 3 MB
class SharedModel:
"""
@ -445,9 +447,19 @@ class SharedModel:
builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata)
# Sign the manifest with our signing key.
try:
return builder.build(docker_v2_signing_key)
built_manifest = builder.build(docker_v2_signing_key)
# If the generated manifest is greater than the maximum size, regenerate it with
# intermediate metadata layers stripped down to their bare essentials.
if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE:
built_manifest = builder.with_metadata_removed().build(docker_v2_signing_key)
if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE:
logger.error('Legacy image is too large to generate manifest')
return None
return built_manifest
except ManifestException as me:
logger.exception('Got exception when trying to build manifest for legacy image %s',
legacy_image_row)

View file

@ -59,7 +59,6 @@ _ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
# The algorithm we use to sign the JWS.
_JWS_SIGNING_ALGORITHM = 'RS256'
class MalformedSchema1Manifest(ManifestException):
"""
Raised when a manifest fails an assertion that should be true according to the Docker Manifest
@ -340,17 +339,20 @@ class DockerSchema1Manifest(ManifestInterface):
def get_requires_empty_layer_blob(self, content_retriever):
return False
def unsigned(self):
if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
return self
# Create an unsigned version of the manifest.
def _unsigned_builder(self):
builder = DockerSchema1ManifestBuilder(self._namespace, self._repo_name, self._tag,
self._architecture)
for layer in reversed(self.layers):
builder.add_layer(str(layer.digest), layer.raw_v1_metadata)
return builder.build()
return builder
def unsigned(self):
if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
return self
# Create an unsigned version of the manifest.
return self._unsigned_builder().build()
def with_tag_name(self, tag_name, json_web_key=None):
""" Returns a copy of this manifest, with the tag changed to the given tag name. """
@ -534,6 +536,10 @@ class DockerSchema1ManifestBuilder(object):
self._fs_layer_digests = []
self._history = []
self._namespace_name = namespace_name
self._repo_name = repo_name
self._tag = tag
self._architecture = architecture
def add_layer(self, layer_digest, v1_json_metadata):
self._fs_layer_digests.append({
@ -544,6 +550,49 @@ class DockerSchema1ManifestBuilder(object):
})
return self
def with_metadata_removed(self):
""" Returns a copy of the builder where every layer but the leaf layer has
its metadata stripped down to the bare essentials.
"""
builder = DockerSchema1ManifestBuilder(self._namespace_name, self._repo_name, self._tag,
self._architecture)
for index, fs_layer in enumerate(self._fs_layer_digests):
try:
metadata = json.loads(self._history[index][DOCKER_SCHEMA1_V1_COMPAT_KEY])
except (ValueError, TypeError):
logger.exception('Could not parse existing builder')
raise MalformedSchema1Manifest
fixed_metadata = {}
if index == 0: # Leaf layer is at index 0 in schema 1.
fixed_metadata = metadata
else:
# Remove all container config from the metadata.
fixed_metadata['id'] = metadata['id']
if 'parent' in metadata:
fixed_metadata['parent'] = metadata['parent']
if 'created' in metadata:
fixed_metadata['created'] = metadata['created']
if 'author' in metadata:
fixed_metadata['author'] = metadata['author']
if 'comment' in metadata:
fixed_metadata['comment'] = metadata['comment']
if 'Size' in metadata:
fixed_metadata['Size'] = metadata['Size']
if 'Cmd' in metadata.get('container_config', {}):
fixed_metadata['container_config'] = {
'Cmd': metadata['container_config']['Cmd'],
}
builder.add_layer(fs_layer[DOCKER_SCHEMA1_BLOB_SUM_KEY], json.dumps(fixed_metadata))
return builder
def build(self, json_web_key=None, ensure_ascii=True):
"""

View file

@ -213,3 +213,99 @@ def test_validate_manifest_with_none_metadata_layer(with_key):
# Ensure the manifest can be reloaded.
built_bytes = built.bytes.as_encoded_str()
DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes))
def test_build_with_metadata_removed():
builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag')
builder.add_layer('sha256:abcde', json.dumps({
'id': 'someid',
'parent': 'someid',
'author': u'😱',
'comment': 'hello world!',
'created': '1975-01-02 12:34',
'Size': 5678,
'container_config': {
'Cmd': 'foobar',
'more': 'stuff',
'goes': 'here',
},
}))
builder.add_layer('sha256:abcde', json.dumps({
'id': 'anotherid',
'author': u'😱',
'created': '1985-02-03 12:34',
'Size': 1234,
'container_config': {
'Cmd': 'barbaz',
'more': 'stuff',
'goes': 'here',
},
}))
built = builder.build(None)
built._validate()
assert built.leaf_layer_v1_image_id == 'someid'
with_metadata_removed = builder.with_metadata_removed().build()
with_metadata_removed._validate()
built_layers = list(built.get_layers(None))
with_metadata_removed_layers = list(with_metadata_removed.get_layers(None))
assert len(built_layers) == len(with_metadata_removed_layers)
for index, built_layer in enumerate(built_layers):
with_metadata_removed_layer = with_metadata_removed_layers[index]
assert built_layer.layer_id == with_metadata_removed_layer.layer_id
assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size
assert built_layer.command == with_metadata_removed_layer.command
assert built_layer.comment == with_metadata_removed_layer.comment
assert built_layer.author == with_metadata_removed_layer.author
assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest
assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime
assert built.leaf_layer_v1_image_id == with_metadata_removed.leaf_layer_v1_image_id
assert built_layers[-1].layer_id == built.leaf_layer_v1_image_id
assert (json.loads(built_layers[-1].internal_layer.raw_v1_metadata) ==
json.loads(with_metadata_removed_layers[-1].internal_layer.raw_v1_metadata))
def test_validate_manifest_without_metadata():
test_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(test_dir, 'validated_manifest.json'), 'r') as f:
manifest_bytes = f.read()
manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=True)
digest = manifest.digest
assert digest == 'sha256:b5dc4f63fdbd64f34f2314c0747ef81008f9fcddce4edfc3fd0e8ec8b358d571'
assert manifest.created_datetime
with_metadata_removed = manifest._unsigned_builder().with_metadata_removed().build()
assert with_metadata_removed.leaf_layer_v1_image_id == manifest.leaf_layer_v1_image_id
manifest_layers = list(manifest.get_layers(None))
with_metadata_removed_layers = list(with_metadata_removed.get_layers(None))
assert len(manifest_layers) == len(with_metadata_removed_layers)
for index, built_layer in enumerate(manifest_layers):
with_metadata_removed_layer = with_metadata_removed_layers[index]
assert built_layer.layer_id == with_metadata_removed_layer.layer_id
assert built_layer.compressed_size == with_metadata_removed_layer.compressed_size
assert built_layer.command == with_metadata_removed_layer.command
assert built_layer.comment == with_metadata_removed_layer.comment
assert built_layer.author == with_metadata_removed_layer.author
assert built_layer.blob_digest == with_metadata_removed_layer.blob_digest
assert built_layer.created_datetime == with_metadata_removed_layer.created_datetime
assert with_metadata_removed.digest != manifest.digest
assert with_metadata_removed.namespace == manifest.namespace
assert with_metadata_removed.repo_name == manifest.repo_name
assert with_metadata_removed.tag == manifest.tag
assert with_metadata_removed.created_datetime == manifest.created_datetime
assert with_metadata_removed.checksums == manifest.checksums
assert with_metadata_removed.image_ids == manifest.image_ids
assert with_metadata_removed.parent_image_ids == manifest.parent_image_ids