V1 Docker ID <-> V2 layer SHA mismatch fix
Fix handling of V1 Docker ID <-> V2 layer SHA mismatch by dynamically rewriting the manifest to use new synthesized IDs for all layers above the mismatch. Also adds a bunch of tests for this and other use cases, fixes a bug around manifest digest uniqueness and fixes the 5.5 migration for MySQL.
This commit is contained in:
parent
8b61c69dad
commit
abd2e3c234
6 changed files with 240 additions and 53 deletions
|
@ -2,9 +2,10 @@ import logging
|
|||
import jwt.utils
|
||||
import json
|
||||
import features
|
||||
import hashlib
|
||||
|
||||
from peewee import IntegrityError
|
||||
from flask import make_response, request, url_for, abort
|
||||
from flask import make_response, request, url_for
|
||||
from collections import namedtuple, OrderedDict
|
||||
from jwkest.jws import SIGNER_ALGS, keyrep
|
||||
from datetime import datetime
|
||||
|
@ -126,7 +127,8 @@ class SignedManifest(object):
|
|||
try:
|
||||
image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_BLOB_SUM_KEY])
|
||||
except digest_tools.InvalidDigestException:
|
||||
raise ManifestInvalid()
|
||||
err_message = 'could not parse manifest digest: %s' % blob_sum_obj[_BLOB_SUM_KEY]
|
||||
raise ManifestInvalid(detail={'message': err_message})
|
||||
|
||||
metadata_string = history_obj[_V1_COMPAT_KEY]
|
||||
|
||||
|
@ -134,6 +136,9 @@ class SignedManifest(object):
|
|||
command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
|
||||
command = json.dumps(command_list) if command_list else None
|
||||
|
||||
if not 'id' in v1_metadata:
|
||||
raise ManifestInvalid(detail={'message': 'invalid manifest v1 history'})
|
||||
|
||||
extracted = ExtractedV1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
|
||||
v1_metadata.get('created'), v1_metadata.get('comment'),
|
||||
command)
|
||||
|
@ -180,6 +185,14 @@ class SignedManifestBuilder(object):
|
|||
_V1_COMPAT_KEY: v1_json_metadata,
|
||||
})
|
||||
|
||||
def add_top_layer(self, layer_digest, v1_json_metadata):
  """ Inserts the given layer at index 0 of both the layer digest list and the history list.

      layer_digest is stored under the blob sum key and v1_json_metadata under the V1
      compatibility key, mirroring the entry shape used elsewhere in this builder.
  """
  fs_layers = self._fs_layer_digests
  fs_layers.insert(0, {_BLOB_SUM_KEY: layer_digest})

  history = self._history
  history.insert(0, {_V1_COMPAT_KEY: v1_json_metadata})
|
||||
|
||||
def build(self, json_web_key):
|
||||
""" Build the payload and sign it, returning a SignedManifest object.
|
||||
"""
|
||||
|
@ -301,8 +314,8 @@ def _reject_manifest2_schema2(func):
|
|||
def write_manifest_by_tagname(namespace, repo_name, manifest_ref):
|
||||
try:
|
||||
manifest = SignedManifest(request.data)
|
||||
except ValueError as ve:
|
||||
raise ManifestInvalid()
|
||||
except ValueError:
|
||||
raise ManifestInvalid(detail={'message': 'could not parse manifest'})
|
||||
|
||||
if manifest.tag != manifest_ref:
|
||||
raise TagInvalid()
|
||||
|
@ -320,14 +333,29 @@ def write_manifest_by_digest(namespace, repo_name, manifest_ref):
|
|||
try:
|
||||
manifest = SignedManifest(request.data)
|
||||
except ValueError:
|
||||
raise ManifestInvalid()
|
||||
raise ManifestInvalid(detail={'message': 'could not parse manifest'})
|
||||
|
||||
if manifest.digest != manifest_ref:
|
||||
raise ManifestInvalid()
|
||||
raise ManifestInvalid(detail={'message': 'manifest digest mismatch'})
|
||||
|
||||
return _write_manifest(namespace, repo_name, manifest)
|
||||
|
||||
|
||||
def _updated_v1_metadata(v1_metadata_json, updated_id_map):
|
||||
parsed = json.loads(v1_metadata_json)
|
||||
parsed['id'] = updated_id_map[parsed['id']]
|
||||
|
||||
if parsed.get('parent'):
|
||||
parsed['parent'] = updated_id_map[parsed['parent']]
|
||||
|
||||
if parsed.get('container_config', {}).get('Image'):
|
||||
existing_image = parsed['container_config']['Image']
|
||||
if existing_image in updated_id_map:
|
||||
parsed['container_config']['image'] = updated_id_map[existing_image]
|
||||
|
||||
return json.dumps(parsed)
|
||||
|
||||
|
||||
def _write_manifest(namespace, repo_name, manifest):
|
||||
# Ensure that the manifest is for this repository. If the manifest's namespace is empty, then
|
||||
# it is for the library namespace and we need an extra check.
|
||||
|
@ -363,22 +391,56 @@ def _write_manifest(namespace, repo_name, manifest):
|
|||
storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)
|
||||
storage_map = {storage.content_checksum: storage for storage in storage_query}
|
||||
|
||||
# Synthesize the V1 metadata for each layer.
|
||||
manifest_digest = manifest.digest
|
||||
# Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to
|
||||
# a storage with a content checksum different from the existing, then we need to rewrite
|
||||
# the Docker ID to ensure consistency.
|
||||
tag_name = manifest.tag
|
||||
updated_manifest_builder = SignedManifestBuilder(namespace, repo_name, tag_name)
|
||||
has_updated_manifest = False
|
||||
updated_id_map = {}
|
||||
|
||||
for mdata in layers:
|
||||
digest_str = str(mdata.digest)
|
||||
v1_mdata = mdata.v1_metadata
|
||||
updated_id_map[v1_mdata.docker_id] = v1_mdata.docker_id
|
||||
|
||||
# Ensure that all blobs exist.
|
||||
blob_storage = storage_map.get(digest_str)
|
||||
if blob_storage is None:
|
||||
raise BlobUnknown(detail={'digest': digest_str})
|
||||
|
||||
# If there is already a V1 image for this layer, nothing more to do.
|
||||
if v1_mdata.docker_id in images_map:
|
||||
# Ensure that the V1 image's storage matches the V2 blob. If not, we've found
|
||||
# a data inconsistency and need to fail.
|
||||
# a data inconsistency and need to create a new layer ID for the V1 image.
|
||||
v1_image = images_map[v1_mdata.docker_id]
|
||||
if v1_image.storage.content_checksum != digest_str:
|
||||
logger.error('Checksum mismatch on V1 layer %s (#%s): Expected digest %s, found %s',
|
||||
v1_mdata.docker_id, v1_image.id, digest_str, v1_image.storage.content_checksum)
|
||||
if has_updated_manifest or v1_image.storage.content_checksum != digest_str:
|
||||
new_synthetic_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest()
|
||||
logger.debug('Got content mismatch for layer %s under repo %s/%s. New ID: %s',
|
||||
v1_mdata.docker_id, namespace, repo_name, new_synthetic_id)
|
||||
|
||||
updated_id_map[v1_mdata.docker_id] = new_synthetic_id
|
||||
has_updated_manifest = True
|
||||
|
||||
# Update the manifest with the new ID (if any).
|
||||
v1_metadata_json = mdata.v1_metadata_str
|
||||
if has_updated_manifest:
|
||||
v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)
|
||||
|
||||
updated_manifest_builder.add_top_layer(digest_str, v1_metadata_json)
|
||||
|
||||
# If the manifest was changed due to an updated layer ID, then create a new manifest
|
||||
# based on the updated data.
|
||||
if has_updated_manifest:
|
||||
manifest = updated_manifest_builder.build(docker_v2_signing_key)
|
||||
layers = list(manifest.layers)
|
||||
|
||||
# Synthesize the V1 metadata for each layer.
|
||||
for mdata in layers:
|
||||
v1_mdata = mdata.v1_metadata
|
||||
|
||||
# If the layer with the V1 id already exists, then nothing more to do. We've ensured
|
||||
# it points to the correct content SHA above.
|
||||
if v1_mdata.docker_id in images_map:
|
||||
continue
|
||||
|
||||
# Lookup the parent image for the layer, if any.
|
||||
|
@ -390,10 +452,8 @@ def _write_manifest(namespace, repo_name, manifest):
|
|||
raise ManifestInvalid(detail={'message': msg})
|
||||
|
||||
# Synthesize and store the v1 metadata in the db.
|
||||
blob_storage = storage_map.get(digest_str)
|
||||
if blob_storage is None:
|
||||
raise BlobUnknown(detail={'digest': digest_str})
|
||||
|
||||
digest_str = str(mdata.digest)
|
||||
blob_storage = storage_map[digest_str]
|
||||
image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id,
|
||||
v1_mdata.created, v1_mdata.comment, v1_mdata.command,
|
||||
mdata.v1_metadata_str, parent_image)
|
||||
|
@ -405,9 +465,10 @@ def _write_manifest(namespace, repo_name, manifest):
|
|||
raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})
|
||||
|
||||
# Store the manifest pointing to the tag.
|
||||
manifest_digest = manifest.digest
|
||||
leaf_layer = layers[-1]
|
||||
model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id,
|
||||
manifest_digest, request.data)
|
||||
manifest_digest, manifest.bytes)
|
||||
|
||||
# Spawn the repo_push event.
|
||||
event_data = {
|
||||
|
@ -481,9 +542,11 @@ def _generate_and_store_manifest(namespace, repo_name, tag_name):
|
|||
try:
|
||||
return model.tag.associate_generated_tag_manifest(namespace, repo_name, tag_name,
|
||||
manifest.digest, manifest.bytes)
|
||||
except IntegrityError:
|
||||
except IntegrityError as ie:
|
||||
logger.debug('Got integrity error: %s', ie)
|
||||
try:
|
||||
return model.tag.load_tag_manifest(namespace, repo_name, tag_name)
|
||||
except model.InvalidManifestException:
|
||||
logger.exception('Exception when generating manifest')
|
||||
raise model.DataModelException('Could not load or generate manifest')
|
||||
|
||||
|
|
Reference in a new issue