Change manifest backfill to lookup missing content checksums in the repository

After discussion, we decided the best solution for the missing content checksum problem was to lookup the proper blobs in the repository and, if not present, mark the manifest as broken, as this would reflect the actual issue the user faces if they pull the repository tag today via V2
This commit is contained in:
Joseph Schorr 2018-08-14 14:36:19 -04:00
parent c0d25d3149
commit 70df10dade
3 changed files with 92 additions and 3 deletions

View file

@ -606,7 +606,7 @@ def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest, s
def _create_manifest(tag, manifest, storage_id_map):
storage_ids = set()
for blob_digest in reversed(manifest.blob_digests):
for blob_digest in manifest.blob_digests:
image_storage_id = storage_id_map.get(blob_digest)
if image_storage_id is None:
logger.error('Missing blob for manifest `%s` in: %s', blob_digest, storage_id_map)

View file

@ -8,6 +8,7 @@ from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest
db_transaction)
from data.model.image import get_parent_images
from data.model.tag import populate_manifest
from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist
from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface,
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE)
from workers.worker import Worker
@ -127,10 +128,28 @@ def backfill_manifest(tag_manifest):
root_image = tag_manifest.tag.image
repository = tag_manifest.tag.repository
storage_ids = {root_image.storage.id}
image_storage_id_map = {root_image.storage.content_checksum: root_image.storage.id}
parent_images = get_parent_images(repository.namespace_user.username, repository.name, root_image)
for parent_image in parent_images:
storage_ids.add(parent_image.storage.id)
image_storage_id_map[parent_image.storage.content_checksum] = parent_image.storage.id
# Ensure that all the expected blobs have been found. If not, we lookup the blob under the repo
# and add its storage ID. If the blob is not found, we mark the manifest as broken.
storage_ids = set()
for blob_digest in manifest.blob_digests:
if blob_digest in image_storage_id_map:
storage_ids.add(image_storage_id_map[blob_digest])
else:
logger.debug('Blob `%s` not found in images for manifest `%s`; checking repo',
blob_digest, tag_manifest.id)
try:
blob_storage = get_repo_blob_by_digest(repository.namespace_user.username, repository.name,
blob_digest)
storage_ids.add(blob_storage.id)
except BlobDoesNotExist:
logger.debug('Blob `%s` not found in repo for manifest `%s`',
blob_digest, tag_manifest.id)
is_broken = True
with db_transaction():
# Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag.

View file

@ -1,6 +1,9 @@
from app import docker_v2_signing_key
from data import model
from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob,
ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image,
TagManifestLabel)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from workers.manifestbackfillworker import backfill_manifest
from test.fixtures import *
@ -68,3 +71,70 @@ def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db):
legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
assert broken_manifest.tag.image == legacy_image
def test_manifestbackfillworker_mislinked_manifest(clear_rows, initialized_db):
""" Tests that a manifest whose image is mislinked will have its storages relinked properly. """
# Delete existing tag manifest so we can reuse the tag.
TagManifestLabel.delete().execute()
TagManifest.delete().execute()
repo = model.repository.get_repository('devtable', 'complex')
tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0')
tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')
# Add a mislinked manifest, by having its layer point to a blob in v3.0 but its image
# be the v5.0 image.
builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
builder.add_layer(tag_v30.image.storage.content_checksum, '{"id": "foo"}')
manifest = builder.build(docker_v2_signing_key)
mislinked_manifest = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest,
tag=tag_v50)
# Backfill the manifest and ensure its proper content checksum was linked.
assert backfill_manifest(mislinked_manifest)
map_row = TagManifestToManifest.get(tag_manifest=mislinked_manifest)
assert not map_row.broken
manifest_row = map_row.manifest
legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
assert legacy_image == tag_v50.image
manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
assert len(manifest_blobs) == 1
assert manifest_blobs[0].blob.content_checksum == tag_v30.image.storage.content_checksum
def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initialized_db):
""" Tests that a manifest whose image is mislinked will attempt to have its storages relinked
properly. """
# Delete existing tag manifest so we can reuse the tag.
TagManifestLabel.delete().execute()
TagManifest.delete().execute()
repo = model.repository.get_repository('devtable', 'complex')
tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')
# Add a mislinked manifest, by having its layer point to an invalid blob but its image
# be the v5.0 image.
builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
builder.add_layer('sha256:deadbeef', '{"id": "foo"}')
manifest = builder.build(docker_v2_signing_key)
broken_manifest = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest,
tag=tag_v50)
# Backfill the manifest and ensure it is marked as broken.
assert backfill_manifest(broken_manifest)
map_row = TagManifestToManifest.get(tag_manifest=broken_manifest)
assert map_row.broken
manifest_row = map_row.manifest
legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
assert legacy_image == tag_v50.image
manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
assert len(manifest_blobs) == 0