diff --git a/data/model/tag.py b/data/model/tag.py index 538445c23..b08385762 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -606,7 +606,7 @@ def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest, s def _create_manifest(tag, manifest, storage_id_map): storage_ids = set() - for blob_digest in reversed(manifest.blob_digests): + for blob_digest in manifest.blob_digests: image_storage_id = storage_id_map.get(blob_digest) if image_storage_id is None: logger.error('Missing blob for manifest `%s` in: %s', blob_digest, storage_id_map) diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py index 5d35b5552..78dda070e 100644 --- a/workers/manifestbackfillworker.py +++ b/workers/manifestbackfillworker.py @@ -8,6 +8,7 @@ from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest db_transaction) from data.model.image import get_parent_images from data.model.tag import populate_manifest +from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface, DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE) from workers.worker import Worker @@ -127,10 +128,28 @@ def backfill_manifest(tag_manifest): root_image = tag_manifest.tag.image repository = tag_manifest.tag.repository - storage_ids = {root_image.storage.id} + image_storage_id_map = {root_image.storage.content_checksum: root_image.storage.id} parent_images = get_parent_images(repository.namespace_user.username, repository.name, root_image) for parent_image in parent_images: - storage_ids.add(parent_image.storage.id) + image_storage_id_map[parent_image.storage.content_checksum] = parent_image.storage.id + + # Ensure that all the expected blobs have been found. If not, we lookup the blob under the repo + # and add its storage ID. If the blob is not found, we mark the manifest as broken. + storage_ids = set() + for blob_digest in manifest.blob_digests: + if blob_digest in image_storage_id_map: + storage_ids.add(image_storage_id_map[blob_digest]) + else: + logger.debug('Blob `%s` not found in images for manifest `%s`; checking repo', + blob_digest, tag_manifest.id) + try: + blob_storage = get_repo_blob_by_digest(repository.namespace_user.username, repository.name, + blob_digest) + storage_ids.add(blob_storage.id) + except BlobDoesNotExist: + logger.debug('Blob `%s` not found in repo for manifest `%s`', + blob_digest, tag_manifest.id) + is_broken = True with db_transaction(): # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag. diff --git a/workers/test/test_manifestbackfillworker.py b/workers/test/test_manifestbackfillworker.py index edf4b464b..0a63325de 100644 --- a/workers/test/test_manifestbackfillworker.py +++ b/workers/test/test_manifestbackfillworker.py @@ -1,6 +1,9 @@ +from app import docker_v2_signing_key +from data import model from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob, ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image, TagManifestLabel) +from image.docker.schema1 import DockerSchema1ManifestBuilder from workers.manifestbackfillworker import backfill_manifest from test.fixtures import * @@ -68,3 +71,70 @@ def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db): legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image assert broken_manifest.tag.image == legacy_image + + +def test_manifestbackfillworker_mislinked_manifest(clear_rows, initialized_db): + """ Tests that a manifest whose image is mislinked will have its storages relinked properly. """ + # Delete existing tag manifest so we can reuse the tag. + TagManifestLabel.delete().execute() + TagManifest.delete().execute() + + repo = model.repository.get_repository('devtable', 'complex') + tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0') + tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0') + + # Add a mislinked manifest, by having its layer point to a blob in v3.0 but its image + # be the v5.0 image. + builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag') + builder.add_layer(tag_v30.image.storage.content_checksum, '{"id": "foo"}') + manifest = builder.build(docker_v2_signing_key) + + mislinked_manifest = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + tag=tag_v50) + + # Backfill the manifest and ensure its proper content checksum was linked. + assert backfill_manifest(mislinked_manifest) + + map_row = TagManifestToManifest.get(tag_manifest=mislinked_manifest) + assert not map_row.broken + + manifest_row = map_row.manifest + legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image + assert legacy_image == tag_v50.image + + manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)) + assert len(manifest_blobs) == 1 + assert manifest_blobs[0].blob.content_checksum == tag_v30.image.storage.content_checksum + + +def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initialized_db): + """ Tests that a manifest whose image is mislinked will attempt to have its storages relinked + properly. """ + # Delete existing tag manifest so we can reuse the tag. + TagManifestLabel.delete().execute() + TagManifest.delete().execute() + + repo = model.repository.get_repository('devtable', 'complex') + tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0') + + # Add a mislinked manifest, by having its layer point to an invalid blob but its image + # be the v5.0 image. + builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag') + builder.add_layer('sha256:deadbeef', '{"id": "foo"}') + manifest = builder.build(docker_v2_signing_key) + + broken_manifest = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + tag=tag_v50) + + # Backfill the manifest and ensure it is marked as broken. + assert backfill_manifest(broken_manifest) + + map_row = TagManifestToManifest.get(tag_manifest=broken_manifest) + assert map_row.broken + + manifest_row = map_row.manifest + legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image + assert legacy_image == tag_v50.image + + manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)) + assert len(manifest_blobs) == 0