Merge pull request #3205 from quay/backfill-storages-from-manifest

Change manifest backfill to lookup missing content checksums in the repository
This commit is contained in:
Joseph Schorr 2018-08-14 16:42:25 -04:00 committed by GitHub
commit a66a4d6a10
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 92 additions and 3 deletions

View file

@ -606,7 +606,7 @@ def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest, s
def _create_manifest(tag, manifest, storage_id_map): def _create_manifest(tag, manifest, storage_id_map):
storage_ids = set() storage_ids = set()
for blob_digest in reversed(manifest.blob_digests): for blob_digest in manifest.blob_digests:
image_storage_id = storage_id_map.get(blob_digest) image_storage_id = storage_id_map.get(blob_digest)
if image_storage_id is None: if image_storage_id is None:
logger.error('Missing blob for manifest `%s` in: %s', blob_digest, storage_id_map) logger.error('Missing blob for manifest `%s` in: %s', blob_digest, storage_id_map)

View file

@ -8,6 +8,7 @@ from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest
db_transaction) db_transaction)
from data.model.image import get_parent_images from data.model.image import get_parent_images
from data.model.tag import populate_manifest from data.model.tag import populate_manifest
from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist
from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface, from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface,
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE) DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE)
from workers.worker import Worker from workers.worker import Worker
@ -127,10 +128,28 @@ def backfill_manifest(tag_manifest):
root_image = tag_manifest.tag.image root_image = tag_manifest.tag.image
repository = tag_manifest.tag.repository repository = tag_manifest.tag.repository
storage_ids = {root_image.storage.id} image_storage_id_map = {root_image.storage.content_checksum: root_image.storage.id}
parent_images = get_parent_images(repository.namespace_user.username, repository.name, root_image) parent_images = get_parent_images(repository.namespace_user.username, repository.name, root_image)
for parent_image in parent_images: for parent_image in parent_images:
storage_ids.add(parent_image.storage.id) image_storage_id_map[parent_image.storage.content_checksum] = parent_image.storage.id
# Ensure that all the expected blobs have been found. If not, we lookup the blob under the repo
# and add its storage ID. If the blob is not found, we mark the manifest as broken.
storage_ids = set()
for blob_digest in manifest.blob_digests:
if blob_digest in image_storage_id_map:
storage_ids.add(image_storage_id_map[blob_digest])
else:
logger.debug('Blob `%s` not found in images for manifest `%s`; checking repo',
blob_digest, tag_manifest.id)
try:
blob_storage = get_repo_blob_by_digest(repository.namespace_user.username, repository.name,
blob_digest)
storage_ids.add(blob_storage.id)
except BlobDoesNotExist:
logger.debug('Blob `%s` not found in repo for manifest `%s`',
blob_digest, tag_manifest.id)
is_broken = True
with db_transaction(): with db_transaction():
# Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag. # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag.

View file

@ -1,6 +1,9 @@
from app import docker_v2_signing_key
from data import model
from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob, from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob,
ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image, ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image,
TagManifestLabel) TagManifestLabel)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from workers.manifestbackfillworker import backfill_manifest from workers.manifestbackfillworker import backfill_manifest
from test.fixtures import * from test.fixtures import *
@ -68,3 +71,70 @@ def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db):
legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
assert broken_manifest.tag.image == legacy_image assert broken_manifest.tag.image == legacy_image
def test_manifestbackfillworker_mislinked_manifest(clear_rows, initialized_db):
""" Tests that a manifest whose image is mislinked will have its storages relinked properly. """
# Delete existing tag manifest so we can reuse the tag.
TagManifestLabel.delete().execute()
TagManifest.delete().execute()
repo = model.repository.get_repository('devtable', 'complex')
tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0')
tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')
# Add a mislinked manifest, by having its layer point to a blob in v3.0 but its image
# be the v5.0 image.
builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
builder.add_layer(tag_v30.image.storage.content_checksum, '{"id": "foo"}')
manifest = builder.build(docker_v2_signing_key)
mislinked_manifest = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest,
tag=tag_v50)
# Backfill the manifest and ensure its proper content checksum was linked.
assert backfill_manifest(mislinked_manifest)
map_row = TagManifestToManifest.get(tag_manifest=mislinked_manifest)
assert not map_row.broken
manifest_row = map_row.manifest
legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
assert legacy_image == tag_v50.image
manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
assert len(manifest_blobs) == 1
assert manifest_blobs[0].blob.content_checksum == tag_v30.image.storage.content_checksum
def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initialized_db):
""" Tests that a manifest whose image is mislinked will attempt to have its storages relinked
properly. """
# Delete existing tag manifest so we can reuse the tag.
TagManifestLabel.delete().execute()
TagManifest.delete().execute()
repo = model.repository.get_repository('devtable', 'complex')
tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')
# Add a mislinked manifest, by having its layer point to an invalid blob but its image
# be the v5.0 image.
builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
builder.add_layer('sha256:deadbeef', '{"id": "foo"}')
manifest = builder.build(docker_v2_signing_key)
broken_manifest = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest,
tag=tag_v50)
# Backfill the manifest and ensure it is marked as broken.
assert backfill_manifest(broken_manifest)
map_row = TagManifestToManifest.get(tag_manifest=broken_manifest)
assert map_row.broken
manifest_row = map_row.manifest
legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
assert legacy_image == tag_v50.image
manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
assert len(manifest_blobs) == 0