From 96e0fc4ad6f153f8b7ef1c647a80c2b0ce66303b Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 13 Aug 2018 15:51:18 -0400 Subject: [PATCH] Fix manifest backfill for manifests pointing to V1 images V1 images don't have checksums, so we just always use the full storage set for the manifest, rather than a checksum map --- data/model/tag.py | 33 ++++++++++----------- workers/manifestbackfillworker.py | 14 +++++---- workers/test/test_manifestbackfillworker.py | 2 -- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/data/model/tag.py b/data/model/tag.py index ef49cf51f..538445c23 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -605,7 +605,19 @@ def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest, s def _create_manifest(tag, manifest, storage_id_map): - manifest_row = populate_manifest(tag.repository, manifest, tag.image, storage_id_map) + storage_ids = set() + for blob_digest in reversed(manifest.blob_digests): + image_storage_id = storage_id_map.get(blob_digest) + if image_storage_id is None: + logger.error('Missing blob for manifest `%s` in: %s', blob_digest, storage_id_map) + raise DataModelException('Missing blob for manifest `%s`' % blob_digest) + + if image_storage_id in storage_ids: + continue + + storage_ids.add(image_storage_id) + + manifest_row = populate_manifest(tag.repository, manifest, tag.image, storage_ids) with db_transaction(): tag_manifest = TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes) @@ -613,7 +625,7 @@ def _create_manifest(tag, manifest, storage_id_map): return tag_manifest -def populate_manifest(repository, manifest, legacy_image, storage_id_map): +def populate_manifest(repository, manifest, legacy_image, storage_ids): """ Populates the rows for the manifest, including its blobs and legacy image. """ media_type = Manifest.media_type.get_id(manifest.media_type) with db_transaction(): @@ -621,21 +633,8 @@ def populate_manifest(repository, manifest, legacy_image, storage_id_map): manifest_bytes=manifest.bytes, media_type=media_type) ManifestLegacyImage.create(manifest=manifest_row, repository=repository, image=legacy_image) - blobs_to_insert = [] - blobs_created = set() - for blob_digest in reversed(manifest.blob_digests): - image_storage_id = storage_id_map.get(blob_digest) - if image_storage_id is None: - logger.error('Missing blob for manifest `%s` in: %s', blob_digest, storage_id_map) - raise DataModelException('Missing blob for manifest `%s`' % blob_digest) - - if image_storage_id in blobs_created: - continue - - blobs_to_insert.append(dict(manifest=manifest_row, repository=repository, - blob=image_storage_id)) - blobs_created.add(image_storage_id) - + blobs_to_insert = [dict(manifest=manifest_row, repository=repository, + blob=storage_id) for storage_id in storage_ids] if blobs_to_insert: ManifestBlob.insert_many(blobs_to_insert).execute() diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py index 2fe3db8a6..5d35b5552 100644 --- a/workers/manifestbackfillworker.py +++ b/workers/manifestbackfillworker.py @@ -6,6 +6,7 @@ from peewee import JOIN, fn, IntegrityError from app import app from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest, Image, db_transaction) +from data.model.image import get_parent_images from data.model.tag import populate_manifest from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface, DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE) @@ -124,10 +125,12 @@ def backfill_manifest(tag_manifest): # Lookup the storages for the digests. root_image = tag_manifest.tag.image - storage_id_map = {root_image.storage.content_checksum: root_image.storage.id} - for parent_image_id in root_image.ancestor_id_list(): - storage = Image.get(id=parent_image_id).storage - storage_id_map[storage.content_checksum] = storage.id + repository = tag_manifest.tag.repository + + storage_ids = {root_image.storage.id} + parent_images = get_parent_images(repository.namespace_user.username, repository.name, root_image) + for parent_image in parent_images: + storage_ids.add(parent_image.storage.id) with db_transaction(): # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag. @@ -138,7 +141,7 @@ def backfill_manifest(tag_manifest): # Create the new-style rows for the manifest. manifest_row = populate_manifest(tag_manifest.tag.repository, manifest, - tag_manifest.tag.image, storage_id_map) + tag_manifest.tag.image, storage_ids) # Create the mapping row. If we find another was created for this tag manifest in the # meantime, then we've been preempted. @@ -150,7 +153,6 @@ def backfill_manifest(tag_manifest): return False - if __name__ == "__main__": logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False) worker = ManifestBackfillWorker() diff --git a/workers/test/test_manifestbackfillworker.py b/workers/test/test_manifestbackfillworker.py index cbc678af9..edf4b464b 100644 --- a/workers/test/test_manifestbackfillworker.py +++ b/workers/test/test_manifestbackfillworker.py @@ -68,5 +68,3 @@ def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db): legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image assert broken_manifest.tag.image == legacy_image - - assert not list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))