Fix manifest backfill for manifests pointing to V1 images

V1 images don't have checksums, so we just always use the full storage set for the manifest, rather than a checksum map
This commit is contained in:
Joseph Schorr 2018-08-13 15:51:18 -04:00
parent 73e585df49
commit 96e0fc4ad6
3 changed files with 24 additions and 25 deletions

View file

@ -605,7 +605,19 @@ def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest, s
def _create_manifest(tag, manifest, storage_id_map): def _create_manifest(tag, manifest, storage_id_map):
manifest_row = populate_manifest(tag.repository, manifest, tag.image, storage_id_map) storage_ids = set()
for blob_digest in reversed(manifest.blob_digests):
image_storage_id = storage_id_map.get(blob_digest)
if image_storage_id is None:
logger.error('Missing blob for manifest `%s` in: %s', blob_digest, storage_id_map)
raise DataModelException('Missing blob for manifest `%s`' % blob_digest)
if image_storage_id in storage_ids:
continue
storage_ids.add(image_storage_id)
manifest_row = populate_manifest(tag.repository, manifest, tag.image, storage_ids)
with db_transaction(): with db_transaction():
tag_manifest = TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes) tag_manifest = TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes)
@ -613,7 +625,7 @@ def _create_manifest(tag, manifest, storage_id_map):
return tag_manifest return tag_manifest
def populate_manifest(repository, manifest, legacy_image, storage_id_map): def populate_manifest(repository, manifest, legacy_image, storage_ids):
""" Populates the rows for the manifest, including its blobs and legacy image. """ """ Populates the rows for the manifest, including its blobs and legacy image. """
media_type = Manifest.media_type.get_id(manifest.media_type) media_type = Manifest.media_type.get_id(manifest.media_type)
with db_transaction(): with db_transaction():
@ -621,21 +633,8 @@ def populate_manifest(repository, manifest, legacy_image, storage_id_map):
manifest_bytes=manifest.bytes, media_type=media_type) manifest_bytes=manifest.bytes, media_type=media_type)
ManifestLegacyImage.create(manifest=manifest_row, repository=repository, image=legacy_image) ManifestLegacyImage.create(manifest=manifest_row, repository=repository, image=legacy_image)
blobs_to_insert = [] blobs_to_insert = [dict(manifest=manifest_row, repository=repository,
blobs_created = set() blob=storage_id) for storage_id in storage_ids]
for blob_digest in reversed(manifest.blob_digests):
image_storage_id = storage_id_map.get(blob_digest)
if image_storage_id is None:
logger.error('Missing blob for manifest `%s` in: %s', blob_digest, storage_id_map)
raise DataModelException('Missing blob for manifest `%s`' % blob_digest)
if image_storage_id in blobs_created:
continue
blobs_to_insert.append(dict(manifest=manifest_row, repository=repository,
blob=image_storage_id))
blobs_created.add(image_storage_id)
if blobs_to_insert: if blobs_to_insert:
ManifestBlob.insert_many(blobs_to_insert).execute() ManifestBlob.insert_many(blobs_to_insert).execute()

View file

@ -6,6 +6,7 @@ from peewee import JOIN, fn, IntegrityError
from app import app from app import app
from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest, Image, from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest, Image,
db_transaction) db_transaction)
from data.model.image import get_parent_images
from data.model.tag import populate_manifest from data.model.tag import populate_manifest
from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface, from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface,
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE) DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE)
@ -124,10 +125,12 @@ def backfill_manifest(tag_manifest):
# Lookup the storages for the digests. # Lookup the storages for the digests.
root_image = tag_manifest.tag.image root_image = tag_manifest.tag.image
storage_id_map = {root_image.storage.content_checksum: root_image.storage.id} repository = tag_manifest.tag.repository
for parent_image_id in root_image.ancestor_id_list():
storage = Image.get(id=parent_image_id).storage storage_ids = {root_image.storage.id}
storage_id_map[storage.content_checksum] = storage.id parent_images = get_parent_images(repository.namespace_user.username, repository.name, root_image)
for parent_image in parent_images:
storage_ids.add(parent_image.storage.id)
with db_transaction(): with db_transaction():
# Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag. # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag.
@ -138,7 +141,7 @@ def backfill_manifest(tag_manifest):
# Create the new-style rows for the manifest. # Create the new-style rows for the manifest.
manifest_row = populate_manifest(tag_manifest.tag.repository, manifest, manifest_row = populate_manifest(tag_manifest.tag.repository, manifest,
tag_manifest.tag.image, storage_id_map) tag_manifest.tag.image, storage_ids)
# Create the mapping row. If we find another was created for this tag manifest in the # Create the mapping row. If we find another was created for this tag manifest in the
# meantime, then we've been preempted. # meantime, then we've been preempted.
@ -150,7 +153,6 @@ def backfill_manifest(tag_manifest):
return False return False
if __name__ == "__main__": if __name__ == "__main__":
logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False) logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)
worker = ManifestBackfillWorker() worker = ManifestBackfillWorker()

View file

@ -68,5 +68,3 @@ def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db):
legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
assert broken_manifest.tag.image == legacy_image assert broken_manifest.tag.image == legacy_image
assert not list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))