From e30b746aeffee4968105a317f37b2f00b914797b Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Sun, 19 Aug 2018 23:50:18 -0400 Subject: [PATCH] Fix TagManifests with shared digests under the same repository. TagManifests can (apparently, in very rare scenarios) share manifests with the exact same digests, so we need to support that case in the backfill worker. We also need to remove a unique constraint on the manifest column in the mapping table to support this case. --- data/database.py | 2 +- ...emove_unique_from_tagmanifesttomanifest.py | 43 +++++++++++++++++++ workers/manifestbackfillworker.py | 21 +++++++-- workers/test/test_manifestbackfillworker.py | 30 +++++++++++++ 4 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 data/migrations/versions/13411de1c0ff_remove_unique_from_tagmanifesttomanifest.py diff --git a/data/database.py b/data/database.py index 866542c2b..6eb320400 100644 --- a/data/database.py +++ b/data/database.py @@ -1435,7 +1435,7 @@ class TagManifest(BaseModel): class TagManifestToManifest(BaseModel): """ NOTE: Only used for the duration of the migrations. """ tag_manifest = ForeignKeyField(TagManifest, index=True, unique=True) - manifest = ForeignKeyField(Manifest, index=True, unique=True) + manifest = ForeignKeyField(Manifest, index=True) broken = BooleanField(index=True, default=False) diff --git a/data/migrations/versions/13411de1c0ff_remove_unique_from_tagmanifesttomanifest.py b/data/migrations/versions/13411de1c0ff_remove_unique_from_tagmanifesttomanifest.py new file mode 100644 index 000000000..6aa576e31 --- /dev/null +++ b/data/migrations/versions/13411de1c0ff_remove_unique_from_tagmanifesttomanifest.py @@ -0,0 +1,43 @@ +"""Remove unique from TagManifestToManifest + +Revision ID: 13411de1c0ff +Revises: 654e6df88b71 +Create Date: 2018-08-19 23:30:24.969549 + +""" + +# revision identifiers, used by Alembic. +revision = '13411de1c0ff' +down_revision = '654e6df88b71' + +from alembic import op +import sqlalchemy as sa + +def upgrade(tables, tester): + # Note: Because of a restriction in MySQL, we cannot simply remove the index and re-add + # it without the unique=False, nor can we simply alter the index. To make it work, we'd have to + # remove the primary key on the field, so instead we simply drop the table entirely and + # recreate it with the modified index. The backfill will re-fill this in. + op.drop_table('tagmanifesttomanifest') + + op.create_table('tagmanifesttomanifest', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('tag_manifest_id', sa.Integer(), nullable=False), + sa.Column('manifest_id', sa.Integer(), nullable=False), + sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()), + sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')), + sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest')) + ) + op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False) + op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=False) + op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True) + + tester.populate_table('tagmanifesttomanifest', [ + ('manifest_id', tester.TestDataType.Foreign('manifest')), + ('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')), + ]) + + +def downgrade(tables, tester): + pass diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py index 3a6aab4c6..19139cd38 100644 --- a/workers/manifestbackfillworker.py +++ b/workers/manifestbackfillworker.py @@ -5,7 +5,7 @@ from peewee import JOIN, fn, IntegrityError from app import app from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest, Image, - db_transaction) + Manifest, db_transaction) from data.model.image import get_parent_images from data.model.tag import populate_manifest from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist @@ -158,9 +158,22 @@ def backfill_manifest(tag_manifest): except TagManifest.DoesNotExist: return True - # Create the new-style rows for the manifest. - manifest_row = populate_manifest(tag_manifest.tag.repository, manifest, - tag_manifest.tag.image, storage_ids) + # Ensure it wasn't already created. + if lookup_map_row(tag_manifest): + return False + + # Check for a pre-existing manifest matching the digest in the repository. This can happen + # if we've already created the manifest row (typically for tag reverision). + try: + manifest_row = Manifest.get(digest=manifest.digest, repository=tag_manifest.tag.repository) + except Manifest.DoesNotExist: + # Create the new-style rows for the manifest. + try: + manifest_row = populate_manifest(tag_manifest.tag.repository, manifest, + tag_manifest.tag.image, storage_ids) + except IntegrityError: + # Pre-empted. + return False # Create the mapping row. If we find another was created for this tag manifest in the # meantime, then we've been preempted. diff --git a/workers/test/test_manifestbackfillworker.py b/workers/test/test_manifestbackfillworker.py index 0a63325de..4d8a9182e 100644 --- a/workers/test/test_manifestbackfillworker.py +++ b/workers/test/test_manifestbackfillworker.py @@ -138,3 +138,33 @@ def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initializ manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)) assert len(manifest_blobs) == 0 + + +def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db): + """ Tests that a manifest with a shared digest will be properly linked. """ + # Delete existing tag manifest so we can reuse the tag. + TagManifestLabel.delete().execute() + TagManifest.delete().execute() + + repo = model.repository.get_repository('devtable', 'gargantuan') + tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0') + tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0') + + # Build a manifest and assign it to both tags (this is allowed in the old model). + builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag') + builder.add_layer('sha256:deadbeef', '{"id": "foo"}') + manifest = builder.build(docker_v2_signing_key) + + manifest_1 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + tag=tag_v30) + manifest_2 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + tag=tag_v50) + + # Backfill "both" manifests and ensure both are pointed to by a single resulting row. + assert backfill_manifest(manifest_1) + assert backfill_manifest(manifest_2) + + map_row1 = TagManifestToManifest.get(tag_manifest=manifest_1) + map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2) + + assert map_row1.manifest == map_row2.manifest