diff --git a/data/database.py b/data/database.py index 866542c2b..6eb320400 100644 --- a/data/database.py +++ b/data/database.py @@ -1435,7 +1435,7 @@ class TagManifest(BaseModel): class TagManifestToManifest(BaseModel): """ NOTE: Only used for the duration of the migrations. """ tag_manifest = ForeignKeyField(TagManifest, index=True, unique=True) - manifest = ForeignKeyField(Manifest, index=True, unique=True) + manifest = ForeignKeyField(Manifest, index=True) broken = BooleanField(index=True, default=False) diff --git a/data/migrations/versions/13411de1c0ff_remove_unique_from_tagmanifesttomanifest.py b/data/migrations/versions/13411de1c0ff_remove_unique_from_tagmanifesttomanifest.py new file mode 100644 index 000000000..6aa576e31 --- /dev/null +++ b/data/migrations/versions/13411de1c0ff_remove_unique_from_tagmanifesttomanifest.py @@ -0,0 +1,43 @@ +"""Remove unique from TagManifestToManifest + +Revision ID: 13411de1c0ff +Revises: 654e6df88b71 +Create Date: 2018-08-19 23:30:24.969549 + +""" + +# revision identifiers, used by Alembic. +revision = '13411de1c0ff' +down_revision = '654e6df88b71' + +from alembic import op +import sqlalchemy as sa + +def upgrade(tables, tester): + # Note: Because of a restriction in MySQL, we cannot simply remove the index and re-add + # it without the unique=False, nor can we simply alter the index. To make it work, we'd have to + # remove the primary key on the field, so instead we simply drop the table entirely and + # recreate it with the modified index. The backfill will re-fill this in. + op.drop_table('tagmanifesttomanifest') + + op.create_table('tagmanifesttomanifest', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('tag_manifest_id', sa.Integer(), nullable=False), + sa.Column('manifest_id', sa.Integer(), nullable=False), + sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()), + sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')), + sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest')) + ) + op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False) + op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=False) + op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True) + + tester.populate_table('tagmanifesttomanifest', [ + ('manifest_id', tester.TestDataType.Foreign('manifest')), + ('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')), + ]) + + +def downgrade(tables, tester): + pass diff --git a/data/registry_model/test/test_pre_oci_model.py b/data/registry_model/test/test_pre_oci_model.py index 3d7140475..6e81c3fff 100644 --- a/data/registry_model/test/test_pre_oci_model.py +++ b/data/registry_model/test/test_pre_oci_model.py @@ -12,9 +12,10 @@ def pre_oci_model(initialized_db): @pytest.mark.parametrize('names, expected', [ (['unknown'], None), - (['latest'], 'latest'), - (['latest', 'prod'], 'latest'), - (['foo', 'prod'], 'prod'), + (['latest'], {'latest'}), + (['latest', 'prod'], {'latest', 'prod'}), + (['latest', 'prod', 'another'], {'latest', 'prod'}), + (['foo', 'prod'], {'prod'}), ]) def test_find_matching_tag(names, expected, pre_oci_model): repo = model.repository.get_repository('devtable', 'simple') @@ -23,12 +24,12 @@ def test_find_matching_tag(names, expected, pre_oci_model): if expected is None: assert found is None else: - assert found.name == expected + assert found.name in expected @pytest.mark.parametrize('repo_namespace, repo_name, expected', [ - ('devtable', 'simple', 'latest'), - ('buynlarge', 'orgrepo', 'latest'), + ('devtable', 'simple', {'latest'}), + ('buynlarge', 'orgrepo', {'latest', 'prod'}), ]) def test_get_most_recent_tag(repo_namespace, repo_name, expected, pre_oci_model): repo = model.repository.get_repository(repo_namespace, repo_name) @@ -37,4 +38,4 @@ def test_get_most_recent_tag(repo_namespace, repo_name, expected, pre_oci_model) if expected is None: assert found is None else: - assert found.name == expected + assert found.name in expected diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py index 3a6aab4c6..1d9be2992 100644 --- a/workers/manifestbackfillworker.py +++ b/workers/manifestbackfillworker.py @@ -5,7 +5,8 @@ from peewee import JOIN, fn, IntegrityError from app import app from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest, Image, - db_transaction) + Manifest, db_transaction) +from data.model import DataModelException from data.model.image import get_parent_images from data.model.tag import populate_manifest from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist @@ -129,7 +130,16 @@ def backfill_manifest(tag_manifest): repository = tag_manifest.tag.repository image_storage_id_map = {root_image.storage.content_checksum: root_image.storage.id} - parent_images = get_parent_images(repository.namespace_user.username, repository.name, root_image) + + try: + parent_images = get_parent_images(repository.namespace_user.username, repository.name, + root_image) + except DataModelException: + logger.exception('Exception when trying to load parent images for manifest `%s`', + tag_manifest.id) + parent_images = {} + is_broken = True + for parent_image in parent_images: image_storage_id_map[parent_image.storage.content_checksum] = parent_image.storage.id @@ -158,9 +168,22 @@ def backfill_manifest(tag_manifest): except TagManifest.DoesNotExist: return True - # Create the new-style rows for the manifest. - manifest_row = populate_manifest(tag_manifest.tag.repository, manifest, - tag_manifest.tag.image, storage_ids) + # Ensure it wasn't already created. + if lookup_map_row(tag_manifest): + return False + + # Check for a pre-existing manifest matching the digest in the repository. This can happen + # if we've already created the manifest row (typically for tag reverision). + try: + manifest_row = Manifest.get(digest=manifest.digest, repository=tag_manifest.tag.repository) + except Manifest.DoesNotExist: + # Create the new-style rows for the manifest. + try: + manifest_row = populate_manifest(tag_manifest.tag.repository, manifest, + tag_manifest.tag.image, storage_ids) + except IntegrityError: + # Pre-empted. + return False # Create the mapping row. If we find another was created for this tag manifest in the # meantime, then we've been preempted. diff --git a/workers/test/test_manifestbackfillworker.py b/workers/test/test_manifestbackfillworker.py index 0a63325de..4d8a9182e 100644 --- a/workers/test/test_manifestbackfillworker.py +++ b/workers/test/test_manifestbackfillworker.py @@ -138,3 +138,33 @@ def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initializ manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)) assert len(manifest_blobs) == 0 + + +def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db): + """ Tests that a manifest with a shared digest will be properly linked. """ + # Delete existing tag manifest so we can reuse the tag. + TagManifestLabel.delete().execute() + TagManifest.delete().execute() + + repo = model.repository.get_repository('devtable', 'gargantuan') + tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0') + tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0') + + # Build a manifest and assign it to both tags (this is allowed in the old model). + builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag') + builder.add_layer('sha256:deadbeef', '{"id": "foo"}') + manifest = builder.build(docker_v2_signing_key) + + manifest_1 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + tag=tag_v30) + manifest_2 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + tag=tag_v50) + + # Backfill "both" manifests and ensure both are pointed to by a single resulting row. + assert backfill_manifest(manifest_1) + assert backfill_manifest(manifest_2) + + map_row1 = TagManifestToManifest.get(tag_manifest=manifest_1) + map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2) + + assert map_row1.manifest == map_row2.manifest