From 89582438cdc956f6bcbea56620601ee61c432244 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 6 Aug 2018 16:58:27 -0400 Subject: [PATCH] Fix the V22 phase 1 migrations to use new tables for mapping rather than editing existing tables The ALTER TABLE operations previously used were causing the DB to die when run on the production TagManifest table which has 7 million rows. We instead now use new mapping tables, which is less nice, but these are temporary anyway, so hopefully we only have to deal with their ugliness for a short duration. --- data/database.py | 33 +++++--- ...784_add_v2_2_data_models_for_manifest_.py} | 78 ++++++++++++------- data/model/label.py | 26 +++++-- data/model/tag.py | 28 +++++-- data/model/test/test_gc.py | 4 +- data/model/test/test_tag.py | 14 ++-- endpoints/api/test/test_repository.py | 2 + 7 files changed, 128 insertions(+), 57 deletions(-) rename data/migrations/versions/{7734c7584421_add_v2_2_data_models_for_manifest_.py => 9093adccc784_add_v2_2_data_models_for_manifest_.py} (64%) diff --git a/data/database.py b/data/database.py index 4a12e05c2..efd015efd 100644 --- a/data/database.py +++ b/data/database.py @@ -504,7 +504,7 @@ class User(BaseModel): RepositoryNotification, OAuthAuthorizationCode, RepositoryActionCount, TagManifestLabel, TeamSync, RepositorySearchScore, - DeletedNamespace} | appr_classes | v22_classes + DeletedNamespace} | appr_classes | v22_classes | transition_classes delete_instance_filtered(self, User, delete_nullable, skip_transitive_deletes) @@ -652,7 +652,7 @@ class Repository(BaseModel): # are cleaned up directly skip_transitive_deletes = {RepositoryTag, RepositoryBuild, RepositoryBuildTrigger, BlobUpload, Image, TagManifest, TagManifestLabel, Label, DerivedStorageForImage, - RepositorySearchScore} | appr_classes | v22_classes + RepositorySearchScore} | appr_classes | v22_classes | transition_classes delete_instance_filtered(self, Repository, delete_nullable, skip_transitive_deletes) @@ -1433,10 +1433,13 @@ class TagManifest(BaseModel): digest = CharField(index=True) json_data = TextField() - # Note: `manifest` will be back-filled by a worker and may not be present - # currently. - manifest = ForeignKeyField(Manifest, null=True, index=True) - broken = BooleanField(null=True, index=True) + +class TagManifestToManifest(BaseModel): + """ NOTE: Only used for the duration of the migrations. """ + tag_manifest = ForeignKeyField(TagManifest, index=True, unique=True) + manifest = ForeignKeyField(Manifest, index=True, unique=True) + broken = BooleanField(index=True, default=False) + class TagManifestLabel(BaseModel): """ TO BE DEPRECATED: Mapping from a tag manifest to a label. @@ -1445,11 +1448,6 @@ class TagManifestLabel(BaseModel): annotated = ForeignKeyField(TagManifest, index=True) label = ForeignKeyField(Label) - # Note: `manifest_label` will be back-filled by a worker and may not be present - # currently. - manifest_label = ForeignKeyField(ManifestLabel, null=True, index=True) - broken_manifest = BooleanField(null=True, index=True) - class Meta: database = db read_slaves = (read_slave,) @@ -1458,11 +1456,24 @@ class TagManifestLabel(BaseModel): ) +class TagManifestLabelMap(BaseModel): + """ NOTE: Only used for the duration of the migrations. """ + tag_manifest = ForeignKeyField(TagManifest, index=True) + manifest = ForeignKeyField(Manifest, null=True, index=True) + + label = ForeignKeyField(Label, index=True) + + tag_manifest_label = ForeignKeyField(TagManifestLabel, index=True) + manifest_label = ForeignKeyField(ManifestLabel, null=True, index=True) + + broken_manifest = BooleanField(index=True, default=False) + appr_classes = set([ApprTag, ApprTagKind, ApprBlobPlacementLocation, ApprManifestList, ApprManifestBlob, ApprBlob, ApprManifestListManifest, ApprManifest, ApprBlobPlacement]) v22_classes = set([Manifest, ManifestLabel, ManifestBlob, ManifestLegacyImage]) +transition_classes = set([TagManifestToManifest, TagManifestLabelMap]) is_model = lambda x: inspect.isclass(x) and issubclass(x, BaseModel) and x is not BaseModel all_models = [model[1] for model in inspect.getmembers(sys.modules[__name__], is_model)] diff --git a/data/migrations/versions/7734c7584421_add_v2_2_data_models_for_manifest_.py b/data/migrations/versions/9093adccc784_add_v2_2_data_models_for_manifest_.py similarity index 64% rename from data/migrations/versions/7734c7584421_add_v2_2_data_models_for_manifest_.py rename to data/migrations/versions/9093adccc784_add_v2_2_data_models_for_manifest_.py index 6cbbbe076..63bd98631 100644 --- a/data/migrations/versions/7734c7584421_add_v2_2_data_models_for_manifest_.py +++ b/data/migrations/versions/9093adccc784_add_v2_2_data_models_for_manifest_.py @@ -1,13 +1,13 @@ """Add V2_2 data models for Manifest, ManifestBlob and ManifestLegacyImage -Revision ID: 7734c7584421 +Revision ID: 9093adccc784 Revises: 6c21e2cfb8b6 -Create Date: 2018-07-31 13:26:02.850353 +Create Date: 2018-08-06 16:07:50.222749 """ # revision identifiers, used by Alembic. -revision = '7734c7584421' +revision = '9093adccc784' down_revision = '6c21e2cfb8b6' from alembic import op @@ -75,17 +75,39 @@ def upgrade(tables, tester): op.create_index('manifestlegacyimage_image_id', 'manifestlegacyimage', ['image_id'], unique=False) op.create_index('manifestlegacyimage_manifest_id', 'manifestlegacyimage', ['manifest_id'], unique=True) op.create_index('manifestlegacyimage_repository_id', 'manifestlegacyimage', ['repository_id'], unique=False) - - op.add_column(u'tagmanifest', sa.Column('broken', sa.Boolean(), nullable=True)) - op.add_column(u'tagmanifest', sa.Column('manifest_id', sa.Integer(), nullable=True)) - op.create_index('tagmanifest_broken', 'tagmanifest', ['broken'], unique=False) - op.create_index('tagmanifest_manifest_id', 'tagmanifest', ['manifest_id'], unique=False) - op.create_foreign_key(op.f('fk_tagmanifest_manifest_id_manifest'), 'tagmanifest', 'manifest', ['manifest_id'], ['id']) - op.add_column(u'tagmanifestlabel', sa.Column('broken_manifest', sa.Boolean(), nullable=True)) - op.add_column(u'tagmanifestlabel', sa.Column('manifest_label_id', sa.Integer(), nullable=True)) - op.create_index('tagmanifestlabel_broken_manifest', 'tagmanifestlabel', ['broken_manifest'], unique=False) - op.create_index('tagmanifestlabel_manifest_label_id', 'tagmanifestlabel', ['manifest_label_id'], unique=False) - op.create_foreign_key(op.f('fk_tagmanifestlabel_manifest_label_id_manifestlabel'), 'tagmanifestlabel', 'manifestlabel', ['manifest_label_id'], ['id']) + op.create_table('tagmanifesttomanifest', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('tag_manifest_id', sa.Integer(), nullable=False), + sa.Column('manifest_id', sa.Integer(), nullable=False), + sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()), + sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')), + sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest')) + ) + op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False) + op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=True) + op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True) + op.create_table('tagmanifestlabelmap', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('tag_manifest_id', sa.Integer(), nullable=False), + sa.Column('manifest_id', sa.Integer(), nullable=True), + sa.Column('label_id', sa.Integer(), nullable=False), + sa.Column('tag_manifest_label_id', sa.Integer(), nullable=False), + sa.Column('manifest_label_id', sa.Integer(), nullable=True), + sa.Column('broken_manifest', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()), + sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_tagmanifestlabelmap_label_id_label')), + sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifestlabelmap_manifest_id_manifest')), + sa.ForeignKeyConstraint(['manifest_label_id'], ['manifestlabel.id'], name=op.f('fk_tagmanifestlabelmap_manifest_label_id_manifestlabel')), + sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifestlabelmap_tag_manifest_id_tagmanifest')), + sa.ForeignKeyConstraint(['tag_manifest_label_id'], ['tagmanifestlabel.id'], name=op.f('fk_tagmanifestlabelmap_tag_manifest_label_id_tagmanifestlabel')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifestlabelmap')) + ) + op.create_index('tagmanifestlabelmap_broken_manifest', 'tagmanifestlabelmap', ['broken_manifest'], unique=False) + op.create_index('tagmanifestlabelmap_label_id', 'tagmanifestlabelmap', ['label_id'], unique=False) + op.create_index('tagmanifestlabelmap_manifest_id', 'tagmanifestlabelmap', ['manifest_id'], unique=False) + op.create_index('tagmanifestlabelmap_manifest_label_id', 'tagmanifestlabelmap', ['manifest_label_id'], unique=False) + op.create_index('tagmanifestlabelmap_tag_manifest_id', 'tagmanifestlabelmap', ['tag_manifest_id'], unique=False) + op.create_index('tagmanifestlabelmap_tag_manifest_label_id', 'tagmanifestlabelmap', ['tag_manifest_label_id'], unique=False) # ### end Alembic commands ### for media_type in DOCKER_SCHEMA1_CONTENT_TYPES: @@ -121,8 +143,19 @@ def upgrade(tables, tester): ('repository_id', tester.TestDataType.Foreign('repository')), ]) - tester.populate_column('tagmanifest', 'manifest_id', tester.TestDataType.Foreign('manifest')) - tester.populate_column('tagmanifestlabel', 'manifest_label_id', tester.TestDataType.Foreign('manifestlabel')) + tester.populate_table('tagmanifesttomanifest', [ + ('manifest_id', tester.TestDataType.Foreign('manifest')), + ('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')), + ]) + + tester.populate_table('tagmanifestlabelmap', [ + ('manifest_id', tester.TestDataType.Foreign('manifest')), + ('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')), + ('tag_manifest_label_id', tester.TestDataType.Foreign('tagmanifestlabel')), + ('manifest_label_id', tester.TestDataType.Foreign('manifestlabel')), + ('label_id', tester.TestDataType.Foreign('label')), + ]) + # ### end population of test data ### # @@ -135,17 +168,8 @@ def downgrade(tables, tester): mediatype.c.name == op.inline_literal(media_type))) # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint(op.f('fk_tagmanifestlabel_manifest_label_id_manifestlabel'), 'tagmanifestlabel', type_='foreignkey') - op.drop_index('tagmanifestlabel_manifest_label_id', table_name='tagmanifestlabel') - op.drop_index('tagmanifestlabel_broken_manifest', table_name='tagmanifestlabel') - op.drop_column(u'tagmanifestlabel', 'manifest_label_id') - op.drop_column(u'tagmanifestlabel', 'broken_manifest') - op.drop_constraint(op.f('fk_tagmanifest_manifest_id_manifest'), 'tagmanifest', type_='foreignkey') - op.drop_index('tagmanifest_manifest_id', table_name='tagmanifest') - op.drop_index('tagmanifest_broken', table_name='tagmanifest') - op.drop_column(u'tagmanifest', 'manifest_id') - op.drop_column(u'tagmanifest', 'broken') - + op.drop_table('tagmanifestlabelmap') + op.drop_table('tagmanifesttomanifest') op.drop_table('manifestlegacyimage') op.drop_table('manifestlabel') op.drop_table('manifestblob') diff --git a/data/model/label.py b/data/model/label.py index e2fe6ee21..1592efaa6 100644 --- a/data/model/label.py +++ b/data/model/label.py @@ -3,7 +3,7 @@ import logging from cachetools import lru_cache from data.database import (Label, TagManifestLabel, MediaType, LabelSourceType, db_transaction, - ManifestLabel) + ManifestLabel, TagManifestLabelMap, TagManifestToManifest) from data.model import InvalidLabelKeyException, InvalidMediaTypeException, DataModelException from data.text import prefix_search from util.validation import validate_label_key @@ -69,11 +69,20 @@ def create_manifest_label(tag_manifest, key, value, source_type_name, media_type with db_transaction(): label = Label.create(key=key, value=value, source_type=source_type_id, media_type=media_type_id) - TagManifestLabel.create(annotated=tag_manifest, label=label, - repository=tag_manifest.tag.repository) - if tag_manifest.manifest is not None: - ManifestLabel.create(manifest=tag_manifest.manifest, label=label, - repository=tag_manifest.tag.repository) + tag_manifest_label = TagManifestLabel.create(annotated=tag_manifest, label=label, + repository=tag_manifest.tag.repository) + try: + mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest) + if mapping_row.manifest: + manifest_label = ManifestLabel.create(manifest=mapping_row.manifest, label=label, + repository=tag_manifest.tag.repository) + TagManifestLabelMap.create(manifest_label=manifest_label, + tag_manifest_label=tag_manifest_label, + label=label, + manifest=mapping_row.manifest, + tag_manifest=tag_manifest) + except TagManifestToManifest.DoesNotExist: + pass return label @@ -120,6 +129,11 @@ def delete_manifest_label(label_uuid, tag_manifest): raise DataModelException('Cannot delete immutable label') # Delete the mapping records and label. + (TagManifestLabelMap + .delete() + .where(TagManifestLabelMap.label == label) + .execute()) + deleted_count = TagManifestLabel.delete().where(TagManifestLabel.label == label).execute() if deleted_count != 1: logger.warning('More than a single label deleted for matching label %s', label_uuid) diff --git a/data/model/tag.py b/data/model/tag.py index 9c653d0d1..46a356f58 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -10,7 +10,8 @@ from data.model import (image, db_transaction, DataModelException, _basequery, from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest, RepositoryNotification, Label, TagManifestLabel, get_epoch_timestamp, db_for_update, Manifest, ManifestLabel, ManifestBlob, - ManifestLegacyImage) + ManifestLegacyImage, TagManifestToManifest, + TagManifestLabelMap) from util.timedeltastring import convert_to_timedelta @@ -358,11 +359,22 @@ def _delete_tags(repo, query_modifier=None): .join(RepositoryTag) .where(RepositoryTag.id << tags_to_delete)) tag_manifest_ids_to_delete = [tagmanifest.id for tagmanifest in tag_manifests_to_delete] - manifest_ids_to_delete = [tagmanifest.manifest_id for tagmanifest in tag_manifests_to_delete - if tagmanifest.manifest is not None] + # Find all the new-style manifests to delete, if any. + tmt_query = (TagManifestToManifest + .select() + .where(TagManifestToManifest.tag_manifest << tag_manifests_to_delete, + TagManifestToManifest.broken == False)) + + manifest_ids_to_delete = [tmt.manifest_id for tmt in tmt_query] num_deleted_manifests = 0 if len(tag_manifest_ids_to_delete) > 0: + # Delete tag manifest -> manifest mapping entries. + (TagManifestToManifest + .delete() + .where(TagManifestToManifest.tag_manifest << tag_manifest_ids_to_delete) + .execute()) + # Find the set of IDs for all the labels to delete. manifest_labels_query = (TagManifestLabel .select() @@ -371,6 +383,11 @@ def _delete_tags(repo, query_modifier=None): label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query] # Delete all the mapping entries for labels. + (TagManifestLabelMap + .delete() + .where(TagManifestLabelMap.tag_manifest << tag_manifest_ids_to_delete) + .execute()) + (TagManifestLabel .delete() .where(TagManifestLabel.repository == repo, @@ -608,8 +625,9 @@ def _create_manifest(tag, manifest): ManifestBlob.create(manifest=manifest_row, repository=tag.repository, blob=image_storage, blob_index=index) - return TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes, - manifest=manifest_row) + tag_manifest = TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes) + TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest_row) + return tag_manifest def load_tag_manifest(namespace, repo_name, tag_name): diff --git a/data/model/test/test_gc.py b/data/model/test/test_gc.py index 5ff79a2e4..aa77a82df 100644 --- a/data/model/test/test_gc.py +++ b/data/model/test/test_gc.py @@ -11,7 +11,7 @@ from playhouse.test_utils import assert_query_count from data import model, database from data.database import (Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel, - ApprBlob, Manifest, TagManifest) + ApprBlob, Manifest, TagManifest, TagManifestToManifest) from image.docker.schema1 import DockerSchema1ManifestBuilder from test.fixtures import * @@ -162,7 +162,7 @@ def _get_dangling_label_count(): def _get_dangling_manifest_count(): manifest_ids = set([current.id for current in Manifest.select()]) - referenced_by_tag_manifest = set([manifest.manifest_id for manifest in TagManifest.select()]) + referenced_by_tag_manifest = set([tmt.manifest_id for tmt in TagManifestToManifest.select()]) return len(manifest_ids - referenced_by_tag_manifest) diff --git a/data/model/test/test_tag.py b/data/model/test/test_tag.py index 4cdb94acb..bb5c9d28b 100644 --- a/data/model/test/test_tag.py +++ b/data/model/test/test_tag.py @@ -9,7 +9,7 @@ from mock import patch from app import docker_v2_signing_key from data.database import (Image, RepositoryTag, ImageStorage, Repository, Manifest, ManifestBlob, - ManifestLegacyImage) + ManifestLegacyImage, TagManifestToManifest) from data.model.repository import create_repository from data.model.tag import (list_active_repo_tags, create_or_update_tag, delete_tag, get_matching_tags, _tag_alive, get_matching_tags_for_images, @@ -237,12 +237,14 @@ def test_store_tag_manifest(initialized_db): tag_manifest, _ = store_tag_manifest('devtable', 'simple', 'sometag', manifest) # Ensure we have the new-model expected rows. - assert tag_manifest.manifest is not None - assert tag_manifest.manifest.manifest_bytes == manifest.bytes - assert tag_manifest.manifest.digest == str(manifest.digest) + mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest) + + assert mapping_row.manifest is not None + assert mapping_row.manifest.manifest_bytes == manifest.bytes + assert mapping_row.manifest.digest == str(manifest.digest) blob_rows = {m.blob_id for m in - ManifestBlob.select().where(ManifestBlob.manifest == tag_manifest.manifest)} + ManifestBlob.select().where(ManifestBlob.manifest == mapping_row.manifest)} assert blob_rows == {s.id for s in storages} - assert ManifestLegacyImage.get(manifest=tag_manifest.manifest).image == tag_manifest.tag.image + assert ManifestLegacyImage.get(manifest=mapping_row.manifest).image == tag_manifest.tag.image diff --git a/endpoints/api/test/test_repository.py b/endpoints/api/test/test_repository.py index 9d72be3b3..f728d92bf 100644 --- a/endpoints/api/test/test_repository.py +++ b/endpoints/api/test/test_repository.py @@ -128,6 +128,8 @@ def test_create_repository(repo_name, expected_status, client): def test_get_repo(has_tag_manifest, client, initialized_db): with client_with_identity('devtable', client) as cl: if not has_tag_manifest: + database.TagManifestLabelMap.delete().execute() + database.TagManifestToManifest.delete().execute() database.TagManifestLabel.delete().execute() database.TagManifest.delete().execute()