Fix the V22 phase 1 migrations to use new tables for mapping rather than editing existing tables

The ALTER TABLE operations previously used were causing the DB to die when run on the production TagManifest table which has 7 million rows. We instead now use new mapping tables, which is less nice, but these are temporary anyway, so hopefully we only have to deal with their ugliness for a short duration.
This commit is contained in:
Joseph Schorr 2018-08-06 16:58:27 -04:00
parent 7068010751
commit 89582438cd
7 changed files with 128 additions and 57 deletions

View file

@ -504,7 +504,7 @@ class User(BaseModel):
RepositoryNotification, OAuthAuthorizationCode,
RepositoryActionCount, TagManifestLabel,
TeamSync, RepositorySearchScore,
DeletedNamespace} | appr_classes | v22_classes
DeletedNamespace} | appr_classes | v22_classes | transition_classes
delete_instance_filtered(self, User, delete_nullable, skip_transitive_deletes)
@ -652,7 +652,7 @@ class Repository(BaseModel):
# are cleaned up directly
skip_transitive_deletes = {RepositoryTag, RepositoryBuild, RepositoryBuildTrigger, BlobUpload,
Image, TagManifest, TagManifestLabel, Label, DerivedStorageForImage,
RepositorySearchScore} | appr_classes | v22_classes
RepositorySearchScore} | appr_classes | v22_classes | transition_classes
delete_instance_filtered(self, Repository, delete_nullable, skip_transitive_deletes)
@ -1433,10 +1433,13 @@ class TagManifest(BaseModel):
digest = CharField(index=True)
json_data = TextField()
# Note: `manifest` will be back-filled by a worker and may not be present
# currently.
manifest = ForeignKeyField(Manifest, null=True, index=True)
broken = BooleanField(null=True, index=True)
class TagManifestToManifest(BaseModel):
""" NOTE: Only used for the duration of the migrations. """
tag_manifest = ForeignKeyField(TagManifest, index=True, unique=True)
manifest = ForeignKeyField(Manifest, index=True, unique=True)
broken = BooleanField(index=True, default=False)
class TagManifestLabel(BaseModel):
""" TO BE DEPRECATED: Mapping from a tag manifest to a label.
@ -1445,11 +1448,6 @@ class TagManifestLabel(BaseModel):
annotated = ForeignKeyField(TagManifest, index=True)
label = ForeignKeyField(Label)
# Note: `manifest_label` will be back-filled by a worker and may not be present
# currently.
manifest_label = ForeignKeyField(ManifestLabel, null=True, index=True)
broken_manifest = BooleanField(null=True, index=True)
class Meta:
database = db
read_slaves = (read_slave,)
@ -1458,11 +1456,24 @@ class TagManifestLabel(BaseModel):
)
class TagManifestLabelMap(BaseModel):
""" NOTE: Only used for the duration of the migrations. """
tag_manifest = ForeignKeyField(TagManifest, index=True)
manifest = ForeignKeyField(Manifest, null=True, index=True)
label = ForeignKeyField(Label, index=True)
tag_manifest_label = ForeignKeyField(TagManifestLabel, index=True)
manifest_label = ForeignKeyField(ManifestLabel, null=True, index=True)
broken_manifest = BooleanField(index=True, default=False)
appr_classes = set([ApprTag, ApprTagKind, ApprBlobPlacementLocation, ApprManifestList,
ApprManifestBlob, ApprBlob, ApprManifestListManifest, ApprManifest,
ApprBlobPlacement])
v22_classes = set([Manifest, ManifestLabel, ManifestBlob, ManifestLegacyImage])
transition_classes = set([TagManifestToManifest, TagManifestLabelMap])
is_model = lambda x: inspect.isclass(x) and issubclass(x, BaseModel) and x is not BaseModel
all_models = [model[1] for model in inspect.getmembers(sys.modules[__name__], is_model)]

View file

@ -1,13 +1,13 @@
"""Add V2_2 data models for Manifest, ManifestBlob and ManifestLegacyImage
Revision ID: 7734c7584421
Revision ID: 9093adccc784
Revises: 6c21e2cfb8b6
Create Date: 2018-07-31 13:26:02.850353
Create Date: 2018-08-06 16:07:50.222749
"""
# revision identifiers, used by Alembic.
revision = '7734c7584421'
revision = '9093adccc784'
down_revision = '6c21e2cfb8b6'
from alembic import op
@ -75,17 +75,39 @@ def upgrade(tables, tester):
op.create_index('manifestlegacyimage_image_id', 'manifestlegacyimage', ['image_id'], unique=False)
op.create_index('manifestlegacyimage_manifest_id', 'manifestlegacyimage', ['manifest_id'], unique=True)
op.create_index('manifestlegacyimage_repository_id', 'manifestlegacyimage', ['repository_id'], unique=False)
op.add_column(u'tagmanifest', sa.Column('broken', sa.Boolean(), nullable=True))
op.add_column(u'tagmanifest', sa.Column('manifest_id', sa.Integer(), nullable=True))
op.create_index('tagmanifest_broken', 'tagmanifest', ['broken'], unique=False)
op.create_index('tagmanifest_manifest_id', 'tagmanifest', ['manifest_id'], unique=False)
op.create_foreign_key(op.f('fk_tagmanifest_manifest_id_manifest'), 'tagmanifest', 'manifest', ['manifest_id'], ['id'])
op.add_column(u'tagmanifestlabel', sa.Column('broken_manifest', sa.Boolean(), nullable=True))
op.add_column(u'tagmanifestlabel', sa.Column('manifest_label_id', sa.Integer(), nullable=True))
op.create_index('tagmanifestlabel_broken_manifest', 'tagmanifestlabel', ['broken_manifest'], unique=False)
op.create_index('tagmanifestlabel_manifest_label_id', 'tagmanifestlabel', ['manifest_label_id'], unique=False)
op.create_foreign_key(op.f('fk_tagmanifestlabel_manifest_label_id_manifestlabel'), 'tagmanifestlabel', 'manifestlabel', ['manifest_label_id'], ['id'])
op.create_table('tagmanifesttomanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest'))
)
op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False)
op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=True)
op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True)
op.create_table('tagmanifestlabelmap',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=True),
sa.Column('label_id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_label_id', sa.Integer(), nullable=False),
sa.Column('manifest_label_id', sa.Integer(), nullable=True),
sa.Column('broken_manifest', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_tagmanifestlabelmap_label_id_label')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifestlabelmap_manifest_id_manifest')),
sa.ForeignKeyConstraint(['manifest_label_id'], ['manifestlabel.id'], name=op.f('fk_tagmanifestlabelmap_manifest_label_id_manifestlabel')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifestlabelmap_tag_manifest_id_tagmanifest')),
sa.ForeignKeyConstraint(['tag_manifest_label_id'], ['tagmanifestlabel.id'], name=op.f('fk_tagmanifestlabelmap_tag_manifest_label_id_tagmanifestlabel')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifestlabelmap'))
)
op.create_index('tagmanifestlabelmap_broken_manifest', 'tagmanifestlabelmap', ['broken_manifest'], unique=False)
op.create_index('tagmanifestlabelmap_label_id', 'tagmanifestlabelmap', ['label_id'], unique=False)
op.create_index('tagmanifestlabelmap_manifest_id', 'tagmanifestlabelmap', ['manifest_id'], unique=False)
op.create_index('tagmanifestlabelmap_manifest_label_id', 'tagmanifestlabelmap', ['manifest_label_id'], unique=False)
op.create_index('tagmanifestlabelmap_tag_manifest_id', 'tagmanifestlabelmap', ['tag_manifest_id'], unique=False)
op.create_index('tagmanifestlabelmap_tag_manifest_label_id', 'tagmanifestlabelmap', ['tag_manifest_label_id'], unique=False)
# ### end Alembic commands ###
for media_type in DOCKER_SCHEMA1_CONTENT_TYPES:
@ -121,8 +143,19 @@ def upgrade(tables, tester):
('repository_id', tester.TestDataType.Foreign('repository')),
])
tester.populate_column('tagmanifest', 'manifest_id', tester.TestDataType.Foreign('manifest'))
tester.populate_column('tagmanifestlabel', 'manifest_label_id', tester.TestDataType.Foreign('manifestlabel'))
tester.populate_table('tagmanifesttomanifest', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
])
tester.populate_table('tagmanifestlabelmap', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
('tag_manifest_label_id', tester.TestDataType.Foreign('tagmanifestlabel')),
('manifest_label_id', tester.TestDataType.Foreign('manifestlabel')),
('label_id', tester.TestDataType.Foreign('label')),
])
# ### end population of test data ### #
@ -135,17 +168,8 @@ def downgrade(tables, tester):
mediatype.c.name == op.inline_literal(media_type)))
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(op.f('fk_tagmanifestlabel_manifest_label_id_manifestlabel'), 'tagmanifestlabel', type_='foreignkey')
op.drop_index('tagmanifestlabel_manifest_label_id', table_name='tagmanifestlabel')
op.drop_index('tagmanifestlabel_broken_manifest', table_name='tagmanifestlabel')
op.drop_column(u'tagmanifestlabel', 'manifest_label_id')
op.drop_column(u'tagmanifestlabel', 'broken_manifest')
op.drop_constraint(op.f('fk_tagmanifest_manifest_id_manifest'), 'tagmanifest', type_='foreignkey')
op.drop_index('tagmanifest_manifest_id', table_name='tagmanifest')
op.drop_index('tagmanifest_broken', table_name='tagmanifest')
op.drop_column(u'tagmanifest', 'manifest_id')
op.drop_column(u'tagmanifest', 'broken')
op.drop_table('tagmanifestlabelmap')
op.drop_table('tagmanifesttomanifest')
op.drop_table('manifestlegacyimage')
op.drop_table('manifestlabel')
op.drop_table('manifestblob')

View file

@ -3,7 +3,7 @@ import logging
from cachetools import lru_cache
from data.database import (Label, TagManifestLabel, MediaType, LabelSourceType, db_transaction,
ManifestLabel)
ManifestLabel, TagManifestLabelMap, TagManifestToManifest)
from data.model import InvalidLabelKeyException, InvalidMediaTypeException, DataModelException
from data.text import prefix_search
from util.validation import validate_label_key
@ -69,11 +69,20 @@ def create_manifest_label(tag_manifest, key, value, source_type_name, media_type
with db_transaction():
label = Label.create(key=key, value=value, source_type=source_type_id, media_type=media_type_id)
TagManifestLabel.create(annotated=tag_manifest, label=label,
tag_manifest_label = TagManifestLabel.create(annotated=tag_manifest, label=label,
repository=tag_manifest.tag.repository)
if tag_manifest.manifest is not None:
ManifestLabel.create(manifest=tag_manifest.manifest, label=label,
try:
mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
if mapping_row.manifest:
manifest_label = ManifestLabel.create(manifest=mapping_row.manifest, label=label,
repository=tag_manifest.tag.repository)
TagManifestLabelMap.create(manifest_label=manifest_label,
tag_manifest_label=tag_manifest_label,
label=label,
manifest=mapping_row.manifest,
tag_manifest=tag_manifest)
except TagManifestToManifest.DoesNotExist:
pass
return label
@ -120,6 +129,11 @@ def delete_manifest_label(label_uuid, tag_manifest):
raise DataModelException('Cannot delete immutable label')
# Delete the mapping records and label.
(TagManifestLabelMap
.delete()
.where(TagManifestLabelMap.label == label)
.execute())
deleted_count = TagManifestLabel.delete().where(TagManifestLabel.label == label).execute()
if deleted_count != 1:
logger.warning('More than a single label deleted for matching label %s', label_uuid)

View file

@ -10,7 +10,8 @@ from data.model import (image, db_transaction, DataModelException, _basequery,
from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest,
RepositoryNotification, Label, TagManifestLabel, get_epoch_timestamp,
db_for_update, Manifest, ManifestLabel, ManifestBlob,
ManifestLegacyImage)
ManifestLegacyImage, TagManifestToManifest,
TagManifestLabelMap)
from util.timedeltastring import convert_to_timedelta
@ -358,11 +359,22 @@ def _delete_tags(repo, query_modifier=None):
.join(RepositoryTag)
.where(RepositoryTag.id << tags_to_delete))
tag_manifest_ids_to_delete = [tagmanifest.id for tagmanifest in tag_manifests_to_delete]
manifest_ids_to_delete = [tagmanifest.manifest_id for tagmanifest in tag_manifests_to_delete
if tagmanifest.manifest is not None]
# Find all the new-style manifests to delete, if any.
tmt_query = (TagManifestToManifest
.select()
.where(TagManifestToManifest.tag_manifest << tag_manifests_to_delete,
TagManifestToManifest.broken == False))
manifest_ids_to_delete = [tmt.manifest_id for tmt in tmt_query]
num_deleted_manifests = 0
if len(tag_manifest_ids_to_delete) > 0:
# Delete tag manifest -> manifest mapping entries.
(TagManifestToManifest
.delete()
.where(TagManifestToManifest.tag_manifest << tag_manifest_ids_to_delete)
.execute())
# Find the set of IDs for all the labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
@ -371,6 +383,11 @@ def _delete_tags(repo, query_modifier=None):
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
# Delete all the mapping entries for labels.
(TagManifestLabelMap
.delete()
.where(TagManifestLabelMap.tag_manifest << tag_manifest_ids_to_delete)
.execute())
(TagManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
@ -608,8 +625,9 @@ def _create_manifest(tag, manifest):
ManifestBlob.create(manifest=manifest_row, repository=tag.repository, blob=image_storage,
blob_index=index)
return TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes,
manifest=manifest_row)
tag_manifest = TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes)
TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest_row)
return tag_manifest
def load_tag_manifest(namespace, repo_name, tag_name):

View file

@ -11,7 +11,7 @@ from playhouse.test_utils import assert_query_count
from data import model, database
from data.database import (Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel,
ApprBlob, Manifest, TagManifest)
ApprBlob, Manifest, TagManifest, TagManifestToManifest)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from test.fixtures import *
@ -162,7 +162,7 @@ def _get_dangling_label_count():
def _get_dangling_manifest_count():
manifest_ids = set([current.id for current in Manifest.select()])
referenced_by_tag_manifest = set([manifest.manifest_id for manifest in TagManifest.select()])
referenced_by_tag_manifest = set([tmt.manifest_id for tmt in TagManifestToManifest.select()])
return len(manifest_ids - referenced_by_tag_manifest)

View file

@ -9,7 +9,7 @@ from mock import patch
from app import docker_v2_signing_key
from data.database import (Image, RepositoryTag, ImageStorage, Repository, Manifest, ManifestBlob,
ManifestLegacyImage)
ManifestLegacyImage, TagManifestToManifest)
from data.model.repository import create_repository
from data.model.tag import (list_active_repo_tags, create_or_update_tag, delete_tag,
get_matching_tags, _tag_alive, get_matching_tags_for_images,
@ -237,12 +237,14 @@ def test_store_tag_manifest(initialized_db):
tag_manifest, _ = store_tag_manifest('devtable', 'simple', 'sometag', manifest)
# Ensure we have the new-model expected rows.
assert tag_manifest.manifest is not None
assert tag_manifest.manifest.manifest_bytes == manifest.bytes
assert tag_manifest.manifest.digest == str(manifest.digest)
mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
assert mapping_row.manifest is not None
assert mapping_row.manifest.manifest_bytes == manifest.bytes
assert mapping_row.manifest.digest == str(manifest.digest)
blob_rows = {m.blob_id for m in
ManifestBlob.select().where(ManifestBlob.manifest == tag_manifest.manifest)}
ManifestBlob.select().where(ManifestBlob.manifest == mapping_row.manifest)}
assert blob_rows == {s.id for s in storages}
assert ManifestLegacyImage.get(manifest=tag_manifest.manifest).image == tag_manifest.tag.image
assert ManifestLegacyImage.get(manifest=mapping_row.manifest).image == tag_manifest.tag.image

View file

@ -128,6 +128,8 @@ def test_create_repository(repo_name, expected_status, client):
def test_get_repo(has_tag_manifest, client, initialized_db):
with client_with_identity('devtable', client) as cl:
if not has_tag_manifest:
database.TagManifestLabelMap.delete().execute()
database.TagManifestToManifest.delete().execute()
database.TagManifestLabel.delete().execute()
database.TagManifest.delete().execute()