Fix the V22 phase 1 migrations to use new tables for mapping rather than editing existing tables

The ALTER TABLE operations previously used were causing the DB to die when run on the production TagManifest table, which has 7 million rows. We now use new mapping tables instead, which is less nice, but these are temporary anyway, so hopefully we only have to deal with their ugliness for a short duration.
This commit is contained in:
Joseph Schorr 2018-08-06 16:58:27 -04:00
parent 7068010751
commit 89582438cd
7 changed files with 128 additions and 57 deletions

View file

@ -3,7 +3,7 @@ import logging
from cachetools import lru_cache
from data.database import (Label, TagManifestLabel, MediaType, LabelSourceType, db_transaction,
ManifestLabel)
ManifestLabel, TagManifestLabelMap, TagManifestToManifest)
from data.model import InvalidLabelKeyException, InvalidMediaTypeException, DataModelException
from data.text import prefix_search
from util.validation import validate_label_key
@ -69,11 +69,20 @@ def create_manifest_label(tag_manifest, key, value, source_type_name, media_type
with db_transaction():
label = Label.create(key=key, value=value, source_type=source_type_id, media_type=media_type_id)
TagManifestLabel.create(annotated=tag_manifest, label=label,
repository=tag_manifest.tag.repository)
if tag_manifest.manifest is not None:
ManifestLabel.create(manifest=tag_manifest.manifest, label=label,
repository=tag_manifest.tag.repository)
tag_manifest_label = TagManifestLabel.create(annotated=tag_manifest, label=label,
repository=tag_manifest.tag.repository)
try:
mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
if mapping_row.manifest:
manifest_label = ManifestLabel.create(manifest=mapping_row.manifest, label=label,
repository=tag_manifest.tag.repository)
TagManifestLabelMap.create(manifest_label=manifest_label,
tag_manifest_label=tag_manifest_label,
label=label,
manifest=mapping_row.manifest,
tag_manifest=tag_manifest)
except TagManifestToManifest.DoesNotExist:
pass
return label
@ -120,6 +129,11 @@ def delete_manifest_label(label_uuid, tag_manifest):
raise DataModelException('Cannot delete immutable label')
# Delete the mapping records and label.
(TagManifestLabelMap
.delete()
.where(TagManifestLabelMap.label == label)
.execute())
deleted_count = TagManifestLabel.delete().where(TagManifestLabel.label == label).execute()
if deleted_count != 1:
logger.warning('More than a single label deleted for matching label %s', label_uuid)

View file

@ -10,7 +10,8 @@ from data.model import (image, db_transaction, DataModelException, _basequery,
from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest,
RepositoryNotification, Label, TagManifestLabel, get_epoch_timestamp,
db_for_update, Manifest, ManifestLabel, ManifestBlob,
ManifestLegacyImage)
ManifestLegacyImage, TagManifestToManifest,
TagManifestLabelMap)
from util.timedeltastring import convert_to_timedelta
@ -358,11 +359,22 @@ def _delete_tags(repo, query_modifier=None):
.join(RepositoryTag)
.where(RepositoryTag.id << tags_to_delete))
tag_manifest_ids_to_delete = [tagmanifest.id for tagmanifest in tag_manifests_to_delete]
manifest_ids_to_delete = [tagmanifest.manifest_id for tagmanifest in tag_manifests_to_delete
if tagmanifest.manifest is not None]
# Find all the new-style manifests to delete, if any.
tmt_query = (TagManifestToManifest
.select()
.where(TagManifestToManifest.tag_manifest << tag_manifests_to_delete,
TagManifestToManifest.broken == False))
manifest_ids_to_delete = [tmt.manifest_id for tmt in tmt_query]
num_deleted_manifests = 0
if len(tag_manifest_ids_to_delete) > 0:
# Delete tag manifest -> manifest mapping entries.
(TagManifestToManifest
.delete()
.where(TagManifestToManifest.tag_manifest << tag_manifest_ids_to_delete)
.execute())
# Find the set of IDs for all the labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
@ -371,6 +383,11 @@ def _delete_tags(repo, query_modifier=None):
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
# Delete all the mapping entries for labels.
(TagManifestLabelMap
.delete()
.where(TagManifestLabelMap.tag_manifest << tag_manifest_ids_to_delete)
.execute())
(TagManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
@ -608,8 +625,9 @@ def _create_manifest(tag, manifest):
ManifestBlob.create(manifest=manifest_row, repository=tag.repository, blob=image_storage,
blob_index=index)
return TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes,
manifest=manifest_row)
tag_manifest = TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes)
TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest_row)
return tag_manifest
def load_tag_manifest(namespace, repo_name, tag_name):

View file

@ -11,7 +11,7 @@ from playhouse.test_utils import assert_query_count
from data import model, database
from data.database import (Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel,
ApprBlob, Manifest, TagManifest)
ApprBlob, Manifest, TagManifest, TagManifestToManifest)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from test.fixtures import *
@ -162,7 +162,7 @@ def _get_dangling_label_count():
def _get_dangling_manifest_count():
    """Return the number of Manifest rows not referenced by any mapping row.

    A manifest counts as dangling when its id does not appear as the
    `manifest_id` of any TagManifestToManifest row.
    """
    manifest_ids = {current.id for current in Manifest.select()}
    # References are read from the TagManifestToManifest mapping table only.
    # A previous assignment built from TagManifest rows was overwritten before
    # it was ever used, so it only cost an extra query and has been dropped.
    referenced_by_tag_manifest = {tmt.manifest_id for tmt in TagManifestToManifest.select()}
    return len(manifest_ids - referenced_by_tag_manifest)

View file

@ -9,7 +9,7 @@ from mock import patch
from app import docker_v2_signing_key
from data.database import (Image, RepositoryTag, ImageStorage, Repository, Manifest, ManifestBlob,
ManifestLegacyImage)
ManifestLegacyImage, TagManifestToManifest)
from data.model.repository import create_repository
from data.model.tag import (list_active_repo_tags, create_or_update_tag, delete_tag,
get_matching_tags, _tag_alive, get_matching_tags_for_images,
@ -237,12 +237,14 @@ def test_store_tag_manifest(initialized_db):
tag_manifest, _ = store_tag_manifest('devtable', 'simple', 'sometag', manifest)
# Ensure we have the new-model expected rows.
assert tag_manifest.manifest is not None
assert tag_manifest.manifest.manifest_bytes == manifest.bytes
assert tag_manifest.manifest.digest == str(manifest.digest)
mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
assert mapping_row.manifest is not None
assert mapping_row.manifest.manifest_bytes == manifest.bytes
assert mapping_row.manifest.digest == str(manifest.digest)
blob_rows = {m.blob_id for m in
ManifestBlob.select().where(ManifestBlob.manifest == tag_manifest.manifest)}
ManifestBlob.select().where(ManifestBlob.manifest == mapping_row.manifest)}
assert blob_rows == {s.id for s in storages}
assert ManifestLegacyImage.get(manifest=tag_manifest.manifest).image == tag_manifest.tag.image
assert ManifestLegacyImage.get(manifest=mapping_row.manifest).image == tag_manifest.tag.image