From ce61ec6668b5870c6866c3fb30349c3151315c40 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 28 Aug 2018 23:19:57 -0400 Subject: [PATCH] Fix bug where GC attempts to delete manifests that are shared by multiple tag manifests Also adds a test for this case All of this will be moot once we get rid of tag manifests, but for now, its causing exceptions --- data/model/tag.py | 36 ++++++++++++++++---- data/model/test/test_gc.py | 67 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 10 deletions(-) diff --git a/data/model/tag.py b/data/model/tag.py index 61da144c2..fb366c01a 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -367,15 +367,21 @@ def _delete_tags(repo, query_modifier=None): .where(TagManifestToManifest.tag_manifest << tag_manifests_to_delete, TagManifestToManifest.broken == False)) - manifest_ids_to_delete = [tmt.manifest_id for tmt in tmt_query] + manifest_ids_to_possibly_delete = [tmt.manifest_id for tmt in tmt_query] + manifest_ids_to_delete = set(manifest_ids_to_possibly_delete) + + # Filter out any manifests referenced by other tag manifests. + if manifest_ids_to_possibly_delete: + ref_query = (TagManifestToManifest + .select() + .where(TagManifestToManifest.manifest << manifest_ids_to_possibly_delete, + ~(TagManifestToManifest.tag_manifest << tag_manifest_ids_to_delete))) + + still_referenced_manifests = set([tmt.manifest_id for tmt in ref_query]) + manifest_ids_to_delete = list(manifest_ids_to_delete - still_referenced_manifests) + num_deleted_manifests = 0 if len(tag_manifest_ids_to_delete) > 0: - # Delete tag manifest -> manifest mapping entries. - (TagManifestToManifest - .delete() - .where(TagManifestToManifest.tag_manifest << tag_manifest_ids_to_delete) - .execute()) - # Find the set of IDs for all the labels to delete. manifest_labels_query = (TagManifestLabel .select() @@ -383,6 +389,22 @@ def _delete_tags(repo, query_modifier=None): TagManifestLabel.annotated << tag_manifest_ids_to_delete)) label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query] + # Filter out any labels still referenced by manifests. + if label_ids: + ref_query = (ManifestLabel + .select() + .where(ManifestLabel.repository == repo, + ManifestLabel.label << label_ids, + ~(ManifestLabel.manifest << (manifest_ids_to_delete or [-1])))) + label_ids = set(label_ids) - set([manifest_label.label_id for manifest_label in ref_query]) + label_ids = list(label_ids) + + # Delete tag manifest -> manifest mapping entries. + (TagManifestToManifest + .delete() + .where(TagManifestToManifest.tag_manifest << tag_manifest_ids_to_delete) + .execute()) + # Delete all the mapping entries for labels. (TagManifestLabelMap .delete() diff --git a/data/model/test/test_gc.py b/data/model/test/test_gc.py index 369cdc4d5..f40154c3a 100644 --- a/data/model/test/test_gc.py +++ b/data/model/test/test_gc.py @@ -11,7 +11,8 @@ from playhouse.test_utils import assert_query_count from data import model, database from data.database import (Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel, - ApprBlob, Manifest, TagManifest, TagManifestToManifest) + ApprBlob, Manifest, TagManifest, TagManifestToManifest, ManifestLabel, + TagManifestLabelMap, ManifestBlob) from image.docker.schema1 import DockerSchema1ManifestBuilder from test.fixtures import * @@ -157,9 +158,13 @@ def _get_dangling_storage_count(): def _get_dangling_label_count(): + return len(_get_dangling_labels()) + + +def _get_dangling_labels(): label_ids = set([current.id for current in Label.select()]) referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()]) - return len(label_ids - referenced_by_manifest) + return label_ids - referenced_by_manifest def _get_dangling_manifest_count(): @@ -190,7 +195,7 @@ def assert_gc_integrity(expect_storage_removed=True): assert updated_storage_count == existing_storage_count updated_label_count = _get_dangling_label_count() - assert updated_label_count == existing_label_count + assert updated_label_count == existing_label_count, _get_dangling_labels() updated_manifest_count = _get_dangling_manifest_count() assert updated_manifest_count == existing_manifest_count @@ -654,3 +659,59 @@ def test_super_long_image_chain_gc(app, default_tag_policy): # Ensure the repository is now empty. assert_deleted(repository, *images) + + +def test_shared_manifest(default_tag_policy, initialized_db): + """ Test to ensure GC doesn't raise foreign key issues when trying to delete + a tag whose manifest is shared with another tag. This is a temporary test until we remove + the older data model tables. + """ + # Note: We do *not* assert_gc_integrity here, as it checks for dangling labels, which we end up + # with because we're retargetting manifests manually. + + repository = create_repository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) + + # Associate the manifest with the other tag. + latest_tag = model.tag.get_active_tag_for_repo(repository, 'latest') + other_tag = model.tag.get_active_tag_for_repo(repository, 'other') + + latest_tag_manifest = TagManifest.get(tag=latest_tag) + other_tag_manifest = TagManifest.get(tag=other_tag) + + latest_tmt = TagManifestToManifest.get(tag_manifest=latest_tag_manifest) + + other_tmt = TagManifestToManifest.get(tag_manifest=other_tag_manifest) + dangling_manifest = other_tmt.manifest + + # Repoint the manifest for testing. + other_tmt.manifest = latest_tmt.manifest + other_tmt.save() + + # Delete the dangling manifest and all its labels and blobs. + manifest_labels = ManifestLabel.select().where(ManifestLabel.manifest == dangling_manifest) + label_ids = [m.label_id for m in manifest_labels] + + TagManifestLabelMap.delete().where(TagManifestLabelMap.manifest == dangling_manifest).execute() + TagManifestLabel.delete().where(TagManifestLabel.label << label_ids).execute() + ManifestLabel.delete().where(ManifestLabel.manifest == dangling_manifest).execute() + ManifestBlob.delete().where(ManifestBlob.manifest == dangling_manifest).execute() + Label.delete().where(Label.id << label_ids).execute() + dangling_manifest.delete_instance(recursive=True) + + # Check the manifest. + manifest_id = latest_tmt.manifest_id + Manifest.get(id=manifest_id) + + # Delete the latest tag. + delete_tag(repository, 'latest') + + # Check the manifest again. + Manifest.get(id=manifest_id) + + # Check the tags and images. + assert_deleted(repository, 'i1') + assert_deleted(repository, 'i2') + assert_deleted(repository, 'i3') + + assert_not_deleted(repository, 'f1') + assert_not_deleted(repository, 'f2')