Merge pull request #3232 from quay/fix-shared-manifest-gc-bug

Fix bug where GC attempts to delete manifests that are shared by multiple tag manifests
This commit is contained in:
Joseph Schorr 2018-08-29 13:58:03 -04:00 committed by GitHub
commit 7aa0091398
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 93 additions and 10 deletions

View file

@ -367,15 +367,21 @@ def _delete_tags(repo, query_modifier=None):
.where(TagManifestToManifest.tag_manifest << tag_manifests_to_delete,
TagManifestToManifest.broken == False))
manifest_ids_to_delete = [tmt.manifest_id for tmt in tmt_query]
manifest_ids_to_possibly_delete = [tmt.manifest_id for tmt in tmt_query]
manifest_ids_to_delete = set(manifest_ids_to_possibly_delete)
# Filter out any manifests referenced by other tag manifests.
if manifest_ids_to_possibly_delete:
ref_query = (TagManifestToManifest
.select()
.where(TagManifestToManifest.manifest << manifest_ids_to_possibly_delete,
~(TagManifestToManifest.tag_manifest << tag_manifest_ids_to_delete)))
still_referenced_manifests = set([tmt.manifest_id for tmt in ref_query])
manifest_ids_to_delete = list(manifest_ids_to_delete - still_referenced_manifests)
num_deleted_manifests = 0
if len(tag_manifest_ids_to_delete) > 0:
# Delete tag manifest -> manifest mapping entries.
(TagManifestToManifest
.delete()
.where(TagManifestToManifest.tag_manifest << tag_manifest_ids_to_delete)
.execute())
# Find the set of IDs for all the labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
@ -383,6 +389,22 @@ def _delete_tags(repo, query_modifier=None):
TagManifestLabel.annotated << tag_manifest_ids_to_delete))
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
# Filter out any labels still referenced by manifests.
if label_ids:
ref_query = (ManifestLabel
.select()
.where(ManifestLabel.repository == repo,
ManifestLabel.label << label_ids,
~(ManifestLabel.manifest << (manifest_ids_to_delete or [-1]))))
label_ids = set(label_ids) - set([manifest_label.label_id for manifest_label in ref_query])
label_ids = list(label_ids)
# Delete tag manifest -> manifest mapping entries.
(TagManifestToManifest
.delete()
.where(TagManifestToManifest.tag_manifest << tag_manifest_ids_to_delete)
.execute())
# Delete all the mapping entries for labels.
(TagManifestLabelMap
.delete()

View file

@ -11,7 +11,8 @@ from playhouse.test_utils import assert_query_count
from data import model, database
from data.database import (Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel,
ApprBlob, Manifest, TagManifest, TagManifestToManifest)
ApprBlob, Manifest, TagManifest, TagManifestToManifest, ManifestLabel,
TagManifestLabelMap, ManifestBlob)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from test.fixtures import *
@ -157,9 +158,13 @@ def _get_dangling_storage_count():
def _get_dangling_label_count():
return len(_get_dangling_labels())
def _get_dangling_labels():
label_ids = set([current.id for current in Label.select()])
referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()])
return len(label_ids - referenced_by_manifest)
return label_ids - referenced_by_manifest
def _get_dangling_manifest_count():
@ -190,7 +195,7 @@ def assert_gc_integrity(expect_storage_removed=True):
assert updated_storage_count == existing_storage_count
updated_label_count = _get_dangling_label_count()
assert updated_label_count == existing_label_count
assert updated_label_count == existing_label_count, _get_dangling_labels()
updated_manifest_count = _get_dangling_manifest_count()
assert updated_manifest_count == existing_manifest_count
@ -654,3 +659,59 @@ def test_super_long_image_chain_gc(app, default_tag_policy):
# Ensure the repository is now empty.
assert_deleted(repository, *images)
def test_shared_manifest(default_tag_policy, initialized_db):
""" Test to ensure GC doesn't raise foreign key issues when trying to delete
a tag whose manifest is shared with another tag. This is a temporary test until we remove
the older data model tables.
"""
# Note: We do *not* assert_gc_integrity here, as it checks for dangling labels, which we end up
# with because we're retargetting manifests manually.
repository = create_repository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])
# Associate the manifest with the other tag.
latest_tag = model.tag.get_active_tag_for_repo(repository, 'latest')
other_tag = model.tag.get_active_tag_for_repo(repository, 'other')
latest_tag_manifest = TagManifest.get(tag=latest_tag)
other_tag_manifest = TagManifest.get(tag=other_tag)
latest_tmt = TagManifestToManifest.get(tag_manifest=latest_tag_manifest)
other_tmt = TagManifestToManifest.get(tag_manifest=other_tag_manifest)
dangling_manifest = other_tmt.manifest
# Repoint the manifest for testing.
other_tmt.manifest = latest_tmt.manifest
other_tmt.save()
# Delete the dangling manifest and all its labels and blobs.
manifest_labels = ManifestLabel.select().where(ManifestLabel.manifest == dangling_manifest)
label_ids = [m.label_id for m in manifest_labels]
TagManifestLabelMap.delete().where(TagManifestLabelMap.manifest == dangling_manifest).execute()
TagManifestLabel.delete().where(TagManifestLabel.label << label_ids).execute()
ManifestLabel.delete().where(ManifestLabel.manifest == dangling_manifest).execute()
ManifestBlob.delete().where(ManifestBlob.manifest == dangling_manifest).execute()
Label.delete().where(Label.id << label_ids).execute()
dangling_manifest.delete_instance(recursive=True)
# Check the manifest.
manifest_id = latest_tmt.manifest_id
Manifest.get(id=manifest_id)
# Delete the latest tag.
delete_tag(repository, 'latest')
# Check the manifest again.
Manifest.get(id=manifest_id)
# Check the tags and images.
assert_deleted(repository, 'i1')
assert_deleted(repository, 'i2')
assert_deleted(repository, 'i3')
assert_not_deleted(repository, 'f1')
assert_not_deleted(repository, 'f2')