diff --git a/data/model/repository.py b/data/model/repository.py index ebba1029f..aaceb74e6 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -57,16 +57,17 @@ def _purge_all_repository_tags(namespace_name, repository_name): return # Find all labels to delete. - labels = list(TagManifestLabel - .select(TagManifestLabel.label) - .where(TagManifestLabel.repository == repo)) + manifest_labels_query = (TagManifestLabel + .select() + .where(TagManifestLabel.repository == repo)) - # Delete all the mapping entries. - TagManifestLabel.delete().where(TagManifestLabel.repository == repo).execute() + label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query] + if label_ids: + # Delete all the mapping entries. + TagManifestLabel.delete().where(TagManifestLabel.repository == repo).execute() - # Delete all the matching labels. - if labels: - Label.delete().where(Label.id << [label.id for label in labels]).execute() + # Delete all the matching labels. + Label.delete().where(Label.id << label_ids).execute() # Delete all the manifests. TagManifest.delete().where(TagManifest.tag << repo_tags).execute() diff --git a/data/model/tag.py b/data/model/tag.py index 96aa34e76..6f2def74f 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -152,19 +152,22 @@ def garbage_collect_tags(repo): num_deleted_manifests = 0 if len(manifests_to_delete) > 0: # Find the set of IDs for all the labels to delete. - labels = list(TagManifestLabel - .select(TagManifestLabel.id) - .where(TagManifestLabel.annotated << manifests_to_delete)) + manifest_labels_query = (TagManifestLabel + .select() + .where(TagManifestLabel.repository == repo, + TagManifestLabel.annotated << manifests_to_delete)) - if len(labels) > 0: - # Delete the mapping entries. + label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query] + if label_ids: + # Delete all the mapping entries. (TagManifestLabel .delete() - .where(TagManifestLabel.annotated << manifests_to_delete) + .where(TagManifestLabel.repository == repo, + TagManifestLabel.annotated << manifests_to_delete) .execute()) - # Delete the labels themselves. - Label.delete().where(Label.id << [label.id for label in labels]).execute() + # Delete all the matching labels. + Label.delete().where(Label.id << label_ids).execute() # Delete the tag manifests themselves. num_deleted_manifests = (TagManifest diff --git a/test/test_api_usage.py b/test/test_api_usage.py index 8761281f5..45e0b297f 100644 --- a/test/test_api_usage.py +++ b/test/test_api_usage.py @@ -1768,6 +1768,8 @@ class TestDeleteRepository(ApiTestCase): date=datetime.datetime.now() - datetime.timedelta(days=5), count=6) # Create some labels. + pre_delete_label_count = database.Label.select().count() + tag_manifest = model.tag.load_tag_manifest(ADMIN_ACCESS_USER, 'complex', 'prod') model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest') model.label.create_manifest_label(tag_manifest, 'foo', 'baz', 'manifest') @@ -1785,6 +1787,10 @@ class TestDeleteRepository(ApiTestCase): params=dict(repository=self.COMPLEX_REPO), expected_code=404) + # Verify the labels are gone. + post_delete_label_count = database.Label.select().count() + self.assertEquals(post_delete_label_count, pre_delete_label_count) + class TestGetRepository(ApiTestCase): PUBLIC_REPO = PUBLIC_USER + '/publicrepo' diff --git a/test/test_gc.py b/test/test_gc.py index ce9f13e98..e5d7cef22 100644 --- a/test/test_gc.py +++ b/test/test_gc.py @@ -6,7 +6,7 @@ from playhouse.test_utils import assert_query_count from app import app, storage from initdb import setup_database_for_testing, finished_database_for_testing from data import model, database -from data.database import Image, ImageStorage, DerivedStorageForImage +from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel from endpoints.v2.manifest import _generate_and_store_manifest @@ -16,6 +16,26 @@ PUBLIC_USER = 'public' REPO = 'somerepo' +class assert_no_new_dangling_labels(object): + """ Specialized assertion for ensuring that GC cleans up all labels. + """ + def __init__(self): + self.existing_count = 0 + + def _get_dangling_count(self): + label_ids = set([current.id for current in Label.select()]) + referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()]) + return len(label_ids - referenced_by_manifest) + + def __enter__(self): + self.existing_count = self._get_dangling_count() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + updated_count = self._get_dangling_count() + assert updated_count == self.existing_count + + class assert_no_new_dangling_storages(object): """ Specialized assertion for ensuring that GC cleans up all dangling storages. """ @@ -114,7 +134,12 @@ class TestGarbageCollection(unittest.TestCase): parent=parent) # Set the tag for the image. - model.tag.create_or_update_tag(namespace, name, tag_name, image_ids[-1]) + tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1], + 'sha:someshahere', '{}') + + # Add some labels to the tag. + model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest') + model.label.create_manifest_label(tag_manifest, 'meh', 'grah', 'manifest') return repo @@ -201,164 +226,175 @@ class TestGarbageCollection(unittest.TestCase): def test_one_tag(self): """ Create a repository with a single tag, then remove that tag and verify that the repository is now empty. """ - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3']) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i1', 'i2', 'i3') + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository = self.createRepository(latest=['i1', 'i2', 'i3']) + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i1', 'i2', 'i3') def test_two_tags_unshared_images(self): """ Repository has two tags with no shared images between them. """ - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i1', 'i2', 'i3') - self.assertNotDeleted(repository, 'f1', 'f2') + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i1', 'i2', 'i3') + self.assertNotDeleted(repository, 'f1', 'f2') def test_two_tags_shared_images(self): """ Repository has two tags with shared images. Deleting the tag should only remove the unshared images. """ - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') def test_unrelated_repositories(self): """ Two repositories with different images. Removing the tag from one leaves the other's images intact. """ - with assert_no_new_dangling_storages(): - repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') - repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2') + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') + repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2') - self.deleteTag(repository1, 'latest') + self.deleteTag(repository1, 'latest') - self.assertDeleted(repository1, 'i1', 'i2', 'i3') - self.assertNotDeleted(repository2, 'j1', 'j2', 'j3') + self.assertDeleted(repository1, 'i1', 'i2', 'i3') + self.assertNotDeleted(repository2, 'j1', 'j2', 'j3') def test_related_repositories(self): """ Two repositories with shared images. Removing the tag from one leaves the other's images intact. """ - with assert_no_new_dangling_storages(): - repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') - repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2') + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') + repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2') - self.deleteTag(repository1, 'latest') + self.deleteTag(repository1, 'latest') - self.assertDeleted(repository1, 'i3') - self.assertNotDeleted(repository2, 'i1', 'i2', 'j1') + self.assertDeleted(repository1, 'i3') + self.assertNotDeleted(repository2, 'i1', 'i2', 'j1') def test_inaccessible_repositories(self): """ Two repositories under different namespaces should result in the images being deleted but not completely removed from the database. """ - with assert_no_new_dangling_storages(): - repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3']) - repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3']) + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3']) + repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3']) - self.deleteTag(repository1, 'latest') - self.assertDeleted(repository1, 'i1', 'i2', 'i3') - self.assertNotDeleted(repository2, 'i1', 'i2', 'i3') + self.deleteTag(repository1, 'latest') + self.assertDeleted(repository1, 'i1', 'i2', 'i3') + self.assertNotDeleted(repository2, 'i1', 'i2', 'i3') def test_multiple_shared_images(self): """ Repository has multiple tags with shared images. Selectively deleting the tags, and verifying at each step. """ - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], - third=['t1', 't2', 't3'], fourth=['i1', 'f1']) + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], + third=['t1', 't2', 't3'], fourth=['i1', 'f1']) - # Delete tag other. Should delete f2, since it is not shared. - self.deleteTag(repository, 'other') - self.assertDeleted(repository, 'f2') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') + # Delete tag other. Should delete f2, since it is not shared. + self.deleteTag(repository, 'other') + self.assertDeleted(repository, 'f2') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') - # Move tag fourth to i3. This should remove f1 since it is no longer referenced. - self.moveTag(repository, 'fourth', 'i3') - self.assertDeleted(repository, 'f1') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') + # Move tag fourth to i3. This should remove f1 since it is no longer referenced. + self.moveTag(repository, 'fourth', 'i3') + self.assertDeleted(repository, 'f1') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') - # Delete tag 'latest'. This should do nothing since fourth is on the same branch. - self.deleteTag(repository, 'latest') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') + # Delete tag 'latest'. This should do nothing since fourth is on the same branch. + self.deleteTag(repository, 'latest') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') - # Delete tag 'third'. This should remove t1->t3. - self.deleteTag(repository, 'third') - self.assertDeleted(repository, 't1', 't2', 't3') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3') + # Delete tag 'third'. This should remove t1->t3. + self.deleteTag(repository, 'third') + self.assertDeleted(repository, 't1', 't2', 't3') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3') - # Add tag to i1. - self.moveTag(repository, 'newtag', 'i1') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3') + # Add tag to i1. + self.moveTag(repository, 'newtag', 'i1') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3') - # Delete tag 'fourth'. This should remove i2 and i3. - self.deleteTag(repository, 'fourth') - self.assertDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1') + # Delete tag 'fourth'. This should remove i2 and i3. + self.deleteTag(repository, 'fourth') + self.assertDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1') - # Delete tag 'newtag'. This should remove the remaining image. - self.deleteTag(repository, 'newtag') - self.assertDeleted(repository, 'i1') + # Delete tag 'newtag'. This should remove the remaining image. + self.deleteTag(repository, 'newtag') + self.assertDeleted(repository, 'i1') def test_empty_gc(self): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], - third=['t1', 't2', 't3'], fourth=['i1', 'f1']) + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], + third=['t1', 't2', 't3'], fourth=['i1', 'f1']) - self.gcNow(repository) - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') + self.gcNow(repository) + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') def test_time_machine_no_gc(self): """ Repository has two tags with shared images. Deleting the tag should not remove any images """ - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24) + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24) - self.deleteTag(repository, 'latest') - self.assertNotDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') + self.deleteTag(repository, 'latest') + self.assertNotDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') def test_time_machine_gc(self): """ Repository has two tags with shared images. Deleting the second tag should cause the images for the first deleted tag to gc. """ - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - self._set_tag_expiration_policy(repository.namespace_user.username, 1) + self._set_tag_expiration_policy(repository.namespace_user.username, 1) - self.deleteTag(repository, 'latest') - self.assertNotDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') + self.deleteTag(repository, 'latest') + self.assertNotDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') - time.sleep(2) + time.sleep(2) - self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc - self.assertDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') + self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc + self.assertDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') def test_manifest_gc(self): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - _generate_and_store_manifest(ADMIN_ACCESS_USER, REPO, 'latest') + with assert_no_new_dangling_labels(): + with assert_no_new_dangling_storages(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + _generate_and_store_manifest(ADMIN_ACCESS_USER, REPO, 'latest') - self._set_tag_expiration_policy(repository.namespace_user.username, 0) + self._set_tag_expiration_policy(repository.namespace_user.username, 0) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i2', 'i3') + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i2', 'i3') if __name__ == '__main__':