diff --git a/data/model/repository.py b/data/model/repository.py index 75b4b65c7..a1b8196e1 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -44,6 +44,12 @@ def get_repository(namespace_name, repository_name): def purge_repository(namespace_name, repository_name): + """ Completely delete all traces of the repository. Will return True upon + complete success, and False upon partial or total failure. Garbage + collection is incremental and repeatable, so this return value does + not need to be checked or responded to. + """ + repo = _basequery.get_existing_repository(namespace_name, repository_name) # Delete all tags to allow gc to reclaim storage @@ -57,12 +63,18 @@ def purge_repository(namespace_name, repository_name): unreferenced_candidates = set(img[0] for img in unreferenced_image_q.tuples()) # Gc to remove the images and storage - garbage_collect_repo(repo, previously_referenced | unreferenced_candidates) + all_repo_images = previously_referenced | unreferenced_candidates + successful_gc = garbage_collect_repo(repo, all_repo_images) + + if not successful_gc: + return False # Delete the rest of the repository metadata fetched = _basequery.get_existing_repository(namespace_name, repository_name) fetched.delete_instance(recursive=True, delete_nullable=False) + return True + @ttl_cache(maxsize=1, ttl=600) def _get_gc_expiration_policies(): @@ -112,6 +124,13 @@ def find_repository_with_garbage(limit_to_gc_policy_s): def garbage_collect_repo(repo, extra_candidate_set=None): + """ Garbage collect the specified repository object. This will remove all + images, derived images, and other associated metadata, for images which + are no longer referenced by a tag or another image which is itself + tagged. Returns True if garbage collection was completed without error + and False otherwise. Retries are safe and work incrementally, so this + return value does not need to be checked or handled. + """ logger.debug('Garbage collecting repository %s', repo.id) storage_id_whitelist = set() @@ -122,7 +141,7 @@ def garbage_collect_repo(repo, extra_candidate_set=None): if not len(candidate_orphan_image_set): logger.debug('No candidate images for GC for repo: %s', repo.id) - return + return True candidates_orphans = list(candidate_orphan_image_set) @@ -190,18 +209,20 @@ def garbage_collect_repo(repo, extra_candidate_set=None): .execute()) except IntegrityError: logger.info('Could not GC derived images %s; will try again soon', to_remove) - return + return False try: Image.delete().where(Image.id << to_remove).execute() except IntegrityError: logger.info('Could not GC images %s; will try again soon', to_remove) - return + return False if len(to_remove) > 0: logger.info('Garbage collecting storage for images: %s', to_remove) storage.garbage_collect_storage(storage_id_whitelist) + return True + def star_repository(user, repository): """ Stars a repository. """ diff --git a/test/test_gc.py b/test/test_gc.py index b313f3024..c366a5a83 100644 --- a/test/test_gc.py +++ b/test/test_gc.py @@ -1,13 +1,13 @@ import unittest import time +from contextlib import contextmanager from playhouse.test_utils import assert_query_count from app import app, storage from initdb import setup_database_for_testing, finished_database_for_testing from data import model, database from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel -from endpoints.v2.manifest import _generate_and_store_manifest ADMIN_ACCESS_USER = 'devtable' @@ -16,48 +16,6 @@ PUBLIC_USER = 'public' REPO = 'somerepo' -class assert_no_new_dangling_labels(object): - """ Specialized assertion for ensuring that GC cleans up all labels. - """ - def __init__(self): - self.existing_count = 0 - - def _get_dangling_count(self): - label_ids = set([current.id for current in Label.select()]) - referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()]) - return len(label_ids - referenced_by_manifest) - - def __enter__(self): - self.existing_count = self._get_dangling_count() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - updated_count = self._get_dangling_count() - assert updated_count == self.existing_count - - -class assert_no_new_dangling_storages(object): - """ Specialized assertion for ensuring that GC cleans up all dangling storages. - """ - def __init__(self): - self.existing_count = 0 - - def _get_dangling_count(self): - storage_ids = set([current.id for current in ImageStorage.select()]) - referneced_by_image = set([image.storage_id for image in Image.select()]) - referenced_by_derived = set([derived.derivative_id for derived in DerivedStorageForImage.select()]) - - return len(storage_ids - referneced_by_image - referenced_by_derived) - - def __enter__(self): - self.existing_count = self._get_dangling_count() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - updated_count = self._get_dangling_count() - assert updated_count == self.existing_count - - class TestGarbageCollection(unittest.TestCase): @staticmethod def _set_tag_expiration_policy(namespace, expiration_s): @@ -78,7 +36,8 @@ class TestGarbageCollection(unittest.TestCase): finished_database_for_testing(self) self.ctx.__exit__(True, None, None) - def createImage(self, docker_image_id, repository_obj, username): + @staticmethod + def createImage(docker_image_id, repository_obj, username): preferred = storage.preferred_locations[0] image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {}, preferred) @@ -91,10 +50,9 @@ class TestGarbageCollection(unittest.TestCase): # Add some torrent info. try: + database.TorrentInfo.get(storage=image.storage) + except database.TorrentInfo.DoesNotExist: model.storage.save_torrent_info(image.storage, 1, 'helloworld') - model.storage.save_torrent_info(image.storage, 2, 'helloworlds!') - except: - pass # Add some additional placements to the image. for location_name in ['local_eu']: @@ -144,17 +102,17 @@ class TestGarbageCollection(unittest.TestCase): return repo def gcNow(self, repository): - model.repository.garbage_collect_repo(repository) + self.assertTrue(model.repository.garbage_collect_repo(repository)) def deleteTag(self, repository, tag, perform_gc=True): model.tag.delete_tag(repository.namespace_user.username, repository.name, tag) if perform_gc: - model.repository.garbage_collect_repo(repository) + self.assertTrue(model.repository.garbage_collect_repo(repository)) def moveTag(self, repository, tag, docker_image_id): model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag, - docker_image_id) - model.repository.garbage_collect_repo(repository) + docker_image_id) + self.assertTrue(model.repository.garbage_collect_repo(repository)) def assertNotDeleted(self, repository, *args): for docker_image_id in args: @@ -172,14 +130,43 @@ class TestGarbageCollection(unittest.TestCase): self.fail('Expected image %s to be deleted' % docker_image_id) + @staticmethod + def _get_dangling_storage_count(): + storage_ids = set([current.id for current in ImageStorage.select()]) + referenced_by_image = set([image.storage_id for image in Image.select()]) + referenced_by_derived = set([derived.derivative_id + for derived in DerivedStorageForImage.select()]) + + return len(storage_ids - referenced_by_image - referenced_by_derived) + + @staticmethod + def _get_dangling_label_count(): + label_ids = set([current.id for current in Label.select()]) + referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()]) + return len(label_ids - referenced_by_manifest) + + @contextmanager + def assert_no_new_dangling_storages_or_labels(self): + """ Specialized assertion for ensuring that GC cleans up all dangling storages + and labels. + """ + # TODO: Consider also asserting the number of DB queries being performed. + existing_storage_count = self._get_dangling_storage_count() + existing_label_count = self._get_dangling_label_count() + yield + updated_storage_count = self._get_dangling_storage_count() + self.assertEqual(updated_storage_count, existing_storage_count) + + updated_label_count = self._get_dangling_label_count() + self.assertEqual(updated_label_count, existing_label_count) def test_has_garbage(self): """ Remove all existing repositories, then add one without garbage, check, then add one with garbage, and check again. """ # Delete all existing repos. - for repo in database.Repository.select(): - model.repository.purge_repository(repo.namespace_user.username, repo.name) + for repo in database.Repository.select().order_by(database.Repository.id): + self.assertTrue(model.repository.purge_repository(repo.namespace_user.username, repo.name)) # Change the time machine expiration on the namespace. (database.User @@ -211,191 +198,158 @@ class TestGarbageCollection(unittest.TestCase): self.assertEquals(REPO, repository.name) # GC the repository. - model.repository.garbage_collect_repo(repository) + self.assertTrue(model.repository.garbage_collect_repo(repository)) # There should now be no repositories with garbage. self.assertIsNone(model.repository.find_repository_with_garbage(0)) - def test_find_garbage_policy_functions(self): with assert_query_count(1): one_policy = model.repository.get_random_gc_policy() all_policies = model.repository._get_gc_expiration_policies() self.assertIn(one_policy, all_policies) - def test_one_tag(self): """ Create a repository with a single tag, then remove that tag and verify that the repository is now empty. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3']) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i1', 'i2', 'i3') - + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3']) + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i1', 'i2', 'i3') def test_two_tags_unshared_images(self): """ Repository has two tags with no shared images between them. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i1', 'i2', 'i3') - self.assertNotDeleted(repository, 'f1', 'f2') - + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i1', 'i2', 'i3') + self.assertNotDeleted(repository, 'f1', 'f2') def test_two_tags_shared_images(self): """ Repository has two tags with shared images. Deleting the tag should only remove the unshared images. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') - + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') def test_unrelated_repositories(self): """ Two repositories with different images. Removing the tag from one leaves the other's images intact. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') - repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2') + with self.assert_no_new_dangling_storages_or_labels(): + repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') + repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2') - self.deleteTag(repository1, 'latest') - - self.assertDeleted(repository1, 'i1', 'i2', 'i3') - self.assertNotDeleted(repository2, 'j1', 'j2', 'j3') + self.deleteTag(repository1, 'latest') + self.assertDeleted(repository1, 'i1', 'i2', 'i3') + self.assertNotDeleted(repository2, 'j1', 'j2', 'j3') def test_related_repositories(self): """ Two repositories with shared images. Removing the tag from one leaves the other's images intact. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') - repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2') + with self.assert_no_new_dangling_storages_or_labels(): + repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') + repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2') - self.deleteTag(repository1, 'latest') - - self.assertDeleted(repository1, 'i3') - self.assertNotDeleted(repository2, 'i1', 'i2', 'j1') + self.deleteTag(repository1, 'latest') + self.assertDeleted(repository1, 'i3') + self.assertNotDeleted(repository2, 'i1', 'i2', 'j1') def test_inaccessible_repositories(self): """ Two repositories under different namespaces should result in the images being deleted but not completely removed from the database. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3']) - repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3']) - - self.deleteTag(repository1, 'latest') - self.assertDeleted(repository1, 'i1', 'i2', 'i3') - self.assertNotDeleted(repository2, 'i1', 'i2', 'i3') + with self.assert_no_new_dangling_storages_or_labels(): + repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3']) + repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3']) + self.deleteTag(repository1, 'latest') + self.assertDeleted(repository1, 'i1', 'i2', 'i3') + self.assertNotDeleted(repository2, 'i1', 'i2', 'i3') def test_multiple_shared_images(self): """ Repository has multiple tags with shared images. Selectively deleting the tags, and verifying at each step. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], - third=['t1', 't2', 't3'], fourth=['i1', 'f1']) + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], + third=['t1', 't2', 't3'], fourth=['i1', 'f1']) - # Delete tag other. Should delete f2, since it is not shared. - self.deleteTag(repository, 'other') - self.assertDeleted(repository, 'f2') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') + # Delete tag other. Should delete f2, since it is not shared. + self.deleteTag(repository, 'other') + self.assertDeleted(repository, 'f2') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') - # Move tag fourth to i3. This should remove f1 since it is no longer referenced. - self.moveTag(repository, 'fourth', 'i3') - self.assertDeleted(repository, 'f1') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') + # Move tag fourth to i3. This should remove f1 since it is no longer referenced. + self.moveTag(repository, 'fourth', 'i3') + self.assertDeleted(repository, 'f1') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') - # Delete tag 'latest'. This should do nothing since fourth is on the same branch. - self.deleteTag(repository, 'latest') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') + # Delete tag 'latest'. This should do nothing since fourth is on the same branch. + self.deleteTag(repository, 'latest') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') - # Delete tag 'third'. This should remove t1->t3. - self.deleteTag(repository, 'third') - self.assertDeleted(repository, 't1', 't2', 't3') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3') + # Delete tag 'third'. This should remove t1->t3. + self.deleteTag(repository, 'third') + self.assertDeleted(repository, 't1', 't2', 't3') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3') - # Add tag to i1. - self.moveTag(repository, 'newtag', 'i1') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3') + # Add tag to i1. + self.moveTag(repository, 'newtag', 'i1') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3') - # Delete tag 'fourth'. This should remove i2 and i3. - self.deleteTag(repository, 'fourth') - self.assertDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1') - - # Delete tag 'newtag'. This should remove the remaining image. - self.deleteTag(repository, 'newtag') - self.assertDeleted(repository, 'i1') + # Delete tag 'fourth'. This should remove i2 and i3. + self.deleteTag(repository, 'fourth') + self.assertDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1') + # Delete tag 'newtag'. This should remove the remaining image. + self.deleteTag(repository, 'newtag') + self.assertDeleted(repository, 'i1') def test_empty_gc(self): - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], - third=['t1', 't2', 't3'], fourth=['i1', 'f1']) - - self.gcNow(repository) - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], + third=['t1', 't2', 't3'], fourth=['i1', 'f1']) + self.gcNow(repository) + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') def test_time_machine_no_gc(self): """ Repository has two tags with shared images. Deleting the tag should not remove any images """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24) - - self.deleteTag(repository, 'latest') - self.assertNotDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24) + self.deleteTag(repository, 'latest') + self.assertNotDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') def test_time_machine_gc(self): """ Repository has two tags with shared images. Deleting the second tag should cause the images for the first deleted tag to gc. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - self._set_tag_expiration_policy(repository.namespace_user.username, 1) + self._set_tag_expiration_policy(repository.namespace_user.username, 1) - self.deleteTag(repository, 'latest') - self.assertNotDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') + self.deleteTag(repository, 'latest') + self.assertNotDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') - time.sleep(2) + time.sleep(2) - self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc - self.assertDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') - - - def test_manifest_gc(self): - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - _generate_and_store_manifest(ADMIN_ACCESS_USER, REPO, 'latest') - - self._set_tag_expiration_policy(repository.namespace_user.username, 0) - - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i2', 'i3') + self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc + self.assertDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') if __name__ == '__main__':