import unittest
import time

from playhouse.test_utils import assert_query_count

from app import app, storage
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model, database
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel
from endpoints.v2.manifest import _generate_and_store_manifest


ADMIN_ACCESS_USER = 'devtable'
PUBLIC_USER = 'public'

REPO = 'somerepo'


class _assert_dangling_count_unchanged(object):
    """ Context manager that fails if a "dangling row" count differs between entry and exit.

        Subclasses implement `_get_dangling_count`. The count is sampled in `__enter__` and
        again in `__exit__`; any difference (increase or decrease) raises `AssertionError`.
    """

    # Human readable name of the counted rows, used only in the failure message.
    _row_description = 'rows'

    def __init__(self):
        self.existing_count = 0

    def _get_dangling_count(self):
        """ Returns the current number of dangling rows. Must be overridden. """
        raise NotImplementedError

    def __enter__(self):
        self.existing_count = self._get_dangling_count()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # If the body already failed, let that failure propagate instead of replacing it
        # with a (probably consequential) count mismatch.
        if exc_type is not None:
            return False

        updated_count = self._get_dangling_count()
        if updated_count != self.existing_count:
            raise AssertionError('Expected %s dangling %s but found %s' %
                                 (self.existing_count, self._row_description, updated_count))

        return False


class assert_no_new_dangling_labels(_assert_dangling_count_unchanged):
    """ Specialized assertion for ensuring that GC cleans up all labels.

        A label is counted as dangling when its id is not referenced by any TagManifestLabel row.
    """

    _row_description = 'labels'

    def _get_dangling_count(self):
        label_ids = {current.id for current in Label.select()}
        referenced_by_manifest = {mlabel.label_id for mlabel in TagManifestLabel.select()}
        return len(label_ids - referenced_by_manifest)


class assert_no_new_dangling_storages(_assert_dangling_count_unchanged):
    """ Specialized assertion for ensuring that GC cleans up all dangling storages.

        A storage is counted as dangling when its id is referenced neither by an Image row
        nor, as a derivative, by a DerivedStorageForImage row.
    """

    _row_description = 'storages'

    def _get_dangling_count(self):
        storage_ids = {current.id for current in ImageStorage.select()}
        referenced_by_image = {image.storage_id for image in Image.select()}
        referenced_by_derived = {derived.derivative_id
                                 for derived in DerivedStorageForImage.select()}
        return len(storage_ids - referenced_by_image - referenced_by_derived)


class TestGarbageCollection(unittest.TestCase):
    """ Tests for repository garbage collection of images, storages and labels. """

    @staticmethod
    def _set_tag_expiration_policy(namespace, expiration_s):
        """ Sets the tag expiration (in seconds) on the user named `namespace`. """
        namespace_user = model.user.get_user(namespace)
        model.user.change_user_tag_expiration(namespace_user, expiration_s)

    def setUp(self):
        setup_database_for_testing(self)

        # Expire removed tags immediately so that GC acts on them right away.
        self._set_tag_expiration_policy(ADMIN_ACCESS_USER, 0)
        self._set_tag_expiration_policy(PUBLIC_USER, 0)

        self.app = app.test_client()
        self.ctx = app.test_request_context()
        self.ctx.__enter__()

    def tearDown(self):
        finished_database_for_testing(self)

        # A normal (non-exceptional) exit passes None for all three arguments; the first
        # argument is an exception type, not a flag.
        self.ctx.__exit__(None, None, None)

    def createImage(self, docker_image_id, repository_obj, username):
        """ Creates (or links) the image with the given docker image id in the repository,
            along with 'squash' and 'aci' derived storages, torrent info and an additional
            'local_eu' placement. Returns the image's storage row.
        """
        preferred = storage.preferred_locations[0]
        image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username,
                                                      {}, preferred)
        image.storage.uploading = False
        image.storage.save()

        # Create derived images as well.
        model.image.find_or_create_derived_storage(image, 'squash', preferred)
        model.image.find_or_create_derived_storage(image, 'aci', preferred)

        # Add some torrent info. This is deliberately best-effort: failures are ignored
        # (presumably because a linked storage may already carry this info - TODO confirm).
        try:
            model.storage.save_torrent_info(image.storage, 1, 'helloworld')
            model.storage.save_torrent_info(image.storage, 2, 'helloworlds!')
        except Exception:
            pass

        # Add some additional placements to the image, skipping those that already exist.
        for location_name in ['local_eu']:
            location = database.ImageStorageLocation.get(name=location_name)

            try:
                database.ImageStoragePlacement.get(location=location, storage=image.storage)
            except database.ImageStoragePlacement.DoesNotExist:
                database.ImageStoragePlacement.create(location=location, storage=image.storage)

        return image.storage

    def createRepository(self, namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs):
        """ Creates the repository `namespace/name` and populates it with tags.

            Each keyword argument is a tag name mapped to a list of docker image ids, ordered
            from root ancestor to the tagged image; every image is parented to the one listed
            before it and the tag points at the last id. Each tag's manifest receives two
            labels. Returns the repository.
        """
        user = model.user.get_user(namespace)
        repo = model.repository.create_repository(namespace, name, user)

        # Populate the repository with the tags. Images shared between tags are only
        # created once.
        image_map = {}
        for tag_name, image_ids in kwargs.items():
            parent = None

            for image_id in image_ids:
                if image_id not in image_map:
                    image_map[image_id] = self.createImage(image_id, repo, namespace)

                v1_metadata = {
                    'id': image_id,
                }
                if parent is not None:
                    v1_metadata['parent'] = parent.docker_image_id

                # Set the ancestors for the image.
                parent = model.image.set_image_metadata(image_id, namespace, name, '', '', '',
                                                        v1_metadata, parent=parent)

            # Set the tag for the image.
            tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name,
                                                           image_ids[-1], 'sha:someshahere',
                                                           '{}')

            # Add some labels to the tag.
            model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest')
            model.label.create_manifest_label(tag_manifest, 'meh', 'grah', 'manifest')

        return repo

    def gcNow(self, repository):
        """ Runs garbage collection on the given repository. """
        model.repository.garbage_collect_repository(repository.namespace_user.username,
                                                    repository.name)

    def deleteTag(self, repository, tag, perform_gc=True):
        """ Deletes the tag from the repository, then runs GC unless `perform_gc` is False. """
        model.tag.delete_tag(repository.namespace_user.username, repository.name, tag)
        if perform_gc:
            self.gcNow(repository)

    def moveTag(self, repository, tag, docker_image_id):
        """ Creates the tag or points it at the given image, then runs GC. """
        model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag,
                                       docker_image_id)
        self.gcNow(repository)

    def assertNotDeleted(self, repository, *args):
        """ Asserts that every given docker image id is still found in the repository. """
        for docker_image_id in args:
            found = model.image.get_image_by_id(repository.namespace_user.username,
                                                repository.name, docker_image_id)
            self.assertTrue(bool(found))

    def assertDeleted(self, repository, *args):
        """ Asserts that every given docker image id is missing from the repository. """
        for docker_image_id in args:
            try:
                # Verify the image is missing when accessed by the repository.
                model.image.get_image_by_id(repository.namespace_user.username,
                                            repository.name, docker_image_id)
            except model.DataModelException:
                # This image is gone; keep going so the remaining ids are verified too.
                continue

            self.fail('Expected image %s to be deleted' % docker_image_id)

    def test_has_garbage(self):
        """ Remove all existing repositories, then add one without garbage, check, then add one
            with garbage, and check again.
        """
        time_machine_s = 1000000000

        # Delete all existing repos. The query is materialized first so that rows are not
        # removed from underneath the iteration.
        for repo in list(database.Repository.select()):
            model.repository.purge_repository(repo.namespace_user.username, repo.name)

        # Change the time machine expiration on the namespace.
        (database.User.update(removed_tag_expiration_s=time_machine_s)
         .where(database.User.username == ADMIN_ACCESS_USER)
         .execute())

        # Create a repository without any garbage.
        repository = self.createRepository(latest=['i1', 'i2', 'i3'])

        # Ensure that no repositories are returned by the has garbage check.
        self.assertIsNone(model.repository.find_repository_with_garbage(time_machine_s))

        # Delete a tag.
        self.deleteTag(repository, 'latest', perform_gc=False)

        # There should still not be any repositories with garbage, due to time machine.
        self.assertIsNone(model.repository.find_repository_with_garbage(time_machine_s))

        # Change the time machine expiration on the namespace.
        (database.User.update(removed_tag_expiration_s=0)
         .where(database.User.username == ADMIN_ACCESS_USER)
         .execute())

        # Now we should find the repository for GC.
        repository = model.repository.find_repository_with_garbage(0)
        self.assertIsNotNone(repository)
        self.assertEqual(REPO, repository.name)

        # GC the repository.
        self.gcNow(repository)

        # There should now be no repositories with garbage.
        self.assertIsNone(model.repository.find_repository_with_garbage(0))

    def test_find_garbage_policy_functions(self):
        """ A randomly chosen GC policy is one of the known expiration policies, and looking
            both up takes a single query in total.
        """
        with assert_query_count(1):
            one_policy = model.repository.get_random_gc_policy()
            all_policies = model.repository._get_gc_expiration_policies()
            self.assertIn(one_policy, all_policies)

    def test_one_tag(self):
        """ Create a repository with a single tag, then remove that tag and verify that the
            repository is now empty.
        """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository = self.createRepository(latest=['i1', 'i2', 'i3'])
            self.deleteTag(repository, 'latest')
            self.assertDeleted(repository, 'i1', 'i2', 'i3')

    def test_two_tags_unshared_images(self):
        """ Repository has two tags with no shared images between them. """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])
            self.deleteTag(repository, 'latest')
            self.assertDeleted(repository, 'i1', 'i2', 'i3')
            self.assertNotDeleted(repository, 'f1', 'f2')

    def test_two_tags_shared_images(self):
        """ Repository has two tags with shared images. Deleting the tag should only remove the
            unshared images.
        """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
            self.deleteTag(repository, 'latest')
            self.assertDeleted(repository, 'i2', 'i3')
            self.assertNotDeleted(repository, 'i1', 'f1')

    def test_unrelated_repositories(self):
        """ Two repositories with different images. Removing the tag from one leaves the other's
            images intact.
        """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
            repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2')

            self.deleteTag(repository1, 'latest')

            self.assertDeleted(repository1, 'i1', 'i2', 'i3')
            self.assertNotDeleted(repository2, 'j1', 'j2', 'j3')

    def test_related_repositories(self):
        """ Two repositories with shared images. Removing the tag from one leaves the other's
            images intact.
        """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
            repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2')

            self.deleteTag(repository1, 'latest')

            self.assertDeleted(repository1, 'i3')
            self.assertNotDeleted(repository2, 'i1', 'i2', 'j1')

    def test_inaccessible_repositories(self):
        """ Two repositories under different namespaces should result in the images being deleted
            but not completely removed from the database.
        """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER,
                                                latest=['i1', 'i2', 'i3'])
            repository2 = self.createRepository(namespace=PUBLIC_USER,
                                                latest=['i1', 'i2', 'i3'])

            self.deleteTag(repository1, 'latest')
            self.assertDeleted(repository1, 'i1', 'i2', 'i3')
            self.assertNotDeleted(repository2, 'i1', 'i2', 'i3')

    def test_multiple_shared_images(self):
        """ Repository has multiple tags with shared images. Selectively deleting the tags, and
            verifying at each step.
        """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository = self.createRepository(latest=['i1', 'i2', 'i3'],
                                               other=['i1', 'f1', 'f2'],
                                               third=['t1', 't2', 't3'],
                                               fourth=['i1', 'f1'])

            # Delete tag other. Should delete f2, since it is not shared.
            self.deleteTag(repository, 'other')
            self.assertDeleted(repository, 'f2')
            self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')

            # Move tag fourth to i3. This should remove f1 since it is no longer referenced.
            self.moveTag(repository, 'fourth', 'i3')
            self.assertDeleted(repository, 'f1')
            self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')

            # Delete tag 'latest'. This should do nothing since fourth is on the same branch.
            self.deleteTag(repository, 'latest')
            self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')

            # Delete tag 'third'. This should remove t1->t3.
            self.deleteTag(repository, 'third')
            self.assertDeleted(repository, 't1', 't2', 't3')
            self.assertNotDeleted(repository, 'i1', 'i2', 'i3')

            # Add tag to i1.
            self.moveTag(repository, 'newtag', 'i1')
            self.assertNotDeleted(repository, 'i1', 'i2', 'i3')

            # Delete tag 'fourth'. This should remove i2 and i3.
            self.deleteTag(repository, 'fourth')
            self.assertDeleted(repository, 'i2', 'i3')
            self.assertNotDeleted(repository, 'i1')

            # Delete tag 'newtag'. This should remove the remaining image.
            self.deleteTag(repository, 'newtag')
            self.assertDeleted(repository, 'i1')

    def test_empty_gc(self):
        """ Running GC on a repository in which every image is tagged removes nothing. """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository = self.createRepository(latest=['i1', 'i2', 'i3'],
                                               other=['i1', 'f1', 'f2'],
                                               third=['t1', 't2', 't3'],
                                               fourth=['i1', 'f1'])

            self.gcNow(repository)
            self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')

    def test_time_machine_no_gc(self):
        """ Repository has two tags with shared images. Deleting the tag should not remove any
            images.
        """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
            self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24)

            self.deleteTag(repository, 'latest')
            self.assertNotDeleted(repository, 'i2', 'i3')
            self.assertNotDeleted(repository, 'i1', 'f1')

    def test_time_machine_gc(self):
        """ Repository has two tags with shared images. Deleting the second tag should cause the
            images for the first deleted tag to gc.
        """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])

            self._set_tag_expiration_policy(repository.namespace_user.username, 1)

            self.deleteTag(repository, 'latest')
            self.assertNotDeleted(repository, 'i2', 'i3')
            self.assertNotDeleted(repository, 'i1', 'f1')

            # Wait longer than the one second expiration set above.
            time.sleep(2)

            # This will cause the images associated with latest to gc.
            self.deleteTag(repository, 'other')
            self.assertDeleted(repository, 'i2', 'i3')
            self.assertNotDeleted(repository, 'i1', 'f1')

    def test_manifest_gc(self):
        """ Deleting a tag for which a manifest was generated still collects its images. """
        with assert_no_new_dangling_labels(), assert_no_new_dangling_storages():
            repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
            _generate_and_store_manifest(ADMIN_ACCESS_USER, REPO, 'latest')

            self._set_tag_expiration_policy(repository.namespace_user.username, 0)

            self.deleteTag(repository, 'latest')
            self.assertDeleted(repository, 'i2', 'i3')


if __name__ == '__main__':
    unittest.main()