Improve the garbage collection tests.
This commit is contained in:
parent
584a5a7ddd
commit
cf83c9a16a
2 changed files with 147 additions and 172 deletions
|
@ -44,6 +44,12 @@ def get_repository(namespace_name, repository_name):
|
|||
|
||||
|
||||
def purge_repository(namespace_name, repository_name):
|
||||
""" Completely delete all traces of the repository. Will return True upon
|
||||
complete success, and False upon partial or total failure. Garbage
|
||||
collection is incremental and repeatable, so this return value does
|
||||
not need to be checked or responded to.
|
||||
"""
|
||||
|
||||
repo = _basequery.get_existing_repository(namespace_name, repository_name)
|
||||
|
||||
# Delete all tags to allow gc to reclaim storage
|
||||
|
@ -57,12 +63,18 @@ def purge_repository(namespace_name, repository_name):
|
|||
unreferenced_candidates = set(img[0] for img in unreferenced_image_q.tuples())
|
||||
|
||||
# Gc to remove the images and storage
|
||||
garbage_collect_repo(repo, previously_referenced | unreferenced_candidates)
|
||||
all_repo_images = previously_referenced | unreferenced_candidates
|
||||
successful_gc = garbage_collect_repo(repo, all_repo_images)
|
||||
|
||||
if not successful_gc:
|
||||
return False
|
||||
|
||||
# Delete the rest of the repository metadata
|
||||
fetched = _basequery.get_existing_repository(namespace_name, repository_name)
|
||||
fetched.delete_instance(recursive=True, delete_nullable=False)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@ttl_cache(maxsize=1, ttl=600)
|
||||
def _get_gc_expiration_policies():
|
||||
|
@ -112,6 +124,13 @@ def find_repository_with_garbage(limit_to_gc_policy_s):
|
|||
|
||||
|
||||
def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||
""" Garbage collect the specified repository object. This will remove all
|
||||
images, derived images, and other associated metadata, for images which
|
||||
are no longer referenced by a tag or another image which is itself
|
||||
tagged. Returns True if garbage collection was completed without error
|
||||
and False otherwise. Retries are safe and work incrementally, so this
|
||||
return value does not need to be checked or handled.
|
||||
"""
|
||||
logger.debug('Garbage collecting repository %s', repo.id)
|
||||
|
||||
storage_id_whitelist = set()
|
||||
|
@ -122,7 +141,7 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
|||
|
||||
if not len(candidate_orphan_image_set):
|
||||
logger.debug('No candidate images for GC for repo: %s', repo.id)
|
||||
return
|
||||
return True
|
||||
|
||||
candidates_orphans = list(candidate_orphan_image_set)
|
||||
|
||||
|
@ -190,18 +209,20 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
|||
.execute())
|
||||
except IntegrityError:
|
||||
logger.info('Could not GC derived images %s; will try again soon', to_remove)
|
||||
return
|
||||
return False
|
||||
|
||||
try:
|
||||
Image.delete().where(Image.id << to_remove).execute()
|
||||
except IntegrityError:
|
||||
logger.info('Could not GC images %s; will try again soon', to_remove)
|
||||
return
|
||||
return False
|
||||
|
||||
if len(to_remove) > 0:
|
||||
logger.info('Garbage collecting storage for images: %s', to_remove)
|
||||
storage.garbage_collect_storage(storage_id_whitelist)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def star_repository(user, repository):
|
||||
""" Stars a repository. """
|
||||
|
|
146
test/test_gc.py
146
test/test_gc.py
|
@ -1,13 +1,13 @@
|
|||
import unittest
|
||||
import time
|
||||
|
||||
from contextlib import contextmanager
|
||||
from playhouse.test_utils import assert_query_count
|
||||
|
||||
from app import app, storage
|
||||
from initdb import setup_database_for_testing, finished_database_for_testing
|
||||
from data import model, database
|
||||
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel
|
||||
from endpoints.v2.manifest import _generate_and_store_manifest
|
||||
|
||||
|
||||
ADMIN_ACCESS_USER = 'devtable'
|
||||
|
@ -16,48 +16,6 @@ PUBLIC_USER = 'public'
|
|||
REPO = 'somerepo'
|
||||
|
||||
|
||||
class assert_no_new_dangling_labels(object):
  """ Context manager asserting that the wrapped code leaves behind no additional
      dangling labels.

      A label is counted as dangling when no TagManifestLabel row refers to it. The
      dangling count is sampled on entry and again on exit, and the two samples must
      be equal.
  """
  def __init__(self):
    # Baseline dangling-label count; overwritten when the context is entered.
    self.existing_count = 0

  def _get_dangling_count(self):
    """ Returns how many Label rows are not referenced by any TagManifestLabel row. """
    all_labels = {label.id for label in Label.select()}
    in_use = {link.label_id for link in TagManifestLabel.select()}
    return len(all_labels.difference(in_use))

  def __enter__(self):
    self.existing_count = self._get_dangling_count()
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    # The check runs whether or not the body raised; the implicit None return lets
    # any exception from the body propagate.
    count_after = self._get_dangling_count()
    assert count_after == self.existing_count
|
||||
|
||||
|
||||
class assert_no_new_dangling_storages(object):
  """ Context manager asserting that the wrapped code leaves behind no additional
      dangling storages.

      A storage is counted as dangling when neither an Image row nor a
      DerivedStorageForImage row refers to it. The dangling count is sampled on entry
      and again on exit, and the two samples must be equal.
  """
  def __init__(self):
    # Baseline dangling-storage count; overwritten when the context is entered.
    self.existing_count = 0

  def _get_dangling_count(self):
    """ Returns how many ImageStorage rows are referenced by neither an image nor a
        derived storage entry.
    """
    all_storages = {row.id for row in ImageStorage.select()}
    held_by_images = {img.storage_id for img in Image.select()}
    held_by_derived = {link.derivative_id for link in DerivedStorageForImage.select()}
    return len(all_storages - (held_by_images | held_by_derived))

  def __enter__(self):
    self.existing_count = self._get_dangling_count()
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    # The check runs whether or not the body raised; the implicit None return lets
    # any exception from the body propagate.
    count_after = self._get_dangling_count()
    assert count_after == self.existing_count
|
||||
|
||||
|
||||
class TestGarbageCollection(unittest.TestCase):
|
||||
@staticmethod
|
||||
def _set_tag_expiration_policy(namespace, expiration_s):
|
||||
|
@ -78,7 +36,8 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
finished_database_for_testing(self)
|
||||
self.ctx.__exit__(True, None, None)
|
||||
|
||||
def createImage(self, docker_image_id, repository_obj, username):
|
||||
@staticmethod
|
||||
def createImage(docker_image_id, repository_obj, username):
|
||||
preferred = storage.preferred_locations[0]
|
||||
image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {},
|
||||
preferred)
|
||||
|
@ -91,10 +50,9 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
|
||||
# Add some torrent info.
|
||||
try:
|
||||
database.TorrentInfo.get(storage=image.storage)
|
||||
except database.TorrentInfo.DoesNotExist:
|
||||
model.storage.save_torrent_info(image.storage, 1, 'helloworld')
|
||||
model.storage.save_torrent_info(image.storage, 2, 'helloworlds!')
|
||||
except:
|
||||
pass
|
||||
|
||||
# Add some additional placements to the image.
|
||||
for location_name in ['local_eu']:
|
||||
|
@ -144,17 +102,17 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
return repo
|
||||
|
||||
def gcNow(self, repository):
  """ Runs one garbage collection pass over `repository` and fails the test unless the
      collector reports success.
  """
  # A single, checked invocation: the result of the GC call is what is being asserted,
  # so there is no separate unchecked call before it.
  self.assertTrue(model.repository.garbage_collect_repo(repository))
|
||||
|
||||
def deleteTag(self, repository, tag, perform_gc=True):
  """ Deletes `tag` from `repository`. Unless `perform_gc` is false, a single garbage
      collection pass is then run and the test fails if it does not report success.
  """
  model.tag.delete_tag(repository.namespace_user.username, repository.name, tag)
  if perform_gc:
    # One checked GC call only; no unchecked call precedes it.
    self.assertTrue(model.repository.garbage_collect_repo(repository))
|
||||
|
||||
def moveTag(self, repository, tag, docker_image_id):
  """ Points `tag` in `repository` at `docker_image_id` (creating the tag if needed), then
      runs a single garbage collection pass and fails the test if it does not report
      success.
  """
  model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag,
                                 docker_image_id)
  # One checked GC call only; no unchecked call precedes it.
  self.assertTrue(model.repository.garbage_collect_repo(repository))
|
||||
|
||||
def assertNotDeleted(self, repository, *args):
|
||||
for docker_image_id in args:
|
||||
|
@ -172,14 +130,43 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
|
||||
self.fail('Expected image %s to be deleted' % docker_image_id)
|
||||
|
||||
@staticmethod
def _get_dangling_storage_count():
  """ Returns the number of ImageStorage rows that are referenced by neither an Image
      row nor a DerivedStorageForImage row.
  """
  known = {entry.id for entry in ImageStorage.select()}
  image_owned = {img.storage_id for img in Image.select()}
  derived_owned = {link.derivative_id for link in DerivedStorageForImage.select()}
  return len(known.difference(image_owned, derived_owned))
|
||||
|
||||
@staticmethod
def _get_dangling_label_count():
  """ Returns the number of Label rows that no TagManifestLabel row refers to. """
  known = {label.id for label in Label.select()}
  attached = {link.label_id for link in TagManifestLabel.select()}
  return len(known.difference(attached))
|
||||
|
||||
@contextmanager
def assert_no_new_dangling_storages_or_labels(self):
  """ Context manager which verifies that the wrapped code does not change the number of
      dangling storages or dangling labels.

      Both counts are sampled before the body runs. Afterwards the storage count is
      re-sampled and compared, then the label count. If the body raises, the
      post-body checks are not performed.
  """
  # TODO: Consider also asserting the number of DB queries being performed.
  storages_before, labels_before = (self._get_dangling_storage_count(),
                                    self._get_dangling_label_count())
  yield

  self.assertEqual(self._get_dangling_storage_count(), storages_before)
  self.assertEqual(self._get_dangling_label_count(), labels_before)
|
||||
|
||||
def test_has_garbage(self):
|
||||
""" Remove all existing repositories, then add one without garbage, check, then add one with
|
||||
garbage, and check again.
|
||||
"""
|
||||
# Delete all existing repos.
|
||||
for repo in database.Repository.select():
|
||||
model.repository.purge_repository(repo.namespace_user.username, repo.name)
|
||||
for repo in database.Repository.select().order_by(database.Repository.id):
|
||||
self.assertTrue(model.repository.purge_repository(repo.namespace_user.username, repo.name))
|
||||
|
||||
# Change the time machine expiration on the namespace.
|
||||
(database.User
|
||||
|
@ -211,57 +198,48 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
self.assertEquals(REPO, repository.name)
|
||||
|
||||
# GC the repository.
|
||||
model.repository.garbage_collect_repo(repository)
|
||||
self.assertTrue(model.repository.garbage_collect_repo(repository))
|
||||
|
||||
# There should now be no repositories with garbage.
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage(0))
|
||||
|
||||
|
||||
def test_find_garbage_policy_functions(self):
  """ A randomly chosen GC policy must be one of the known expiration policies, and
      looking both up must cost exactly one query in total.
  """
  with assert_query_count(1):
    sampled_policy = model.repository.get_random_gc_policy()
    known_policies = model.repository._get_gc_expiration_policies()
    self.assertIn(sampled_policy, known_policies)
|
||||
|
||||
|
||||
def test_one_tag(self):
  """ Create a repository with a single tag, then remove that tag and verify that the
      repository is now empty.
  """
  # This single helper checks both dangling storages and dangling labels, so no further
  # context managers are stacked around it.
  with self.assert_no_new_dangling_storages_or_labels():
    repository = self.createRepository(latest=['i1', 'i2', 'i3'])
    self.deleteTag(repository, 'latest')
    self.assertDeleted(repository, 'i1', 'i2', 'i3')
|
||||
|
||||
|
||||
def test_two_tags_unshared_images(self):
  """ Repository has two tags with no shared images between them. Deleting one tag must
      remove only that tag's images.
  """
  # This single helper checks both dangling storages and dangling labels, so no further
  # context managers are stacked around it.
  with self.assert_no_new_dangling_storages_or_labels():
    repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])
    self.deleteTag(repository, 'latest')
    self.assertDeleted(repository, 'i1', 'i2', 'i3')
    self.assertNotDeleted(repository, 'f1', 'f2')
|
||||
|
||||
|
||||
def test_two_tags_shared_images(self):
  """ Repository has two tags with shared images. Deleting the tag should only remove the
      unshared images.
  """
  # This single helper checks both dangling storages and dangling labels, so no further
  # context managers are stacked around it.
  with self.assert_no_new_dangling_storages_or_labels():
    repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
    self.deleteTag(repository, 'latest')
    self.assertDeleted(repository, 'i2', 'i3')
    self.assertNotDeleted(repository, 'i1', 'f1')
|
||||
|
||||
|
||||
def test_unrelated_repositories(self):
|
||||
""" Two repositories with different images. Removing the tag from one leaves the other's
|
||||
images intact.
|
||||
"""
|
||||
with assert_no_new_dangling_labels():
|
||||
with assert_no_new_dangling_storages():
|
||||
with self.assert_no_new_dangling_storages_or_labels():
|
||||
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
|
||||
repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2')
|
||||
|
||||
|
@ -270,13 +248,11 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
|
||||
self.assertNotDeleted(repository2, 'j1', 'j2', 'j3')
|
||||
|
||||
|
||||
def test_related_repositories(self):
|
||||
""" Two repositories with shared images. Removing the tag from one leaves the other's
|
||||
images intact.
|
||||
"""
|
||||
with assert_no_new_dangling_labels():
|
||||
with assert_no_new_dangling_storages():
|
||||
with self.assert_no_new_dangling_storages_or_labels():
|
||||
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
|
||||
repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2')
|
||||
|
||||
|
@ -285,13 +261,11 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
self.assertDeleted(repository1, 'i3')
|
||||
self.assertNotDeleted(repository2, 'i1', 'i2', 'j1')
|
||||
|
||||
|
||||
def test_inaccessible_repositories(self):
|
||||
""" Two repositories under different namespaces should result in the images being deleted
|
||||
but not completely removed from the database.
|
||||
"""
|
||||
with assert_no_new_dangling_labels():
|
||||
with assert_no_new_dangling_storages():
|
||||
with self.assert_no_new_dangling_storages_or_labels():
|
||||
repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3'])
|
||||
repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3'])
|
||||
|
||||
|
@ -299,13 +273,11 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
|
||||
self.assertNotDeleted(repository2, 'i1', 'i2', 'i3')
|
||||
|
||||
|
||||
def test_multiple_shared_images(self):
|
||||
""" Repository has multiple tags with shared images. Selectively deleting the tags, and
|
||||
verifying at each step.
|
||||
"""
|
||||
with assert_no_new_dangling_labels():
|
||||
with assert_no_new_dangling_storages():
|
||||
with self.assert_no_new_dangling_storages_or_labels():
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
|
||||
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
|
||||
|
||||
|
@ -341,22 +313,18 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
self.deleteTag(repository, 'newtag')
|
||||
self.assertDeleted(repository, 'i1')
|
||||
|
||||
|
||||
def test_empty_gc(self):
  """ Running GC on a repository in which every image is still reachable from a tag must
      not delete any image.
  """
  # This single helper checks both dangling storages and dangling labels, so no further
  # context managers are stacked around it.
  with self.assert_no_new_dangling_storages_or_labels():
    repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
                                       third=['t1', 't2', 't3'], fourth=['i1', 'f1'])

    self.gcNow(repository)
    self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')
|
||||
|
||||
|
||||
def test_time_machine_no_gc(self):
|
||||
""" Repository has two tags with shared images. Deleting the tag should not remove any images
|
||||
"""
|
||||
with assert_no_new_dangling_labels():
|
||||
with assert_no_new_dangling_storages():
|
||||
with self.assert_no_new_dangling_storages_or_labels():
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
|
||||
self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24)
|
||||
|
||||
|
@ -364,13 +332,11 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
self.assertNotDeleted(repository, 'i2', 'i3')
|
||||
self.assertNotDeleted(repository, 'i1', 'f1')
|
||||
|
||||
|
||||
def test_time_machine_gc(self):
|
||||
""" Repository has two tags with shared images. Deleting the second tag should cause the images
|
||||
for the first deleted tag to gc.
|
||||
"""
|
||||
with assert_no_new_dangling_labels():
|
||||
with assert_no_new_dangling_storages():
|
||||
with self.assert_no_new_dangling_storages_or_labels():
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
|
||||
|
||||
self._set_tag_expiration_policy(repository.namespace_user.username, 1)
|
||||
|
@ -386,17 +352,5 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
self.assertNotDeleted(repository, 'i1', 'f1')
|
||||
|
||||
|
||||
def test_manifest_gc(self):
  """ After generating a manifest for the 'latest' tag and setting the namespace's tag
      expiration to zero, deleting 'latest' must GC the images that only it referenced.
  """
  # Uses the combined storage-and-label check shared by the other GC tests rather than
  # nesting two separate context managers.
  with self.assert_no_new_dangling_storages_or_labels():
    repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
    _generate_and_store_manifest(ADMIN_ACCESS_USER, REPO, 'latest')

    self._set_tag_expiration_policy(repository.namespace_user.username, 0)

    self.deleteTag(repository, 'latest')
    self.assertDeleted(repository, 'i2', 'i3')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
Reference in a new issue