5225642850
Add support to GC to invoke a callback with the image+storages removed. Only images whose storage was also removed will be sent to the callback. This will be used by security scanning for its own GC in the followup change.
410 lines
16 KiB
Python
410 lines
16 KiB
Python
import unittest
|
|
import time
|
|
|
|
from contextlib import contextmanager
|
|
from playhouse.test_utils import assert_query_count
|
|
|
|
from app import app, storage
|
|
from initdb import setup_database_for_testing, finished_database_for_testing
|
|
from data import model, database
|
|
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel
|
|
|
|
|
|
# Namespace used by default when createRepository builds a test repository.
ADMIN_ACCESS_USER = 'devtable'

# Second namespace, used by tests that place a repository under a different user.
PUBLIC_USER = 'public'

# Default repository name used by createRepository.
REPO = 'somerepo'
|
|
|
|
|
|
class TestGarbageCollection(unittest.TestCase):
|
|
@staticmethod
def _set_tag_expiration_policy(namespace, expiration_s):
    """ Looks up the user named `namespace` and changes that user's tag expiration
        to `expiration_s`.
    """
    model.user.change_user_tag_expiration(model.user.get_user(namespace), expiration_s)
|
|
|
|
def setUp(self):
    """ Prepares the test database, sets a tag expiration of 0 on both test namespaces and
        enters a request context that is kept on `self.ctx`.
    """
    setup_database_for_testing(self)

    for namespace in (ADMIN_ACCESS_USER, PUBLIC_USER):
        self._set_tag_expiration_policy(namespace, 0)

    self.app = app.test_client()

    # The context is entered manually; it is stored so it can be exited later.
    request_context = app.test_request_context()
    self.ctx = request_context
    request_context.__enter__()
|
|
|
|
def tearDown(self):
    """ Finishes the test database and exits the request context stored on `self.ctx`. """
    finished_database_for_testing(self)

    # __exit__ takes (exc_type, exc_value, traceback); all three are None when the
    # context is left without an exception. `True` is not a valid exception type.
    self.ctx.__exit__(None, None, None)
|
|
|
|
@staticmethod
def createImage(docker_image_id, repository_obj, username):
    """ Finds, creates or links the image `docker_image_id` in `repository_obj` for
        `username`, marks its storage as no longer uploading, and attaches derived
        storages, torrent info and an additional placement to it.

        Returns the storage of the image.
    """
    preferred = storage.preferred_locations[0]
    image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {},
                                                  preferred)
    image.storage.uploading = False
    image.storage.save()

    # Create derived images as well.
    for transformation in ('squash', 'aci'):
        model.image.find_or_create_derived_storage(image, transformation, preferred)

    # Add some torrent info, unless the storage already carries it.
    try:
        database.TorrentInfo.get(storage=image.storage)
    except database.TorrentInfo.DoesNotExist:
        model.storage.save_torrent_info(image.storage, 1, 'helloworld')

    # Add some additional placements to the image. A placement is created only when it is
    # missing, mirroring the torrent info handling above. The previous code did the
    # opposite: it skipped creation when the lookup failed (via a bare `except`) and tried
    # to create a second placement when one already existed.
    for location_name in ['local_eu']:
        location = database.ImageStorageLocation.get(name=location_name)

        try:
            database.ImageStoragePlacement.get(location=location, storage=image.storage)
        except database.ImageStoragePlacement.DoesNotExist:
            database.ImageStoragePlacement.create(location=location, storage=image.storage)

    return image.storage
|
|
|
|
def createRepository(self, namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs):
    """ Creates the repository `namespace`/`name` and populates it from `kwargs`.

        Every keyword argument maps a tag name to a list of docker image IDs. The IDs are
        processed in list order, each image being passed as the parent of the next one, and
        the tag is stored against the last ID. Two manifest labels are added to every tag.

        Returns the created repository.
    """
    owner = model.user.get_user(namespace)
    repo = model.repository.create_repository(namespace, name, owner)

    # Results of createImage keyed by docker image ID, so an ID that appears under several
    # tags is only created once.
    created = {}
    for tag_name, image_ids in kwargs.items():
        parent = None

        for image_id in image_ids:
            if image_id not in created:
                created[image_id] = self.createImage(image_id, repo, namespace)

            v1_metadata = {'id': image_id}
            if parent is not None:
                v1_metadata['parent'] = parent.docker_image_id

            # The value returned here is used as the parent of the next image in the list.
            parent = model.image.set_image_metadata(image_id, namespace, name, '', '', '',
                                                    v1_metadata, parent=parent)

        # Point the tag at the last image of its list.
        tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1],
                                                       'sha:someshahere', '{}')

        # Attach some labels to the tag's manifest.
        for label_key, label_value in (('foo', 'bar'), ('meh', 'grah')):
            model.label.create_manifest_label(tag_manifest, label_key, label_value, 'manifest')

    return repo
|
|
|
|
def gcNow(self, repository):
    """ Garbage collects `repository` and asserts that the call returned a truthy value. """
    collected = model.repository.garbage_collect_repo(repository)
    self.assertTrue(collected)
|
|
|
|
def deleteTag(self, repository, tag, perform_gc=True):
    """ Deletes `tag` from `repository`. Unless `perform_gc` is false, the repository is then
        garbage collected and the result of that call is asserted to be truthy.
    """
    model.tag.delete_tag(repository.namespace_user.username, repository.name, tag)

    if not perform_gc:
        return

    collected = model.repository.garbage_collect_repo(repository)
    self.assertTrue(collected)
|
|
|
|
def moveTag(self, repository, tag, docker_image_id):
    """ Creates `tag` in `repository`, or updates it, so that it refers to
        `docker_image_id`, then garbage collects the repository and asserts the result of
        that call is truthy.
    """
    namespace = repository.namespace_user.username
    model.tag.create_or_update_tag(namespace, repository.name, tag, docker_image_id)

    collected = model.repository.garbage_collect_repo(repository)
    self.assertTrue(collected)
|
|
|
|
def assertNotDeleted(self, repository, *args):
    """ Asserts that looking up each docker image ID in `args` through `repository` yields a
        truthy result.
    """
    for docker_image_id in args:
        found = model.image.get_image_by_id(repository.namespace_user.username,
                                            repository.name, docker_image_id)
        self.assertTrue(bool(found))
|
|
|
|
def assertDeleted(self, repository, *args):
    """ Asserts that every docker image ID in `args` is missing when looked up through
        `repository`.

        An image counts as missing when the lookup raises `model.DataModelException`. The
        test fails on the first ID whose lookup succeeds.
    """
    for docker_image_id in args:
        try:
            # Verify the image is missing when accessed by the repository.
            model.image.get_image_by_id(repository.namespace_user.username, repository.name,
                                        docker_image_id)
        except model.DataModelException:
            # This image is gone; keep checking the remaining IDs. Returning here would
            # leave every ID after the first deleted one unverified.
            continue

        self.fail('Expected image %s to be deleted' % docker_image_id)
|
|
|
|
@staticmethod
def _get_dangling_storage_count():
    """ Returns how many ImageStorage rows are referenced neither by an Image row (through
        `storage_id`) nor by a DerivedStorageForImage row (through `derivative_id`).
    """
    all_storage_ids = {row.id for row in ImageStorage.select()}
    used_by_images = {row.storage_id for row in Image.select()}
    used_as_derived = {row.derivative_id for row in DerivedStorageForImage.select()}

    dangling = all_storage_ids.difference(used_by_images, used_as_derived)
    return len(dangling)
|
|
|
|
@staticmethod
def _get_dangling_label_count():
    """ Returns how many Label rows are not referenced by any TagManifestLabel row. """
    all_label_ids = {row.id for row in Label.select()}
    used_by_manifests = {row.label_id for row in TagManifestLabel.select()}

    dangling = all_label_ids.difference(used_by_manifests)
    return len(dangling)
|
|
|
|
@contextmanager
def assert_gc_integrity(self, expect_storage_removed=True):
    """ Specialized assertion for ensuring that GC cleans up all dangling storages
        and labels, invokes the callback for images removed and doesn't invoke the
        callback for images *not* removed.

        The dangling storage and label counts are taken before the wrapped block runs and
        must be unchanged afterwards. `expect_storage_removed` states whether the image
        cleanup callback is expected to have received anything at all.
    """
    # TODO: Consider also asserting the number of DB queries being performed.

    # Add a callback for when images are removed; everything it receives is collected in
    # this list.
    # NOTE(review): the callback is never unregistered here, so callbacks presumably pile up
    # across tests - confirm whether model.config offers a way to remove one.
    removed_image_storages = []
    model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Store the number of dangling storages and labels.
    existing_storage_count = self._get_dangling_storage_count()
    existing_label_count = self._get_dangling_label_count()
    yield

    # Ensure the number of dangling storages and labels has not changed.
    updated_storage_count = self._get_dangling_storage_count()
    self.assertEqual(updated_storage_count, existing_storage_count)

    updated_label_count = self._get_dangling_label_count()
    self.assertEqual(updated_label_count, existing_label_count)

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
        with self.assertRaises(Image.DoesNotExist):
            Image.get(id=removed_image_and_storage.id)

        with self.assertRaises(ImageStorage.DoesNotExist):
            ImageStorage.get(id=removed_image_and_storage.storage_id)

        with self.assertRaises(ImageStorage.DoesNotExist):
            ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # `assertEqual` replaces the deprecated `assertEquals` alias, matching the count
    # assertions above.
    self.assertEqual(expect_storage_removed, bool(removed_image_storages))
|
|
|
|
def test_has_garbage(self):
    """ Remove all existing repositories, then add one without garbage, check, then add one with
        garbage, and check again.
    """
    # Expiration value used both on the namespace and for the policy lookups while the
    # deleted tag must still be protected.
    long_expiration_s = 1000000000

    # Delete all existing repos.
    for repo in database.Repository.select().order_by(database.Repository.id):
        self.assertTrue(model.repository.purge_repository(repo.namespace_user.username, repo.name))

    # Change the time machine expiration on the namespace.
    (database.User
     .update(removed_tag_expiration_s=long_expiration_s)
     .where(database.User.username == ADMIN_ACCESS_USER)
     .execute())

    # Create a repository without any garbage.
    repository = self.createRepository(latest=['i1', 'i2', 'i3'])

    # Ensure that no repositories are returned by the has garbage check.
    self.assertIsNone(model.repository.find_repository_with_garbage(long_expiration_s))

    # Delete a tag.
    self.deleteTag(repository, 'latest', perform_gc=False)

    # There should still not be any repositories with garbage, due to time machine.
    self.assertIsNone(model.repository.find_repository_with_garbage(long_expiration_s))

    # Change the time machine expiration on the namespace.
    (database.User
     .update(removed_tag_expiration_s=0)
     .where(database.User.username == ADMIN_ACCESS_USER)
     .execute())

    # Now we should find the repository for GC.
    repository = model.repository.find_repository_with_garbage(0)
    self.assertIsNotNone(repository)
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    self.assertEqual(REPO, repository.name)

    # GC the repository.
    self.assertTrue(model.repository.garbage_collect_repo(repository))

    # There should now be no repositories with garbage.
    self.assertIsNone(model.repository.find_repository_with_garbage(0))
|
|
|
|
def test_find_garbage_policy_functions(self):
    """ Runs both GC policy lookups inside one `assert_query_count(1)` block and checks that
        the randomly chosen policy is among the listed policies.
    """
    with assert_query_count(1):
        chosen_policy = model.repository.get_random_gc_policy()
        known_policies = model.repository._get_gc_expiration_policies()
        self.assertIn(chosen_policy, known_policies)
|
|
|
|
def test_one_tag(self):
    """ A repository with a single tag: once that tag is deleted, the images it referenced
        are expected to be gone.
    """
    tagged_images = ['i1', 'i2', 'i3']

    with self.assert_gc_integrity():
        repo = self.createRepository(latest=tagged_images)
        self.deleteTag(repo, 'latest')
        self.assertDeleted(repo, *tagged_images)
|
|
|
|
def test_two_tags_unshared_images(self):
    """ Two tags whose image lists do not overlap: deleting one tag removes its images and
        leaves the other tag's images alone.
    """
    latest_images = ['i1', 'i2', 'i3']
    other_images = ['f1', 'f2']

    with self.assert_gc_integrity():
        repo = self.createRepository(latest=latest_images, other=other_images)
        self.deleteTag(repo, 'latest')
        self.assertDeleted(repo, *latest_images)
        self.assertNotDeleted(repo, *other_images)
|
|
|
|
def test_two_tags_shared_images(self):
    """ Two tags that both include image i1: deleting `latest` is expected to remove only
        the images that `other` does not use.
    """
    with self.assert_gc_integrity():
        repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
        self.deleteTag(repo, 'latest')

        only_in_latest = ('i2', 'i3')
        still_tagged = ('i1', 'f1')
        self.assertDeleted(repo, *only_in_latest)
        self.assertNotDeleted(repo, *still_tagged)
|
|
|
|
def test_unrelated_repositories(self):
    """ Two repositories with distinct image IDs: deleting the tag of the first one must not
        affect the images of the second.
    """
    first_images = ['i1', 'i2', 'i3']
    second_images = ['j1', 'j2', 'j3']

    with self.assert_gc_integrity():
        first_repo = self.createRepository(latest=first_images, name='repo1')
        second_repo = self.createRepository(latest=second_images, name='repo2')

        self.deleteTag(first_repo, 'latest')

        self.assertDeleted(first_repo, *first_images)
        self.assertNotDeleted(second_repo, *second_images)
|
|
|
|
def test_related_repositories(self):
    """ Two repositories in the same namespace whose tags both use image IDs i1 and i2:
        deleting the tag of the first one leaves every image of the second in place.
    """
    second_images = ['i1', 'i2', 'j1']

    with self.assert_gc_integrity():
        first_repo = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
        second_repo = self.createRepository(latest=second_images, name='repo2')

        self.deleteTag(first_repo, 'latest')

        self.assertDeleted(first_repo, 'i3')
        self.assertNotDeleted(second_repo, *second_images)
|
|
|
|
def test_inaccessible_repositories(self):
    """ The same image IDs are tagged in repositories under two different namespaces:
        deleting the tag in the first namespace removes the images from that repository
        while the second repository still has them.
    """
    image_ids = ['i1', 'i2', 'i3']

    with self.assert_gc_integrity():
        admin_repo = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=image_ids)
        public_repo = self.createRepository(namespace=PUBLIC_USER, latest=image_ids)

        self.deleteTag(admin_repo, 'latest')
        self.assertDeleted(admin_repo, *image_ids)
        self.assertNotDeleted(public_repo, *image_ids)
|
|
|
|
def test_multiple_shared_images(self):
    """ Four tags with overlapping image lists are deleted or moved one step at a time; after
        every step the expected deleted and surviving images are checked.
    """
    i_chain = ['i1', 'i2', 'i3']
    t_chain = ['t1', 't2', 't3']

    with self.assert_gc_integrity():
        repo = self.createRepository(latest=i_chain, other=['i1', 'f1', 'f2'],
                                     third=t_chain, fourth=['i1', 'f1'])

        # Step 1: drop 'other'. Only f2 is exclusive to it.
        self.deleteTag(repo, 'other')
        self.assertDeleted(repo, 'f2')
        self.assertNotDeleted(repo, *(i_chain + t_chain + ['f1']))

        # Step 2: re-point 'fourth' at i3, which leaves f1 unreferenced.
        self.moveTag(repo, 'fourth', 'i3')
        self.assertDeleted(repo, 'f1')
        self.assertNotDeleted(repo, *(i_chain + t_chain))

        # Step 3: drop 'latest'. Nothing goes away because 'fourth' is on the same branch.
        self.deleteTag(repo, 'latest')
        self.assertNotDeleted(repo, *(i_chain + t_chain))

        # Step 4: drop 'third', removing t1 through t3.
        self.deleteTag(repo, 'third')
        self.assertDeleted(repo, *t_chain)
        self.assertNotDeleted(repo, *i_chain)

        # Step 5: put a new tag on i1.
        self.moveTag(repo, 'newtag', 'i1')
        self.assertNotDeleted(repo, *i_chain)

        # Step 6: drop 'fourth', removing i2 and i3.
        self.deleteTag(repo, 'fourth')
        self.assertDeleted(repo, 'i2', 'i3')
        self.assertNotDeleted(repo, 'i1')

        # Step 7: drop 'newtag', removing the last remaining image.
        self.deleteTag(repo, 'newtag')
        self.assertDeleted(repo, 'i1')
|
|
|
|
def test_empty_gc(self):
    """ Garbage collecting a repository in which every image is still tagged is expected to
        remove nothing.
    """
    with self.assert_gc_integrity(expect_storage_removed=False):
        repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
                                     third=['t1', 't2', 't3'], fourth=['i1', 'f1'])

        self.gcNow(repo)

        all_images = ('i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')
        self.assertNotDeleted(repo, *all_images)
|
|
|
|
def test_time_machine_no_gc(self):
    """ With a one day tag expiration on the namespace, deleting a tag is expected to leave
        all of its images in place.
    """
    one_day_s = 60 * 60 * 24

    with self.assert_gc_integrity(expect_storage_removed=False):
        repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
        self._set_tag_expiration_policy(repo.namespace_user.username, one_day_s)

        self.deleteTag(repo, 'latest')
        self.assertNotDeleted(repo, 'i2', 'i3')
        self.assertNotDeleted(repo, 'i1', 'f1')
|
|
|
|
def test_time_machine_gc(self):
    """ With a one second tag expiration, the images of a deleted tag survive the first GC
        and are expected to be collected by the GC triggered when a second tag is deleted
        after the expiration has passed.
    """
    only_in_latest = ('i2', 'i3')
    used_by_other = ('i1', 'f1')

    with self.assert_gc_integrity():
        repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])

        self._set_tag_expiration_policy(repo.namespace_user.username, 1)

        self.deleteTag(repo, 'latest')
        self.assertNotDeleted(repo, *only_in_latest)
        self.assertNotDeleted(repo, *used_by_other)

        # Wait longer than the one second expiration set above.
        time.sleep(2)

        # This GC run is the one expected to collect the images associated with 'latest'.
        self.deleteTag(repo, 'other')
        self.assertDeleted(repo, *only_in_latest)
        self.assertNotDeleted(repo, *used_by_other)
|
|
|
|
def test_images_shared_storage(self):
    """ Two tags point at two different images that use one and the same storage. Deleting
        the first tag is expected to remove the first image while the storage stays.
    """
    with self.assert_gc_integrity(expect_storage_removed=False):
        repo = self.createRepository()

        # One storage shared by both images created below.
        shared_storage = model.storage.create_v1_storage(storage.preferred_locations[0])

        # Create both images first, then tag them, in the order first -> second.
        tagged_images = []
        for tag_name, docker_image_id in (('first', 'i1'), ('second', 'i2')):
            image = Image.create(docker_image_id=docker_image_id,
                                 repository=repo, storage=shared_storage,
                                 ancestors='/')
            tagged_images.append((tag_name, image))

        for tag_name, image in tagged_images:
            model.tag.store_tag_manifest(repo.namespace_user.username, repo.name,
                                         tag_name, image.docker_image_id,
                                         'sha:someshahere', '{}')

        # Delete the first tag.
        self.deleteTag(repo, 'first')
        self.assertDeleted(repo, 'i1')
        self.assertNotDeleted(repo, 'i2')
|
|
|
|
|
|
# Run the test cases of this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
|