Merge pull request #2257 from coreos-inc/clair-gc-take2

feat(gc): Garbage collection for security scanning
This commit is contained in:
josephschorr 2017-01-17 14:49:36 -05:00 committed by GitHub
commit aafcb592a6
8 changed files with 194 additions and 62 deletions

View file

@ -146,20 +146,43 @@ class TestGarbageCollection(unittest.TestCase):
return len(label_ids - referenced_by_manifest)
@contextmanager
def assert_no_new_dangling_storages_or_labels(self):
def assert_gc_integrity(self, expect_storage_removed=True):
""" Specialized assertion for ensuring that GC cleans up all dangling storages
and labels.
and labels, invokes the callback for images removed and doesn't invoke the
callback for images *not* removed.
"""
# TODO: Consider also asserting the number of DB queries being performed.
# Add a callback for when images are removed.
removed_image_storages = []
model.config.register_image_cleanup_callback(removed_image_storages.extend)
# Store the number of dangling storages and labels.
existing_storage_count = self._get_dangling_storage_count()
existing_label_count = self._get_dangling_label_count()
yield
# Ensure the number of dangling storages and labels has not changed.
updated_storage_count = self._get_dangling_storage_count()
self.assertEqual(updated_storage_count, existing_storage_count)
updated_label_count = self._get_dangling_label_count()
self.assertEqual(updated_label_count, existing_label_count)
# Ensure that for each call to the image+storage cleanup callback, the image and its
# storage is not found *anywhere* in the database.
for removed_image_and_storage in removed_image_storages:
with self.assertRaises(Image.DoesNotExist):
Image.get(id=removed_image_and_storage.id)
with self.assertRaises(ImageStorage.DoesNotExist):
ImageStorage.get(id=removed_image_and_storage.storage_id)
with self.assertRaises(ImageStorage.DoesNotExist):
ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)
self.assertEquals(expect_storage_removed, bool(removed_image_storages))
def test_has_garbage(self):
""" Remove all existing repositories, then add one without garbage, check, then add one with
garbage, and check again.
@ -212,14 +235,14 @@ class TestGarbageCollection(unittest.TestCase):
def test_one_tag(self):
""" Create a repository with a single tag, then remove that tag and verify that the repository
is now empty. """
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity():
repository = self.createRepository(latest=['i1', 'i2', 'i3'])
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i1', 'i2', 'i3')
def test_two_tags_unshared_images(self):
""" Repository has two tags with no shared images between them. """
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i1', 'i2', 'i3')
@ -229,7 +252,7 @@ class TestGarbageCollection(unittest.TestCase):
""" Repository has two tags with shared images. Deleting the tag should only remove the
unshared images.
"""
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i2', 'i3')
@ -239,7 +262,7 @@ class TestGarbageCollection(unittest.TestCase):
""" Two repositories with different images. Removing the tag from one leaves the other's
images intact.
"""
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity():
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2')
@ -252,7 +275,7 @@ class TestGarbageCollection(unittest.TestCase):
""" Two repositories with shared images. Removing the tag from one leaves the other's
images intact.
"""
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity():
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2')
@ -265,7 +288,7 @@ class TestGarbageCollection(unittest.TestCase):
""" Two repositories under different namespaces should result in the images being deleted
but not completely removed from the database.
"""
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity():
repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3'])
repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3'])
@ -277,7 +300,7 @@ class TestGarbageCollection(unittest.TestCase):
""" Repository has multiple tags with shared images. Selectively deleting the tags, and
verifying at each step.
"""
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
@ -314,7 +337,7 @@ class TestGarbageCollection(unittest.TestCase):
self.assertDeleted(repository, 'i1')
def test_empty_gc(self):
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity(expect_storage_removed=False):
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
@ -324,7 +347,7 @@ class TestGarbageCollection(unittest.TestCase):
def test_time_machine_no_gc(self):
""" Repository has two tags with shared images. Deleting the tag should not remove any images
"""
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity(expect_storage_removed=False):
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24)
@ -336,7 +359,7 @@ class TestGarbageCollection(unittest.TestCase):
""" Repository has two tags with shared images. Deleting the second tag should cause the images
for the first deleted tag to gc.
"""
with self.assert_no_new_dangling_storages_or_labels():
with self.assert_gc_integrity():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
self._set_tag_expiration_policy(repository.namespace_user.username, 1)
@ -351,6 +374,37 @@ class TestGarbageCollection(unittest.TestCase):
self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
def test_images_shared_storage(self):
""" Repository with two tags, both with the same shared storage. Deleting the first
tag should delete the first image, but *not* its storage.
"""
with self.assert_gc_integrity(expect_storage_removed=False):
repository = self.createRepository()
# Add two tags, each with their own image, but with the same storage.
image_storage = model.storage.create_v1_storage(storage.preferred_locations[0])
first_image = Image.create(docker_image_id='i1',
repository=repository, storage=image_storage,
ancestors='/')
second_image = Image.create(docker_image_id='i2',
repository=repository, storage=image_storage,
ancestors='/')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id,
'sha:someshahere', '{}')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'second', second_image.docker_image_id,
'sha:someshahere', '{}')
# Delete the first tag.
self.deleteTag(repository, 'first')
self.assertDeleted(repository, 'i1')
self.assertNotDeleted(repository, 'i2')
if __name__ == '__main__':
unittest.main()

View file

@ -726,5 +726,34 @@ class TestSecurityScanner(unittest.TestCase):
self.assertIsNone(notification_queue.get())
def test_layer_gc(self):
layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True)
# Delete the prod tag so that only the `latest` tag remains.
model.tag.delete_tag(ADMIN_ACCESS_USER, SIMPLE_REPO, 'prod')
with fake_security_scanner() as security_scanner:
# Analyze the layer.
analyzer = LayerAnalyzer(app.config, self.api)
analyzer.analyze_recursively(layer)
layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest')
self.assertAnalyzed(layer, security_scanner, True, 1)
self.assertTrue(security_scanner.has_layer(security_scanner.layer_id(layer)))
namespace_user = model.user.get_user(ADMIN_ACCESS_USER)
model.user.change_user_tag_expiration(namespace_user, 0)
# Delete the tag in the repository and GC.
model.tag.delete_tag(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest')
time.sleep(1)
repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO)
model.repository.garbage_collect_repo(repo)
# Ensure that the security scanner no longer has the image.
self.assertFalse(security_scanner.has_layer(security_scanner.layer_id(layer)))
if __name__ == '__main__':
unittest.main()