From 49872838ab31d06d448b01a60d786f62fd779af3 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 6 Dec 2016 19:52:56 -0500 Subject: [PATCH] Add GC of layers in Clair Fixes https://www.pivotaltracker.com/story/show/135583207 --- app.py | 1 + data/model/__init__.py | 1 + data/model/repository.py | 44 +++++++++++++++++++--------------------- test/test_secscan.py | 13 ++++++++++++ util/secscan/api.py | 21 +++++++++++++++++++ 5 files changed, 57 insertions(+), 23 deletions(-) diff --git a/app.py b/app.py index c6c7eecbf..bdfef0d35 100644 --- a/app.py +++ b/app.py @@ -236,6 +236,7 @@ else: database.configure(app.config) model.config.app_config = app.config model.config.store = storage +model.config.secscan_api = secscan_api @login_manager.user_loader def load_user(user_uuid): diff --git a/data/model/__init__.py b/data/model/__init__.py index 23a1a647c..650855b8c 100644 --- a/data/model/__init__.py +++ b/data/model/__init__.py @@ -107,6 +107,7 @@ class Config(object): def __init__(self): self.app_config = None self.store = None + self.secscan_api = None config = Config() diff --git a/data/model/repository.py b/data/model/repository.py index 0281bc592..3c1971a7d 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -6,8 +6,8 @@ from peewee import JOIN_LEFT_OUTER, fn, SQL, IntegrityError from cachetools import ttl_cache from data.model import (DataModelException, tag, db_transaction, storage, permission, - _basequery) -from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User, + _basequery, config) +from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User, ImageStorage, Visibility, RepositoryPermission, RepositoryActionCount, Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage, Label, TagManifestLabel, db_for_update, get_epoch_timestamp, @@ -173,29 +173,26 @@ def garbage_collect_repo(repo, extra_candidate_set=None): referenced_candidates = (direct_referenced | ancestor_referenced) - # We desire two pieces of information from the database from the following + # We desire three pieces of information from the database from the following # query: all of the image ids which are associated with this repository, - # and the storages which are associated with those images. In order to - # fetch just this information, and bypass all of the peewee model parsing - # code, which is overkill for just two fields, we use a tuple query, and - # feed that directly to the dictionary tuple constructor which takes an - # iterable of tuples containing [(k, v), (k, v), ...] + # the storages which are associated with those images and their docker IDs. unreferenced_candidates = (Image - .select(Image.id, Image.storage) + .select(Image.id, Image.storage, Image.docker_image_id, ImageStorage) + .join(ImageStorage) .where(Image.id << candidates_orphans, - ~(Image.id << referenced_candidates)) - .tuples()) + ~(Image.id << referenced_candidates))) - unreferecend_images_to_storages = dict(unreferenced_candidates) - to_remove = unreferecend_images_to_storages.keys() + tuples = [(image.id, image.storage_id) for image in unreferenced_candidates] + unreferenced_images_to_storage_ids = dict(tuples) + image_ids_to_remove = unreferenced_images_to_storage_ids.keys() - if len(to_remove) > 0: - logger.info('Cleaning up unreferenced images: %s', to_remove) - storage_id_whitelist = set(unreferecend_images_to_storages.values()) + if len(image_ids_to_remove) > 0: + logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove) + storage_id_whitelist = set(unreferenced_images_to_storage_ids.values()) # Lookup any derived images for the images to remove. derived = DerivedStorageForImage.select().where( - DerivedStorageForImage.source_image << to_remove) + DerivedStorageForImage.source_image << image_ids_to_remove) has_derived = False for derived_image in derived: @@ -207,21 +204,22 @@ def garbage_collect_repo(repo, extra_candidate_set=None): try: (DerivedStorageForImage .delete() - .where(DerivedStorageForImage.source_image << to_remove) + .where(DerivedStorageForImage.source_image << image_ids_to_remove) .execute()) except IntegrityError: - logger.info('Could not GC derived images %s; will try again soon', to_remove) + logger.info('Could not GC derived images %s; will try again soon', image_ids_to_remove) return False try: - Image.delete().where(Image.id << to_remove).execute() + Image.delete().where(Image.id << image_ids_to_remove).execute() except IntegrityError: - logger.info('Could not GC images %s; will try again soon', to_remove) + logger.info('Could not GC images %s; will try again soon', image_ids_to_remove) return False - if len(to_remove) > 0: - logger.info('Garbage collecting storage for images: %s', to_remove) + if len(image_ids_to_remove) > 0: + logger.info('Garbage collecting storage for images: %s', image_ids_to_remove) storage.garbage_collect_storage(storage_id_whitelist) + config.secscan_api.delete_layers(unreferenced_candidates) return True diff --git a/test/test_secscan.py b/test/test_secscan.py index 4a5fc44db..5b9d87053 100644 --- a/test/test_secscan.py +++ b/test/test_secscan.py @@ -547,6 +547,19 @@ class TestSecurityScanner(unittest.TestCase): notification = model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 0}) self.assertFalse(VulnerabilityFoundEvent().should_perform(event_data, notification)) + def test_garbage_collection(self): + requests_made = set() + + @urlmatch(netloc=r'(.*\.)?mockclairservice', path=r'/v1/layers/(.+)') + def delete_layer(url, request): + requests_made.add(request.method) + + with HTTMock(delete_layer): + # Purge a repository. + model.repository.purge_repository('devtable', 'simple') + + # Ensure that DELETE is called (and only DELETE) for the layers in the security scanner. + self.assertEquals(set(['DELETE']), requests_made) def test_notification_worker(self): pages_called = [] diff --git a/util/secscan/api.py b/util/secscan/api.py index cbe97745f..99e3b8d4d 100644 --- a/util/secscan/api.py +++ b/util/secscan/api.py @@ -28,6 +28,7 @@ class APIRequestFailure(Exception): _API_METHOD_INSERT = 'layers' _API_METHOD_GET_LAYER = 'layers/%s' +_API_METHOD_DELETE_LAYER = 'layers/%s' _API_METHOD_MARK_NOTIFICATION_READ = 'notifications/%s' _API_METHOD_GET_NOTIFICATION = 'notifications/%s' _API_METHOD_PING = 'metrics' @@ -122,6 +123,26 @@ class SecurityScannerAPI(object): 'Layer': layer_request, } + def delete_layers(self, layers): + """ Deletes the given layers in the security scanner. """ + if self._config is None: + # Not configured. + return + + for layer in layers: + layer_id = '%s.%s' % (layer.docker_image_id, layer.storage.uuid) + self.delete_layer(layer_id) + + def delete_layer(self, layer_id): + """ Calls DELETE on the layer with the given ID in the security scanner, removing it from + its database. + """ + try: + response = self._call('DELETE', _API_METHOD_DELETE_LAYER % layer_id) + return response.status_code / 100 == 2 + except requests.exceptions.RequestException: + logger.exception('Failed to delete layer: %s', layer_id) + return False def ping(self): """ Calls GET on the metrics endpoint of the security scanner to ensure it is running