Add GC of layers in Clair

Fixes https://www.pivotaltracker.com/story/show/135583207
This commit is contained in:
Joseph Schorr 2016-12-06 19:52:56 -05:00
parent 0aa6e6cd58
commit 49872838ab
5 changed files with 57 additions and 23 deletions

1
app.py
View file

@ -236,6 +236,7 @@ else:
database.configure(app.config) database.configure(app.config)
model.config.app_config = app.config model.config.app_config = app.config
model.config.store = storage model.config.store = storage
model.config.secscan_api = secscan_api
@login_manager.user_loader @login_manager.user_loader
def load_user(user_uuid): def load_user(user_uuid):

View file

@ -107,6 +107,7 @@ class Config(object):
def __init__(self): def __init__(self):
self.app_config = None self.app_config = None
self.store = None self.store = None
self.secscan_api = None
config = Config() config = Config()

View file

@ -6,8 +6,8 @@ from peewee import JOIN_LEFT_OUTER, fn, SQL, IntegrityError
from cachetools import ttl_cache from cachetools import ttl_cache
from data.model import (DataModelException, tag, db_transaction, storage, permission, from data.model import (DataModelException, tag, db_transaction, storage, permission,
_basequery) _basequery, config)
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User, from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User, ImageStorage,
Visibility, RepositoryPermission, RepositoryActionCount, Visibility, RepositoryPermission, RepositoryActionCount,
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage, Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
Label, TagManifestLabel, db_for_update, get_epoch_timestamp, Label, TagManifestLabel, db_for_update, get_epoch_timestamp,
@ -173,29 +173,26 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
referenced_candidates = (direct_referenced | ancestor_referenced) referenced_candidates = (direct_referenced | ancestor_referenced)
# We desire two pieces of information from the database from the following # We desire three pieces of information from the database from the following
# query: all of the image ids which are associated with this repository, # query: all of the image ids which are associated with this repository,
# and the storages which are associated with those images. In order to # the storages which are associated with those images and their docker IDs.
# fetch just this information, and bypass all of the peewee model parsing
# code, which is overkill for just two fields, we use a tuple query, and
# feed that directly to the dictionary tuple constructor which takes an
# iterable of tuples containing [(k, v), (k, v), ...]
unreferenced_candidates = (Image unreferenced_candidates = (Image
.select(Image.id, Image.storage) .select(Image.id, Image.storage, Image.docker_image_id, ImageStorage)
.join(ImageStorage)
.where(Image.id << candidates_orphans, .where(Image.id << candidates_orphans,
~(Image.id << referenced_candidates)) ~(Image.id << referenced_candidates)))
.tuples())
unreferecend_images_to_storages = dict(unreferenced_candidates) tuples = [(image.id, image.storage_id) for image in unreferenced_candidates]
to_remove = unreferecend_images_to_storages.keys() unreferenced_images_to_storage_ids = dict(tuples)
image_ids_to_remove = unreferenced_images_to_storage_ids.keys()
if len(to_remove) > 0: if len(image_ids_to_remove) > 0:
logger.info('Cleaning up unreferenced images: %s', to_remove) logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove)
storage_id_whitelist = set(unreferecend_images_to_storages.values()) storage_id_whitelist = set(unreferenced_images_to_storage_ids.values())
# Lookup any derived images for the images to remove. # Lookup any derived images for the images to remove.
derived = DerivedStorageForImage.select().where( derived = DerivedStorageForImage.select().where(
DerivedStorageForImage.source_image << to_remove) DerivedStorageForImage.source_image << image_ids_to_remove)
has_derived = False has_derived = False
for derived_image in derived: for derived_image in derived:
@ -207,21 +204,22 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
try: try:
(DerivedStorageForImage (DerivedStorageForImage
.delete() .delete()
.where(DerivedStorageForImage.source_image << to_remove) .where(DerivedStorageForImage.source_image << image_ids_to_remove)
.execute()) .execute())
except IntegrityError: except IntegrityError:
logger.info('Could not GC derived images %s; will try again soon', to_remove) logger.info('Could not GC derived images %s; will try again soon', image_ids_to_remove)
return False return False
try: try:
Image.delete().where(Image.id << to_remove).execute() Image.delete().where(Image.id << image_ids_to_remove).execute()
except IntegrityError: except IntegrityError:
logger.info('Could not GC images %s; will try again soon', to_remove) logger.info('Could not GC images %s; will try again soon', image_ids_to_remove)
return False return False
if len(to_remove) > 0: if len(image_ids_to_remove) > 0:
logger.info('Garbage collecting storage for images: %s', to_remove) logger.info('Garbage collecting storage for images: %s', image_ids_to_remove)
storage.garbage_collect_storage(storage_id_whitelist) storage.garbage_collect_storage(storage_id_whitelist)
config.secscan_api.delete_layers(unreferenced_candidates)
return True return True

View file

@ -547,6 +547,19 @@ class TestSecurityScanner(unittest.TestCase):
notification = model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 0}) notification = model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 0})
self.assertFalse(VulnerabilityFoundEvent().should_perform(event_data, notification)) self.assertFalse(VulnerabilityFoundEvent().should_perform(event_data, notification))
def test_garbage_collection(self):
requests_made = set()
@urlmatch(netloc=r'(.*\.)?mockclairservice', path=r'/v1/layers/(.+)')
def delete_layer(url, request):
requests_made.add(request.method)
with HTTMock(delete_layer):
# Purge a repository.
model.repository.purge_repository('devtable', 'simple')
# Ensure that DELETE is called (and only DELETE) for the layers in the security scanner.
self.assertEquals(set(['DELETE']), requests_made)
def test_notification_worker(self): def test_notification_worker(self):
pages_called = [] pages_called = []

View file

@ -28,6 +28,7 @@ class APIRequestFailure(Exception):
_API_METHOD_INSERT = 'layers' _API_METHOD_INSERT = 'layers'
_API_METHOD_GET_LAYER = 'layers/%s' _API_METHOD_GET_LAYER = 'layers/%s'
_API_METHOD_DELETE_LAYER = 'layers/%s'
_API_METHOD_MARK_NOTIFICATION_READ = 'notifications/%s' _API_METHOD_MARK_NOTIFICATION_READ = 'notifications/%s'
_API_METHOD_GET_NOTIFICATION = 'notifications/%s' _API_METHOD_GET_NOTIFICATION = 'notifications/%s'
_API_METHOD_PING = 'metrics' _API_METHOD_PING = 'metrics'
@ -122,6 +123,26 @@ class SecurityScannerAPI(object):
'Layer': layer_request, 'Layer': layer_request,
} }
def delete_layers(self, layers):
""" Deletes the given layers in the security scanner. """
if self._config is None:
# Not configured.
return
for layer in layers:
layer_id = '%s.%s' % (layer.docker_image_id, layer.storage.uuid)
self.delete_layer(layer_id)
def delete_layer(self, layer_id):
""" Calls DELETE on the layer with the given ID in the security scanner, removing it from
its database.
"""
try:
response = self._call('DELETE', _API_METHOD_DELETE_LAYER % layer_id)
return response.status_code / 100 == 2
except requests.exceptions.RequestException:
logger.exception('Failed to delete layer: %s', layer_id)
return False
def ping(self): def ping(self):
""" Calls GET on the metrics endpoint of the security scanner to ensure it is running """ Calls GET on the metrics endpoint of the security scanner to ensure it is running