Add GC of layers in Clair
Fixes https://www.pivotaltracker.com/story/show/135583207
This commit is contained in:
parent
0aa6e6cd58
commit
49872838ab
5 changed files with 57 additions and 23 deletions
1
app.py
1
app.py
|
@ -236,6 +236,7 @@ else:
|
||||||
database.configure(app.config)
|
database.configure(app.config)
|
||||||
model.config.app_config = app.config
|
model.config.app_config = app.config
|
||||||
model.config.store = storage
|
model.config.store = storage
|
||||||
|
model.config.secscan_api = secscan_api
|
||||||
|
|
||||||
@login_manager.user_loader
|
@login_manager.user_loader
|
||||||
def load_user(user_uuid):
|
def load_user(user_uuid):
|
||||||
|
|
|
@ -107,6 +107,7 @@ class Config(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.app_config = None
|
self.app_config = None
|
||||||
self.store = None
|
self.store = None
|
||||||
|
self.secscan_api = None
|
||||||
|
|
||||||
|
|
||||||
config = Config()
|
config = Config()
|
||||||
|
|
|
@ -6,8 +6,8 @@ from peewee import JOIN_LEFT_OUTER, fn, SQL, IntegrityError
|
||||||
from cachetools import ttl_cache
|
from cachetools import ttl_cache
|
||||||
|
|
||||||
from data.model import (DataModelException, tag, db_transaction, storage, permission,
|
from data.model import (DataModelException, tag, db_transaction, storage, permission,
|
||||||
_basequery)
|
_basequery, config)
|
||||||
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User,
|
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User, ImageStorage,
|
||||||
Visibility, RepositoryPermission, RepositoryActionCount,
|
Visibility, RepositoryPermission, RepositoryActionCount,
|
||||||
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
|
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
|
||||||
Label, TagManifestLabel, db_for_update, get_epoch_timestamp,
|
Label, TagManifestLabel, db_for_update, get_epoch_timestamp,
|
||||||
|
@ -173,29 +173,26 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||||
|
|
||||||
referenced_candidates = (direct_referenced | ancestor_referenced)
|
referenced_candidates = (direct_referenced | ancestor_referenced)
|
||||||
|
|
||||||
# We desire two pieces of information from the database from the following
|
# We desire three pieces of information from the database from the following
|
||||||
# query: all of the image ids which are associated with this repository,
|
# query: all of the image ids which are associated with this repository,
|
||||||
# and the storages which are associated with those images. In order to
|
# the storages which are associated with those images and their docker IDs.
|
||||||
# fetch just this information, and bypass all of the peewee model parsing
|
|
||||||
# code, which is overkill for just two fields, we use a tuple query, and
|
|
||||||
# feed that directly to the dictionary tuple constructor which takes an
|
|
||||||
# iterable of tuples containing [(k, v), (k, v), ...]
|
|
||||||
unreferenced_candidates = (Image
|
unreferenced_candidates = (Image
|
||||||
.select(Image.id, Image.storage)
|
.select(Image.id, Image.storage, Image.docker_image_id, ImageStorage)
|
||||||
|
.join(ImageStorage)
|
||||||
.where(Image.id << candidates_orphans,
|
.where(Image.id << candidates_orphans,
|
||||||
~(Image.id << referenced_candidates))
|
~(Image.id << referenced_candidates)))
|
||||||
.tuples())
|
|
||||||
|
|
||||||
unreferecend_images_to_storages = dict(unreferenced_candidates)
|
tuples = [(image.id, image.storage_id) for image in unreferenced_candidates]
|
||||||
to_remove = unreferecend_images_to_storages.keys()
|
unreferenced_images_to_storage_ids = dict(tuples)
|
||||||
|
image_ids_to_remove = unreferenced_images_to_storage_ids.keys()
|
||||||
|
|
||||||
if len(to_remove) > 0:
|
if len(image_ids_to_remove) > 0:
|
||||||
logger.info('Cleaning up unreferenced images: %s', to_remove)
|
logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove)
|
||||||
storage_id_whitelist = set(unreferecend_images_to_storages.values())
|
storage_id_whitelist = set(unreferenced_images_to_storage_ids.values())
|
||||||
|
|
||||||
# Lookup any derived images for the images to remove.
|
# Lookup any derived images for the images to remove.
|
||||||
derived = DerivedStorageForImage.select().where(
|
derived = DerivedStorageForImage.select().where(
|
||||||
DerivedStorageForImage.source_image << to_remove)
|
DerivedStorageForImage.source_image << image_ids_to_remove)
|
||||||
|
|
||||||
has_derived = False
|
has_derived = False
|
||||||
for derived_image in derived:
|
for derived_image in derived:
|
||||||
|
@ -207,21 +204,22 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||||
try:
|
try:
|
||||||
(DerivedStorageForImage
|
(DerivedStorageForImage
|
||||||
.delete()
|
.delete()
|
||||||
.where(DerivedStorageForImage.source_image << to_remove)
|
.where(DerivedStorageForImage.source_image << image_ids_to_remove)
|
||||||
.execute())
|
.execute())
|
||||||
except IntegrityError:
|
except IntegrityError:
|
||||||
logger.info('Could not GC derived images %s; will try again soon', to_remove)
|
logger.info('Could not GC derived images %s; will try again soon', image_ids_to_remove)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
Image.delete().where(Image.id << to_remove).execute()
|
Image.delete().where(Image.id << image_ids_to_remove).execute()
|
||||||
except IntegrityError:
|
except IntegrityError:
|
||||||
logger.info('Could not GC images %s; will try again soon', to_remove)
|
logger.info('Could not GC images %s; will try again soon', image_ids_to_remove)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if len(to_remove) > 0:
|
if len(image_ids_to_remove) > 0:
|
||||||
logger.info('Garbage collecting storage for images: %s', to_remove)
|
logger.info('Garbage collecting storage for images: %s', image_ids_to_remove)
|
||||||
storage.garbage_collect_storage(storage_id_whitelist)
|
storage.garbage_collect_storage(storage_id_whitelist)
|
||||||
|
config.secscan_api.delete_layers(unreferenced_candidates)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
|
@ -547,6 +547,19 @@ class TestSecurityScanner(unittest.TestCase):
|
||||||
notification = model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 0})
|
notification = model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 0})
|
||||||
self.assertFalse(VulnerabilityFoundEvent().should_perform(event_data, notification))
|
self.assertFalse(VulnerabilityFoundEvent().should_perform(event_data, notification))
|
||||||
|
|
||||||
|
def test_garbage_collection(self):
|
||||||
|
requests_made = set()
|
||||||
|
|
||||||
|
@urlmatch(netloc=r'(.*\.)?mockclairservice', path=r'/v1/layers/(.+)')
|
||||||
|
def delete_layer(url, request):
|
||||||
|
requests_made.add(request.method)
|
||||||
|
|
||||||
|
with HTTMock(delete_layer):
|
||||||
|
# Purge a repository.
|
||||||
|
model.repository.purge_repository('devtable', 'simple')
|
||||||
|
|
||||||
|
# Ensure that DELETE is called (and only DELETE) for the layers in the security scanner.
|
||||||
|
self.assertEquals(set(['DELETE']), requests_made)
|
||||||
|
|
||||||
def test_notification_worker(self):
|
def test_notification_worker(self):
|
||||||
pages_called = []
|
pages_called = []
|
||||||
|
|
|
@ -28,6 +28,7 @@ class APIRequestFailure(Exception):
|
||||||
|
|
||||||
_API_METHOD_INSERT = 'layers'
|
_API_METHOD_INSERT = 'layers'
|
||||||
_API_METHOD_GET_LAYER = 'layers/%s'
|
_API_METHOD_GET_LAYER = 'layers/%s'
|
||||||
|
_API_METHOD_DELETE_LAYER = 'layers/%s'
|
||||||
_API_METHOD_MARK_NOTIFICATION_READ = 'notifications/%s'
|
_API_METHOD_MARK_NOTIFICATION_READ = 'notifications/%s'
|
||||||
_API_METHOD_GET_NOTIFICATION = 'notifications/%s'
|
_API_METHOD_GET_NOTIFICATION = 'notifications/%s'
|
||||||
_API_METHOD_PING = 'metrics'
|
_API_METHOD_PING = 'metrics'
|
||||||
|
@ -122,6 +123,26 @@ class SecurityScannerAPI(object):
|
||||||
'Layer': layer_request,
|
'Layer': layer_request,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def delete_layers(self, layers):
|
||||||
|
""" Deletes the given layers in the security scanner. """
|
||||||
|
if self._config is None:
|
||||||
|
# Not configured.
|
||||||
|
return
|
||||||
|
|
||||||
|
for layer in layers:
|
||||||
|
layer_id = '%s.%s' % (layer.docker_image_id, layer.storage.uuid)
|
||||||
|
self.delete_layer(layer_id)
|
||||||
|
|
||||||
|
def delete_layer(self, layer_id):
|
||||||
|
""" Calls DELETE on the layer with the given ID in the security scanner, removing it from
|
||||||
|
its database.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
response = self._call('DELETE', _API_METHOD_DELETE_LAYER % layer_id)
|
||||||
|
return response.status_code / 100 == 2
|
||||||
|
except requests.exceptions.RequestException:
|
||||||
|
logger.exception('Failed to delete layer: %s', layer_id)
|
||||||
|
return False
|
||||||
|
|
||||||
def ping(self):
|
def ping(self):
|
||||||
""" Calls GET on the metrics endpoint of the security scanner to ensure it is running
|
""" Calls GET on the metrics endpoint of the security scanner to ensure it is running
|
||||||
|
|
Reference in a new issue