From d609e6a1c451e49005170f76dc3cfd2720690170 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 22 Dec 2016 14:55:26 -0500 Subject: [PATCH] Security scanner garbage collection support Adds support for calling GC in the security scanner for any layers+storage removed by GC on the Quay side --- app.py | 1 + test/test_secscan.py | 29 ++++++++++++++++++++++ util/secscan/api.py | 58 +++++++++++++++++++++++++++++--------------- util/secscan/fake.py | 22 ++++++++++++++--- 4 files changed, 87 insertions(+), 23 deletions(-) diff --git a/app.py b/app.py index c6c7eecbf..953af51d8 100644 --- a/app.py +++ b/app.py @@ -236,6 +236,7 @@ else: database.configure(app.config) model.config.app_config = app.config model.config.store = storage +model.config.register_image_cleanup_callback(secscan_api.cleanup_layers) @login_manager.user_loader def load_user(user_uuid): diff --git a/test/test_secscan.py b/test/test_secscan.py index bb6fee94b..6e8c119b4 100644 --- a/test/test_secscan.py +++ b/test/test_secscan.py @@ -650,5 +650,34 @@ class TestSecurityScanner(unittest.TestCase): self.assertIsNotNone(notification_queue.get()) + def test_layer_gc(self): + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True) + + # Delete the prod tag so that only the `latest` tag remains. + model.tag.delete_tag(ADMIN_ACCESS_USER, SIMPLE_REPO, 'prod') + + with fake_security_scanner() as security_scanner: + # Analyze the layer. + analyzer = LayerAnalyzer(app.config, self.api) + analyzer.analyze_recursively(layer) + + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest') + self.assertAnalyzed(layer, security_scanner, True, 1) + self.assertTrue(security_scanner.has_layer(security_scanner.layer_id(layer))) + + namespace_user = model.user.get_user(ADMIN_ACCESS_USER) + model.user.change_user_tag_expiration(namespace_user, 0) + + # Delete the tag in the repository and GC. + model.tag.delete_tag(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest') + time.sleep(1) + + repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) + model.repository.garbage_collect_repo(repo) + + # Ensure that the security scanner no longer has the image. + self.assertFalse(security_scanner.has_layer(security_scanner.layer_id(layer))) + + if __name__ == '__main__': unittest.main() diff --git a/util/secscan/api.py b/util/secscan/api.py index c2f62f3c2..432c2cdcd 100644 --- a/util/secscan/api.py +++ b/util/secscan/api.py @@ -1,8 +1,10 @@ import logging + +from urlparse import urljoin + import requests from flask import url_for -from urlparse import urljoin from data.database import CloseForLongOperation from data import model @@ -40,11 +42,17 @@ class APIRequestFailure(Exception): _API_METHOD_INSERT = 'layers' _API_METHOD_GET_LAYER = 'layers/%s' +_API_METHOD_DELETE_LAYER = 'layers/%s' _API_METHOD_MARK_NOTIFICATION_READ = 'notifications/%s' _API_METHOD_GET_NOTIFICATION = 'notifications/%s' _API_METHOD_PING = 'metrics' +def compute_layer_id(layer): + """ Returns the ID for the layer in the security scanner. """ + return '%s.%s' % (layer.docker_image_id, layer.storage.uuid) + + class SecurityScannerAPI(object): """ Helper class for talking to the Security Scan service (Clair). """ def __init__(self, app, config, storage, client=None, skip_validation=False): @@ -62,7 +70,6 @@ class SecurityScannerAPI(object): self._default_storage_locations = config['DISTRIBUTED_STORAGE_PREFERENCE'] self._target_version = config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 2) - def _get_image_url_and_auth(self, image): """ Returns a tuple of the url and the auth header value that must be used to fetch the layer data itself. If the image can't be addressed, we return @@ -74,8 +81,8 @@ class SecurityScannerAPI(object): if not self._storage.exists(locations, path): locations = get_storage_locations(image.storage.uuid) if not locations or not self._storage.exists(locations, path): - logger.warning('Could not find a valid location to download layer %s.%s out of %s', - image.docker_image_id, image.storage.uuid, locations) + logger.warning('Could not find a valid location to download layer %s out of %s', + compute_layer_id(image), locations) return None, None uri = self._storage.get_direct_download_url(locations, path) @@ -106,17 +113,16 @@ class SecurityScannerAPI(object): return uri, auth_header - - def _new_analyze_request(self, image): - """ Create the request body to submit the given image for analysis. If the image's URL cannot + def _new_analyze_request(self, layer): + """ Create the request body to submit the given layer for analysis. If the layer's URL cannot be found, returns None. """ - url, auth_header = self._get_image_url_and_auth(image) + url, auth_header = self._get_image_url_and_auth(layer) if url is None: return None layer_request = { - 'Name': '%s.%s' % (image.docker_image_id, image.storage.uuid), + 'Name': compute_layer_id(layer), 'Path': url, 'Format': 'Docker', } @@ -126,14 +132,23 @@ class SecurityScannerAPI(object): 'Authorization': auth_header, } - if image.parent.docker_image_id and image.parent.storage.uuid: - layer_request['ParentName'] = '%s.%s' % (image.parent.docker_image_id, - image.parent.storage.uuid) + if layer.parent.docker_image_id and layer.parent.storage.uuid: + layer_request['ParentName'] = compute_layer_id(layer.parent) return { 'Layer': layer_request, } + def cleanup_layers(self, layers): + """ Callback invoked by garbage collection to cleanup any layers that no longer + need to be stored in the security scanner. + """ + if self._config is None: + # Security scanner not enabled. + return + + for layer in layers: + self.delete_layer(layer) def ping(self): """ Calls GET on the metrics endpoint of the security scanner to ensure it is running @@ -151,6 +166,17 @@ class SecurityScannerAPI(object): logger.exception('Exception when trying to connect to security scanner endpoint') raise Exception('Exception when trying to connect to security scanner endpoint') + def delete_layer(self, layer): + """ Calls DELETE on the given layer in the security scanner, removing it from + its database. + """ + layer_id = compute_layer_id(layer) + try: + response = self._call('DELETE', _API_METHOD_DELETE_LAYER % layer_id) + return response.status_code / 100 == 2 + except requests.exceptions.RequestException: + logger.exception('Failed to delete layer: %s', layer_id) + return False def analyze_layer(self, layer): """ Posts the given layer to the security scanner for analysis, blocking until complete. @@ -201,7 +227,6 @@ class SecurityScannerAPI(object): # Return the parsed API version. return json_response['Layer']['IndexedByVersion'] - def check_layer_vulnerable(self, layer_id, cve_name): """ Checks to see if the layer with the given ID is vulnerable to the specified CVE. """ layer_data = self._get_layer_data(layer_id, include_vulnerabilities=True) @@ -215,7 +240,6 @@ class SecurityScannerAPI(object): return False - def get_notification(self, notification_name, layer_limit=100, page=None): """ Gets the data for a specific notification, with optional page token. Returns a tuple of the data (None on failure) and whether to retry. @@ -245,7 +269,6 @@ class SecurityScannerAPI(object): return json_response, False - def mark_notification_read(self, notification_name): """ Marks a security scanner notification as read. """ try: @@ -255,13 +278,11 @@ class SecurityScannerAPI(object): logger.exception('Failed to mark notification as read: %s', notification_name) return False - def get_layer_data(self, layer, include_features=False, include_vulnerabilities=False): """ Returns the layer data for the specified layer. On error, returns None. """ - layer_id = '%s.%s' % (layer.docker_image_id, layer.storage.uuid) + layer_id = compute_layer_id(layer) return self._get_layer_data(layer_id, include_features, include_vulnerabilities) - def _get_layer_data(self, layer_id, include_features=False, include_vulnerabilities=False): try: params = {} @@ -288,7 +309,6 @@ class SecurityScannerAPI(object): return json_response - def _call(self, method, relative_url, params=None, body=None): """ Issues an HTTP call to the sec API at the given relative URL. This function disconnects from the database while awaiting a response diff --git a/util/secscan/fake.py b/util/secscan/fake.py index a741868c5..e09a54242 100644 --- a/util/secscan/fake.py +++ b/util/secscan/fake.py @@ -5,7 +5,7 @@ import urlparse from contextlib import contextmanager from httmock import urlmatch, HTTMock, all_requests -from util.secscan.api import UNKNOWN_PARENT_LAYER_ERROR_MSG +from util.secscan.api import UNKNOWN_PARENT_LAYER_ERROR_MSG, compute_layer_id @contextmanager def fake_security_scanner(hostname='fakesecurityscanner'): @@ -72,7 +72,7 @@ class FakeSecurityScanner(object): def layer_id(self, layer): """ Returns the Quay Security Scanner layer ID for the given layer (Image row). """ - return '%s.%s' % (layer.docker_image_id, layer.storage.uuid) + return compute_layer_id(layer) def add_layer(self, layer_id): """ Adds a layer to the security scanner, with no features or vulnerabilities. """ @@ -172,6 +172,20 @@ class FakeSecurityScanner(object): 'content': json.dumps({'Layer': layer_data}), } + @urlmatch(netloc=r'(.*\.)?' + self.hostname, path=r'/v1/layers/(.+)', method='DELETE') + def remove_layer_mock(url, _): + layer_id = url.path[len('/v1/layers/'):] + if not layer_id in self.layers: + return { + 'status_code': 404, + 'content': json.dumps({'Error': {'Message': 'Unknown layer'}}), + } + + self.layers.pop(layer_id) + return { + 'status_code': 204, 'content': '', + } + @urlmatch(netloc=r'(.*\.)?' + self.hostname, path=r'/v1/layers', method='POST') def post_layer_mock(_, request): body_data = json.loads(request.body) @@ -274,5 +288,5 @@ class FakeSecurityScanner(object): def response_content(url, _): raise Exception('Unknown endpoint: ' + str(url)) - return [get_layer_mock, post_layer_mock, get_notification, delete_notification, - response_content] + return [get_layer_mock, post_layer_mock, remove_layer_mock, get_notification, + delete_notification, response_content]