diff --git a/test/test_secscan.py b/test/test_secscan.py index 9a1a3018e..bb6fee94b 100644 --- a/test/test_secscan.py +++ b/test/test_secscan.py @@ -8,7 +8,7 @@ from data.database import Image, IMAGE_NOT_SCANNED_ENGINE_VERSION from endpoints.notificationevent import VulnerabilityFoundEvent from endpoints.v2 import v2_bp from initdb import setup_database_for_testing, finished_database_for_testing -from util.secscan.api import SecurityScannerAPI, AnalyzeLayerException +from util.secscan.api import SecurityScannerAPI from util.secscan.analyzer import LayerAnalyzer from util.secscan.fake import fake_security_scanner from util.secscan.notifier import process_notification_data @@ -131,7 +131,7 @@ class TestSecurityScanner(unittest.TestCase): def test_analyze_layer_failure(self): - """ Tests that failing to analyze a layer marks it as not analyzed. """ + """ Tests that failing to analyze a layer (because it 422s) marks it as analyzed but failed. """ layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True) self.assertFalse(layer.security_indexed) @@ -148,6 +148,8 @@ class TestSecurityScanner(unittest.TestCase): def test_analyze_layer_internal_error(self): + """ Tests that failing to analyze a layer (because it 500s) marks it as not analyzed. """ + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True) self.assertFalse(layer.security_indexed) self.assertEquals(-1, layer.security_indexed_engine) @@ -162,7 +164,29 @@ class TestSecurityScanner(unittest.TestCase): self.assertAnalyzed(layer, security_scanner, False, -1) - def test_analyze_layer_missing_parent(self): + def test_analyze_layer_error(self): + """ Tests that failing to analyze a layer (because it 400s) marks it as analyzed but failed. """ + + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True) + self.assertFalse(layer.security_indexed) + self.assertEquals(-1, layer.security_indexed_engine) + + with fake_security_scanner() as security_scanner: + # Make is so trying to analyze the parent will fail with an error. + security_scanner.set_error_layer_id(security_scanner.layer_id(layer.parent)) + + # Try to the layer and its parents, but with one request causing an error. + analyzer = LayerAnalyzer(app.config, self.api) + analyzer.analyze_recursively(layer) + + # Make sure it is marked as analyzed, but in a failed state. + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest') + self.assertAnalyzed(layer, security_scanner, False, 1) + + + def test_analyze_layer_missing_parent_handled(self): + """ Tests that a missing parent causes an automatic reanalysis, which succeeds. """ + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True) self.assertFalse(layer.security_indexed) self.assertEquals(-1, layer.security_indexed_engine) @@ -176,19 +200,81 @@ class TestSecurityScanner(unittest.TestCase): layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest') self.assertAnalyzed(layer, security_scanner, True, 1) - # Mark the layer as not analyzed and delete its parent layer from the security scanner. + # Mark the layer as not yet scanned. layer.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION layer.security_indexed = False layer.save() + # Remove the layer's parent entirely from the security scanner. security_scanner.remove_layer(security_scanner.layer_id(layer.parent)) - # Try to analyze again; this should fail because the parent is missing. - with self.assertRaisesRegexp(AnalyzeLayerException, 'Bad request to security scanner'): - analyzer.analyze_recursively(layer) + # Analyze again, which should properly re-analyze the missing parent and this layer. + analyzer.analyze_recursively(layer) + + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest') + self.assertAnalyzed(layer, security_scanner, True, 1) + + + def test_analyze_layer_invalid_parent(self): + """ Tests that trying to reanalyze a parent that is invalid causes the layer to be marked + as analyzed, but failed. + """ + + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True) + self.assertFalse(layer.security_indexed) + self.assertEquals(-1, layer.security_indexed_engine) + + with fake_security_scanner() as security_scanner: + # Analyze the layer and its parents. + analyzer = LayerAnalyzer(app.config, self.api) + analyzer.analyze_recursively(layer) + + # Make sure it was analyzed. + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest') + self.assertAnalyzed(layer, security_scanner, True, 1) + + # Mark the layer as not yet scanned. + layer.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION + layer.security_indexed = False + layer.save() + + # Remove the layer's parent entirely from the security scanner. + security_scanner.remove_layer(security_scanner.layer_id(layer.parent)) + + # Make is so trying to analyze the parent will fail. + security_scanner.set_error_layer_id(security_scanner.layer_id(layer.parent)) + + # Try to analyze again, which should try to reindex the parent and fail. + analyzer.analyze_recursively(layer) + + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest') + self.assertAnalyzed(layer, security_scanner, False, 1) + + + def test_analyze_layer_unsupported_parent(self): + """ Tests that attempting to analyze a layer whose parent is unanalyzable, results in the layer + being marked as analyzed, but failed. + """ + + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True) + self.assertFalse(layer.security_indexed) + self.assertEquals(-1, layer.security_indexed_engine) + + with fake_security_scanner() as security_scanner: + # Make is so trying to analyze the parent will fail. + security_scanner.set_fail_layer_id(security_scanner.layer_id(layer.parent)) + + # Attempt to the layer and its parents. This should mark the layer itself as unanalyzable. + analyzer = LayerAnalyzer(app.config, self.api) + analyzer.analyze_recursively(layer) + + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest') + self.assertAnalyzed(layer, security_scanner, False, 1) def test_analyze_layer_missing_storage(self): + """ Tests trying to analyze a layer with missing storage. """ + layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True) self.assertFalse(layer.security_indexed) self.assertEquals(-1, layer.security_indexed_engine) @@ -199,16 +285,16 @@ class TestSecurityScanner(unittest.TestCase): storage.remove(locations, path) storage.remove(locations, 'all_files_exist') - with fake_security_scanner(): + with fake_security_scanner() as security_scanner: analyzer = LayerAnalyzer(app.config, self.api) analyzer.analyze_recursively(layer) layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest') - self.assertEquals(False, layer.security_indexed) - self.assertEquals(1, layer.security_indexed_engine) + self.assertAnalyzed(layer, security_scanner, False, 1) - def assert_analyze_layer_notify(self, security_indexed_engine, security_indexed, expect_notification): + def assert_analyze_layer_notify(self, security_indexed_engine, security_indexed, + expect_notification): layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, SIMPLE_REPO, 'latest', include_storage=True) self.assertFalse(layer.security_indexed) self.assertEquals(-1, layer.security_indexed_engine) @@ -218,7 +304,8 @@ class TestSecurityScanner(unittest.TestCase): # Add a repo event for the layer. repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 100}) + model.notification.create_repo_notification(repo, 'vulnerability_found', + 'quay_notification', {}, {'level': 100}) # Update the layer's state before analyzing. layer.security_indexed_engine = security_indexed_engine @@ -285,7 +372,8 @@ class TestSecurityScanner(unittest.TestCase): # Add a repo event for the layer. repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 100}) + model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', + {}, {'level': 100}) # Ensure that there are no event queue items for the layer. self.assertIsNone(notification_queue.get()) @@ -314,7 +402,8 @@ class TestSecurityScanner(unittest.TestCase): # Add a repo event for the layer. repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 100}) + model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', + {}, {'level': 100}) # Ensure that there are no event queue items for the layer. self.assertIsNone(notification_queue.get()) @@ -343,7 +432,8 @@ class TestSecurityScanner(unittest.TestCase): # Add a repo event for the layer. repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 100}) + model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', + {}, {'level': 100}) # Ensure that there are no event queue items for the layer. self.assertIsNone(notification_queue.get()) @@ -389,7 +479,8 @@ class TestSecurityScanner(unittest.TestCase): # Add a repo event for the layer. repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 100}) + model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', + {}, {'level': 100}) # Ensure that there are no event queue items for the layer. self.assertIsNone(notification_queue.get()) diff --git a/util/secscan/analyzer.py b/util/secscan/analyzer.py index fdbba29be..924aff3c1 100644 --- a/util/secscan/analyzer.py +++ b/util/secscan/analyzer.py @@ -6,54 +6,83 @@ import features from collections import defaultdict from endpoints.notificationhelper import spawn_notification -from data.database import Image, ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION +from data.database import ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION, Image from data.model.tag import filter_tags_have_repository_event, get_tags_for_image from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base -from util.secscan.api import APIRequestFailure +from util.secscan.api import (APIRequestFailure, AnalyzeLayerException, MissingParentLayerException, + InvalidLayerException, AnalyzeLayerRetryException) from util.morecollections import AttrDict logger = logging.getLogger(__name__) +class PreemptedException(Exception): + """ Exception raised if another worker analyzed the image before this worker was able to do so. + """ + + class LayerAnalyzer(object): """ Helper class to perform analysis of a layer via the security scanner. """ def __init__(self, config, api): self._api = api self._target_version = config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 2) - def analyze_recursively(self, layer): - """ Analyzes a layer and all its parents. - - Return a tuple of two bools: - - The first one tells us if the layer and its parents analyzed successfully. - - The second one is set to False when another call pre-empted the candidate's analysis - for us. + """ Analyzes a layer and all its parents. Raises a PreemptedException if the analysis was + preempted by another worker. """ - if layer.parent_id and layer.parent.security_indexed_engine < self._target_version: - # The image has a parent that is not analyzed yet with this engine. - # Get the parent to get it's own parent and recurse. + try: + self._analyze_recursively_and_check(layer) + except MissingParentLayerException: + # The parent layer of this layer was missing. Force a reanalyze. + try: + self._analyze_recursively_and_check(layer, force_parents=True) + except MissingParentLayerException: + # Parent is still missing... mark the layer as invalid. + if not set_secscan_status(layer, False, self._target_version): + raise PreemptedException + + def _analyze_recursively_and_check(self, layer, force_parents=False): + """ Analyzes a layer and all its parents, optionally forcing parents to be reanalyzed, + and checking for various exceptions that can occur during analysis. + """ + try: + self._analyze_recursively(layer, force_parents=force_parents) + except InvalidLayerException: + # One of the parent layers is invalid, so this layer is invalid as well. + if not set_secscan_status(layer, False, self._target_version): + raise PreemptedException + except AnalyzeLayerRetryException: + # Something went wrong when trying to analyze the layer, but we should retry, so leave + # the layer unindexed. Another worker will come along and handle it. + pass + except MissingParentLayerException: + # Pass upward, as missing parent is handled in the analyze_recursively method. + raise + except AnalyzeLayerException: + # Something went wrong when trying to analyze the layer and we cannot retry, so mark the + # layer as invalid. + if not set_secscan_status(layer, False, self._target_version): + raise PreemptedException + + def _analyze_recursively(self, layer, force_parents=False): + # Check if there is a parent layer that needs to be analyzed. + if layer.parent_id and (force_parents or + layer.parent.security_indexed_engine < self._target_version): try: base_query = get_image_with_storage_and_parent_base() parent_layer = base_query.where(Image.id == layer.parent_id).get() except Image.DoesNotExist: logger.warning("Image %s has Image %s as parent but doesn't exist.", layer.id, layer.parent_id) + raise AnalyzeLayerException('Parent image not found') - return False, set_secscan_status(layer, False, self._target_version) + self._analyze_recursively(parent_layer, force_parents=force_parents) - cont, _ = self.analyze_recursively(parent_layer) - if not cont: - # The analysis failed for some reason and did not mark the layer as failed, - # thus we should not try to analyze the children of that layer. - # Interrupt the recursive analysis and return as no-one pre-empted us. - return False, True + # Analyze the layer itself. + self._analyze(layer, force_parents=force_parents) - # Now we know all parents are analyzed. - return self._analyze(layer) - - - def _analyze(self, layer): + def _analyze(self, layer, force_parents=False): """ Analyzes a single layer. Return a tuple of two bools: @@ -63,33 +92,30 @@ class LayerAnalyzer(object): """ # If the parent couldn't be analyzed with the target version or higher, we can't analyze # this image. Mark it as failed with the current target version. - if (layer.parent_id and not layer.parent.security_indexed and - layer.parent.security_indexed_engine >= self._target_version): - return True, set_secscan_status(layer, False, self._target_version) + if not force_parents and (layer.parent_id and not layer.parent.security_indexed and + layer.parent.security_indexed_engine >= self._target_version): + if not set_secscan_status(layer, False, self._target_version): + raise PreemptedException + + # Nothing more to do. + return # Analyze the image. previously_security_indexed_successfully = layer.security_indexed previous_security_indexed_engine = layer.security_indexed_engine logger.info('Analyzing layer %s', layer.docker_image_id) - (analyzed_version, should_requeue) = self._api.analyze_layer(layer) + analyzed_version = self._api.analyze_layer(layer) - # If analysis failed, then determine whether we need to requeue. - if not analyzed_version: - if should_requeue: - # If the layer needs to be requeued, return that the children cannot be analyzed (at this - # time) and there was no collision with another worker. - return False, False - else: - # If the layer cannot be requeued, we allow the children to be analyzed, because the code - # path above will mark them as not analyzable, and we mark the image itself as not being - # analyzable. - return True, set_secscan_status(layer, False, self._target_version) - - # Mark the image as analyzed. logger.info('Analyzed layer %s successfully with version %s', layer.docker_image_id, analyzed_version) - set_status = set_secscan_status(layer, True, analyzed_version) + + # Mark the image as analyzed. + if not set_secscan_status(layer, True, analyzed_version): + # If the image was previously successfully marked as resolved, then set_secscan_status + # might return False because we're not changing it (since this is a fixup). + if not previously_security_indexed_successfully: + raise PreemptedException # If we are the one who've done the job successfully first, then we need to decide if we should # send notifications. Notifications are sent if: @@ -99,9 +125,7 @@ class LayerAnalyzer(object): # feature set in the security scanner, notifications will be spammy. is_new_image = previous_security_indexed_engine == IMAGE_NOT_SCANNED_ENGINE_VERSION is_existing_image_unindexed = not is_new_image and not previously_security_indexed_successfully - if (features.SECURITY_NOTIFICATIONS and set_status and - (is_new_image or is_existing_image_unindexed)): - + if (features.SECURITY_NOTIFICATIONS and (is_new_image or is_existing_image_unindexed)): # Get the tags of the layer we analyzed. repository_map = defaultdict(list) event = ExternalNotificationEvent.get(name='vulnerability_found') @@ -152,5 +176,3 @@ class LayerAnalyzer(object): }) spawn_notification(repository, 'vulnerability_found', event_data) - - return True, set_status diff --git a/util/secscan/api.py b/util/secscan/api.py index 2e04067ea..c2f62f3c2 100644 --- a/util/secscan/api.py +++ b/util/secscan/api.py @@ -15,12 +15,24 @@ from util import get_app_url TOKEN_VALIDITY_LIFETIME_S = 60 # Amount of time the security scanner has to call the layer URL +UNKNOWN_PARENT_LAYER_ERROR_MSG = 'worker: parent layer is unknown, it must be processed first' logger = logging.getLogger(__name__) class AnalyzeLayerException(Exception): - """ Exception raised when a layer fails to analyze due to a *client-side* issue. """ + """ Exception raised when a layer fails to analyze due to a request issue. """ + +class AnalyzeLayerRetryException(Exception): + """ Exception raised when a layer fails to analyze due to a request issue, and the request should + be retried. + """ + +class MissingParentLayerException(AnalyzeLayerException): + """ Exception raised when the parent of the layer is missing from the security scanner. """ + +class InvalidLayerException(AnalyzeLayerException): + """ Exception raised when the layer itself cannot be handled by the security scanner. """ class APIRequestFailure(Exception): """ Exception raised when there is a failure to conduct an API request. """ @@ -142,12 +154,12 @@ class SecurityScannerAPI(object): def analyze_layer(self, layer): """ Posts the given layer to the security scanner for analysis, blocking until complete. - Returns a tuple containing the analysis version (on success, None on failure) and - whether the request should be retried. + Returns the analysis version on success or raises an exception deriving from + AnalyzeLayerException on failure. Callers should handle all cases of AnalyzeLayerException. """ request = self._new_analyze_request(layer) if not request: - return None, False + raise AnalyzeLayerException logger.info('Analyzing layer %s', request['Layer']['Name']) try: @@ -155,13 +167,13 @@ class SecurityScannerAPI(object): json_response = response.json() except requests.exceptions.Timeout: logger.exception('Timeout when trying to post layer data response for %s', layer.id) - return None, True + raise AnalyzeLayerRetryException except requests.exceptions.ConnectionError: logger.exception('Connection error when trying to post layer data response for %s', layer.id) - return None, True + raise AnalyzeLayerRetryException except (requests.exceptions.RequestException, ValueError) as re: - logger.exception('Failed to post layer data response for %s', layer.id) - return None, False + logger.exception('Failed to post layer data response for %s: %s', layer.id, re) + raise AnalyzeLayerException # Handle any errors from the security scanner. if response.status_code != 201: @@ -171,17 +183,23 @@ class SecurityScannerAPI(object): # 400 means the layer could not be analyzed due to a bad request. if response.status_code == 400: - logger.error('Bad request when calling security scanner for layer %s: %s', - response.status_code, json_response) - raise AnalyzeLayerException('Bad request to security scanner') + if message == UNKNOWN_PARENT_LAYER_ERROR_MSG: + raise MissingParentLayerException('Bad request to security scanner: %s' % message) + else: + raise AnalyzeLayerException('Bad request to security scanner: %s' % message) # 422 means that the layer could not be analyzed: - # - the layer could not be extracted (manifest?) + # - the layer could not be extracted (might be a manifest or an invalid .tar.gz) # - the layer operating system / package manager is unsupported - return None, response.status_code != 422 + elif response.status_code == 422: + raise InvalidLayerException - api_version = json_response['Layer']['IndexedByVersion'] - return api_version, False + # Otherwise, it is some other error and we should retry. + else: + raise AnalyzeLayerRetryException + + # Return the parsed API version. + return json_response['Layer']['IndexedByVersion'] def check_layer_vulnerable(self, layer_id, cve_name): diff --git a/util/secscan/fake.py b/util/secscan/fake.py index 6b1efc894..a741868c5 100644 --- a/util/secscan/fake.py +++ b/util/secscan/fake.py @@ -5,6 +5,7 @@ import urlparse from contextlib import contextmanager from httmock import urlmatch, HTTMock, all_requests +from util.secscan.api import UNKNOWN_PARENT_LAYER_ERROR_MSG @contextmanager def fake_security_scanner(hostname='fakesecurityscanner'): @@ -29,6 +30,7 @@ class FakeSecurityScanner(object): self.fail_layer_id = None self.internal_error_layer_id = None + self.error_layer_id = None def set_fail_layer_id(self, fail_layer_id): """ Sets a layer ID that, if encountered when the analyze call is made, causes a 422 @@ -42,6 +44,12 @@ class FakeSecurityScanner(object): """ self.internal_error_layer_id = internal_error_layer_id + def set_error_layer_id(self, error_layer_id): + """ Sets a layer ID that, if encountered when the analyze call is made, causes a 400 + to be raised. + """ + self.error_layer_id = error_layer_id + def has_layer(self, layer_id): """ Returns true if the layer with the given ID has been analyzed. """ return layer_id in self.layers @@ -192,6 +200,12 @@ class FakeSecurityScanner(object): 'content': json.dumps({'Error': {'Message': 'Cannot analyze'}}), } + if layer['Name'] == self.error_layer_id: + return { + 'status_code': 400, + 'content': json.dumps({'Error': {'Message': 'Some sort of error'}}), + } + parent_id = layer.get('ParentName', None) parent_layer = None @@ -200,7 +214,7 @@ class FakeSecurityScanner(object): if parent_layer is None: return { 'status_code': 400, - 'content': json.dumps({'Error': {'Message': 'Unknown parent'}}), + 'content': json.dumps({'Error': {'Message': UNKNOWN_PARENT_LAYER_ERROR_MSG}}), } self.add_layer(layer['Name']) diff --git a/workers/securityworker.py b/workers/securityworker.py index 4a2aab14a..a4d72c5ec 100644 --- a/workers/securityworker.py +++ b/workers/securityworker.py @@ -9,7 +9,7 @@ from data.database import UseThenDisconnect from data.model.image import (get_images_eligible_for_scan, get_max_id_for_sec_scan, get_min_id_for_sec_scan, get_image_id) from util.secscan.api import SecurityConfigValidator -from util.secscan.analyzer import LayerAnalyzer +from util.secscan.analyzer import LayerAnalyzer, PreemptedException from util.migrate.allocator import yield_random_entries from endpoints.v2 import v2_bp @@ -48,8 +48,9 @@ class SecurityWorker(Worker): with UseThenDisconnect(app.config): for candidate, abt in yield_random_entries(batch_query, get_image_id(), BATCH_SIZE, max_id, self._min_id): - _, continue_batch = self._analyzer.analyze_recursively(candidate) - if not continue_batch: + try: + self._analyzer.analyze_recursively(candidate) + except PreemptedException: logger.info('Another worker pre-empted us for layer: %s', candidate.id) abt.set()