import logging
import logging.config

import features

from collections import defaultdict

from endpoints.notificationhelper import spawn_notification
from data.database import Image, ExternalNotificationEvent
from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base
from util.secscan.api import APIRequestFailure
from util.morecollections import AttrDict


logger = logging.getLogger(__name__)


class LayerAnalyzer(object):
    """ Helper class to perform analysis of a layer via the security scanner. """

    def __init__(self, config, api):
        """ Stores the scanner API client and the engine version to analyze against.

            `config` is read with `.get()`; the target engine version defaults to 2 when
            SECURITY_SCANNER_ENGINE_VERSION_TARGET is not configured.
        """
        self._api = api
        self._target_version = config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 2)

    def analyze_recursively(self, layer):
        """ Analyzes a layer and all its parents.

            Return a tuple of two bools:
              - The first one tells us if the layer and its parents analyzed successfully.
              - The second one is set to False when another call pre-empted the candidate's
                analysis for us.
        """
        if layer.parent_id and layer.parent.security_indexed_engine < self._target_version:
            # The image has a parent that is not analyzed yet with this engine.
            # Get the parent to get its own parent and recurse.
            try:
                base_query = get_image_with_storage_and_parent_base()
                parent_layer = base_query.where(Image.id == layer.parent_id).get()
            except Image.DoesNotExist:
                logger.warning("Image %s has Image %s as parent but doesn't exist.", layer.id,
                               layer.parent_id)
                # The parent row is missing, so this layer is marked as failed for the target
                # version; the second value is whatever set_secscan_status reports.
                return False, set_secscan_status(layer, False, self._target_version)

            cont, _ = self.analyze_recursively(parent_layer)
            if not cont:
                # The analysis failed for some reason and did not mark the layer as failed,
                # thus we should not try to analyze the children of that layer.
                # Interrupt the recursive analysis and return as no-one pre-empted us.
                return False, True

        # Now we know all parents are analyzed.
        return self._analyze(layer)

    def _analyze(self, layer):
        """ Analyzes a single layer.

            Return a tuple of two bools:
              - The first one tells us if we should evaluate its children.
              - The second one is set to False when another worker pre-empted the candidate's
                analysis for us.
        """
        # If the parent couldn't be analyzed with the target version or higher, we can't analyze
        # this image. Mark it as failed with the current target version.
        if (layer.parent_id and not layer.parent.security_indexed and
                layer.parent.security_indexed_engine >= self._target_version):
            return True, set_secscan_status(layer, False, self._target_version)

        # Analyze the image.
        logger.info('Analyzing layer %s', layer.docker_image_id)
        analyzed_version, should_requeue = self._api.analyze_layer(layer)

        # If analysis failed, then determine whether we need to requeue.
        if not analyzed_version:
            if should_requeue:
                # The layer needs to be requeued: its children cannot be analyzed (at this time).
                # NOTE(review): the second value is False here although no other worker was
                # involved; confirm that callers expect this.
                return False, False

            # If the layer cannot be requeued, we allow the children to be analyzed, because the
            # parent check at the top of this method will mark them as not analyzable, and we
            # mark the image itself as not being analyzable.
            return True, set_secscan_status(layer, False, self._target_version)

        # Mark the image as analyzed.
        logger.info('Analyzed layer %s successfully with version %s', layer.docker_image_id,
                    analyzed_version)
        set_status = set_secscan_status(layer, True, analyzed_version)

        # If we are the one who did the job successfully first, get the vulnerabilities and
        # send notifications to the repos that have a tag on that layer.
        if features.SECURITY_NOTIFICATIONS and set_status:
            self._notify_vulnerabilities(layer)

        return True, set_status

    def _notify_vulnerabilities(self, layer):
        """ Spawns a 'vulnerability_found' notification for every vulnerability reported on
            `layer`, once per repository that has a matching tag on that layer.

            This is best-effort: if the layer data cannot be loaded from the scanner API, the
            failure is logged and no notification is sent.
        """
        # Group the tags of the layer we analyzed by repository.
        event = ExternalNotificationEvent.get(name='vulnerability_found')
        tags_by_repository = defaultdict(list)
        for tag in filter_tags_have_repository_event(get_tags_for_image(layer.id), event):
            tags_by_repository[tag.repository_id].append(tag)

        # Without any tag there is nobody to notify, so skip the API round-trip entirely.
        if not tags_by_repository:
            return

        # Lookup the vulnerabilities for the image, now that it is analyzed.
        logger.debug('Loading data for layer %s', layer.id)
        try:
            layer_data = self._api.get_layer_data(layer, include_vulnerabilities=True)
        except APIRequestFailure:
            logger.warning('Could not load data for layer %s; skipping vulnerability notifications',
                           layer.id, exc_info=True)
            return

        if layer_data is None:
            return

        logger.debug('Got data for layer %s: %s', layer.id, layer_data)

        # Flatten the vulnerabilities once, preserving feature order, instead of re-walking the
        # feature list for every repository.
        vulnerabilities = [vulnerability
                           for feature in layer_data['Layer'].get('Features', [])
                           for vulnerability in (feature.get('Vulnerabilities') or [])]
        if not vulnerabilities:
            return

        # Dispatch events for any detected vulnerabilities.
        for tags in tags_by_repository.values():
            tag_names = [tag.name for tag in tags]

            # TODO(jzelinskie): remove when more endpoints have been converted to using
            # interfaces
            repository = AttrDict({
                'namespace_name': tags[0].repository.namespace_user.username,
                'name': tags[0].repository.name,
            })

            for vulnerability in vulnerabilities:
                event_data = {
                    # Each event gets its own list so events never share mutable state.
                    'tags': list(tag_names),
                    'vulnerability': {
                        'id': vulnerability['Name'],
                        'description': vulnerability.get('Description', None),
                        'link': vulnerability.get('Link', None),
                        'has_fix': 'FixedBy' in vulnerability,

                        # TODO: Change this key name if/when we change the event format.
                        'priority': vulnerability.get('Severity', 'Unknown'),
                    },
                }

                spawn_notification(repository, 'vulnerability_found', event_data)