179 lines
		
	
	
	
		
			7.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			179 lines
		
	
	
	
		
			7.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import logging
 | |
| import logging.config
 | |
| 
 | |
| from collections import defaultdict
 | |
| 
 | |
| import features
 | |
| 
 | |
| from endpoints.notificationhelper import spawn_notification
 | |
| from data.database import ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION, Image
 | |
| from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
 | |
| from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base
 | |
| from util.secscan.api import (APIRequestFailure, AnalyzeLayerException, MissingParentLayerException,
 | |
|                               InvalidLayerException, AnalyzeLayerRetryException)
 | |
| from util.morecollections import AttrDict
 | |
| 
 | |
| 
 | |
| logger = logging.getLogger(__name__)
 | |
| 
 | |
| 
 | |
class PreemptedException(Exception):
  """ Signals that a different worker finished analyzing the image before this worker could. """
 | |
| 
 | |
| 
 | |
class LayerAnalyzer(object):
  """ Helper class to perform analysis of a layer via the security scanner.

      Results are recorded through `set_secscan_status`. When that call returns a falsy value
      while recording a result, a PreemptedException is raised (see `_analyze` for the one
      exception to this rule).
  """
  def __init__(self, config, api):
    """ Creates an analyzer.

        config: object exposing `get`; the `SECURITY_SCANNER_ENGINE_VERSION_TARGET` entry gives
                the target engine version (defaults to 2 when absent).
        api: security scanner client; `analyze_layer` and `get_layer_data` are called on it.
    """
    self._api = api
    self._target_version = config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 2)

  def analyze_recursively(self, layer):
    """ Analyzes a layer and all its parents. Raises a PreemptedException if the analysis was
        preempted by another worker.
    """
    try:
      self._analyze_recursively_and_check(layer)
    except MissingParentLayerException:
      # The parent layer of this layer was missing. Force a reanalyze.
      try:
        self._analyze_recursively_and_check(layer, force_parents=True)
      except MissingParentLayerException:
        # Parent is still missing... mark the layer as invalid.
        self._mark_invalid(layer)

  def _mark_invalid(self, layer):
    """ Records the layer as failed at the target engine version. Raises a PreemptedException
        if `set_secscan_status` reports that the status was not set.
    """
    if not set_secscan_status(layer, False, self._target_version):
      raise PreemptedException

  def _analyze_recursively_and_check(self, layer, force_parents=False):
    """ Analyzes a layer and all its parents, optionally forcing parents to be reanalyzed,
        and checking for various exceptions that can occur during analysis.

        MissingParentLayerException is re-raised to the caller; the other analysis exceptions
        handled here either mark the layer as invalid or leave it untouched for a later retry.
    """
    try:
      self._analyze_recursively(layer, force_parents=force_parents)
    except InvalidLayerException:
      # One of the parent layers is invalid, so this layer is invalid as well.
      self._mark_invalid(layer)
    except AnalyzeLayerRetryException:
      # Something went wrong when trying to analyze the layer, but we should retry, so leave
      # the layer unindexed. Another worker will come along and handle it.
      pass
    except MissingParentLayerException:
      # Pass upward, as missing parent is handled in the analyze_recursively method.
      raise
    except AnalyzeLayerException:
      # Something went wrong when trying to analyze the layer and we cannot retry, so mark the
      # layer as invalid.
      self._mark_invalid(layer)

  def _analyze_recursively(self, layer, force_parents=False):
    """ Analyzes the parent chain of the layer (where needed) and then the layer itself.

        A parent is analyzed first when `force_parents` is set or when its recorded engine
        version is below the target version. Raises an AnalyzeLayerException if the parent row
        cannot be loaded.
    """
    # Check if there is a parent layer that needs to be analyzed.
    if layer.parent_id and (force_parents or
                            layer.parent.security_indexed_engine < self._target_version):
      try:
        base_query = get_image_with_storage_and_parent_base()
        parent_layer = base_query.where(Image.id == layer.parent_id).get()
      except Image.DoesNotExist:
        logger.warning("Image %s has Image %s as parent but doesn't exist.", layer.id,
                       layer.parent_id)
        raise AnalyzeLayerException('Parent image not found')

      self._analyze_recursively(parent_layer, force_parents=force_parents)

    # Analyze the layer itself.
    self._analyze(layer, force_parents=force_parents)

  def _analyze(self, layer, force_parents=False):
    """ Analyzes a single layer and records the outcome via `set_secscan_status`.

        Unless `force_parents` is set, a layer whose parent was already processed at the target
        engine version (or newer) without being marked as indexed is recorded as failed and the
        scanner is not called.

        Returns None. Raises a PreemptedException if another worker pre-empted the analysis.
        Exceptions raised by the scanner's `analyze_layer` are not caught here.
    """
    # If the parent couldn't be analyzed with the target version or higher, we can't analyze
    # this image. Mark it as failed with the current target version.
    if not force_parents and (layer.parent_id and not layer.parent.security_indexed and
                              layer.parent.security_indexed_engine >= self._target_version):
      self._mark_invalid(layer)

      # Nothing more to do.
      return

    # Capture the previous state before it is overwritten; it drives both the preemption check
    # and the notification decision below.
    previously_security_indexed_successfully = layer.security_indexed
    previous_security_indexed_engine = layer.security_indexed_engine

    # Analyze the image.
    logger.info('Analyzing layer %s', layer.docker_image_id)
    analyzed_version = self._api.analyze_layer(layer)

    logger.info('Analyzed layer %s successfully with version %s', layer.docker_image_id,
                analyzed_version)

    # Mark the image as analyzed.
    if not set_secscan_status(layer, True, analyzed_version):
      # If the image was previously successfully marked as resolved, then set_secscan_status
      # might return False because we're not changing it (since this is a fixup).
      if not previously_security_indexed_successfully:
        raise PreemptedException

    # If we are the one who've done the job successfully first, then we need to decide if we should
    # send notifications. Notifications are sent if:
    #  1) This is a new layer
    #  2) This is an existing layer that previously did not index properly
    # We don't always send notifications as if we are re-indexing a successful layer for a newer
    # feature set in the security scanner, notifications will be spammy.
    is_new_image = previous_security_indexed_engine == IMAGE_NOT_SCANNED_ENGINE_VERSION
    is_existing_image_unindexed = not is_new_image and not previously_security_indexed_successfully
    if features.SECURITY_NOTIFICATIONS and (is_new_image or is_existing_image_unindexed):
      self._send_notifications(layer)

  def _send_notifications(self, layer):
    """ Spawns one 'vulnerability_found' notification per vulnerability reported for the layer,
        for each repository owning a tag returned by `filter_tags_have_repository_event`.

        Nothing is sent when no tag matches, when the scanner data cannot be loaded, or when
        the scanner reports no vulnerabilities.
    """
    # Get the tags of the layer we analyzed, grouped by repository.
    event = ExternalNotificationEvent.get(name='vulnerability_found')
    repository_map = defaultdict(list)
    for tag in filter_tags_have_repository_event(get_tags_for_image(layer.id), event):
      repository_map[tag.repository_id].append(tag)

    if not repository_map:
      return

    # There is at least one tag: lookup the vulnerabilities for the image, now that it is
    # analyzed.
    logger.debug('Loading data for layer %s', layer.id)
    try:
      layer_data = self._api.get_layer_data(layer, include_vulnerabilities=True)
    except APIRequestFailure:
      # The analysis result is already recorded; a failed lookup only skips the notifications.
      return

    if layer_data is None:
      return

    logger.debug('Got data for layer %s: %s', layer.id, layer_data)
    found_features = layer_data['Layer'].get('Features', [])

    # Flatten in feature order so notifications are emitted in the same order as the scanner
    # reported them. Features without a 'Vulnerabilities' entry contribute nothing.
    vulnerabilities = [vulnerability
                       for feature in found_features
                       for vulnerability in feature.get('Vulnerabilities', [])]
    if not vulnerabilities:
      return

    # Dispatch events for any detected vulnerabilities.
    for tags in repository_map.values():
      # The repository descriptor and tag names only depend on the repository, so build them
      # once rather than once per vulnerability.
      # TODO(jzelinskie): remove when more endpoints have been converted to using
      # interfaces
      repository = AttrDict({
        'namespace_name': tags[0].repository.namespace_user.username,
        'name': tags[0].repository.name,
      })
      tag_names = [tag.name for tag in tags]

      for vulnerability in vulnerabilities:
        event_data = {
          # Each event gets its own list so payloads never share mutable state.
          'tags': list(tag_names),
          'vulnerability': {
            'id': vulnerability['Name'],
            'description': vulnerability.get('Description', None),
            'link': vulnerability.get('Link', None),
            'has_fix': 'FixedBy' in vulnerability,

            # TODO: Change this key name if/when we change the event format.
            'priority': vulnerability.get('Severity', 'Unknown'),
          },
        }

        spawn_notification(repository, 'vulnerability_found', event_data)
 |