# quay/util/secscan/analyzer.py

import logging
import logging.config

import features

from collections import defaultdict

from endpoints.notificationhelper import spawn_notification
from data.database import Image, ExternalNotificationEvent
from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base
from util.secscan.api import APIRequestFailure
from util.morecollections import AttrDict

logger = logging.getLogger(__name__)


class LayerAnalyzer(object):
  """ Helper class to perform analysis of a layer via the security scanner.

      Layers are handled parent-first: before a layer is submitted to the scanner, any parent
      whose recorded engine version is below the target version is analyzed recursively.
  """
  def __init__(self, config, api):
    """ Creates the analyzer.

        config: mapping-like object; `SECURITY_SCANNER_ENGINE_VERSION_TARGET` is read through
                `config.get` and defaults to 2 when absent.
        api: security scanner client; `analyze_layer(layer)` and
             `get_layer_data(layer, include_vulnerabilities=True)` are called on it.
    """
    self._api = api
    self._target_version = config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 2)


  def analyze_recursively(self, layer):
    """ Analyzes a layer and all its parents.

        Return a tuple of two bools:
          - The first one tells us if the layer and its parents analyzed successfully.
          - The second one is set to False when another call pre-empted the candidate's analysis
            for us.
    """
    if layer.parent_id and layer.parent.security_indexed_engine < self._target_version:
      # The image has a parent that is not analyzed yet with this engine.
      # Get the parent to get its own parent and recurse.
      try:
        base_query = get_image_with_storage_and_parent_base()
        parent_layer = base_query.where(Image.id == layer.parent_id).get()
      except Image.DoesNotExist:
        logger.warning("Image %s has Image %s as parent but doesn't exist.", layer.id,
                       layer.parent_id)

        # Without its parent the layer cannot be analyzed: record it as failed for the target
        # version and stop.
        return False, set_secscan_status(layer, False, self._target_version)

      cont, _ = self.analyze_recursively(parent_layer)
      if not cont:
        # The analysis failed for some reason and did not mark the layer as failed,
        # thus we should not try to analyze the children of that layer.
        # Interrupt the recursive analysis and return as no-one pre-empted us.
        return False, True

    # Now we know all parents are analyzed.
    return self._analyze(layer)


  def _analyze(self, layer):
    """ Analyzes a single layer.

        Return a tuple of two bools:
          - The first one tells us if we should evaluate its children.
          - The second one is set to False when another worker pre-empted the candidate's analysis
            for us.
    """
    # If the parent couldn't be analyzed with the target version or higher, we can't analyze
    # this image. Mark it as failed with the current target version.
    # NOTE(review): `layer.parent` is the object loaded together with `layer`, whereas
    # analyze_recursively works on a separately fetched parent object. Confirm these fields
    # reflect a status that was written during the recursion.
    if (layer.parent_id and not layer.parent.security_indexed and
        layer.parent.security_indexed_engine >= self._target_version):
      return True, set_secscan_status(layer, False, self._target_version)

    # Analyze the image.
    logger.info('Analyzing layer %s', layer.docker_image_id)
    analyzed_version, should_requeue = self._api.analyze_layer(layer)

    # If analysis failed, then determine whether we need to requeue.
    if not analyzed_version:
      if should_requeue:
        # If the layer needs to be requeued, return that the children cannot be analyzed (at this
        # time) and there was no collision with another worker.
        return False, False

      # If the layer cannot be requeued, we allow the children to be analyzed, because the code
      # path above will mark them as not analyzable, and we mark the image itself as not being
      # analyzable.
      return True, set_secscan_status(layer, False, self._target_version)

    # Mark the image as analyzed.
    logger.info('Analyzed layer %s successfully with version %s', layer.docker_image_id,
                analyzed_version)
    set_status = set_secscan_status(layer, True, analyzed_version)

    # If we are the one who've done the job successfully first, get the vulnerabilities and
    # send notifications to the repos that have a tag on that layer.
    if features.SECURITY_NOTIFICATIONS and set_status:
      self._notify_vulnerabilities(layer)

    return True, set_status


  def _notify_vulnerabilities(self, layer):
    """ Spawns `vulnerability_found` notifications for the repositories tagging the layer.

        Only tags that pass `filter_tags_have_repository_event` for the `vulnerability_found`
        event are considered. This is best-effort: if the layer data cannot be loaded from the
        scanner, the failure is logged and no notification is sent.
    """
    # Group the matching tags of the layer by repository.
    repository_map = defaultdict(list)
    event = ExternalNotificationEvent.get(name='vulnerability_found')
    for tag in filter_tags_have_repository_event(get_tags_for_image(layer.id), event):
      repository_map[tag.repository_id].append(tag)

    # Only look up the vulnerabilities if there is at least one tag to notify about.
    if not repository_map:
      return

    logger.debug('Loading data for layer %s', layer.id)
    try:
      layer_data = self._api.get_layer_data(layer, include_vulnerabilities=True)
    except APIRequestFailure:
      # The layer itself was analyzed successfully; a failed lookup only costs us the
      # notifications, so record it and carry on.
      logger.exception('Could not load data for layer %s; skipping notifications', layer.id)
      return

    if layer_data is None:
      return

    # Dispatch events for any detected vulnerabilities.
    logger.debug('Got data for layer %s: %s', layer.id, layer_data)
    found_features = layer_data['Layer'].get('Features', [])
    for tags in repository_map.values():
      self._notify_repository(tags, found_features)


  def _notify_repository(self, tags, found_features):
    """ Spawns one notification per vulnerability in `found_features` for a single repository.

        tags: non-empty list of tags that all belong to the repository being notified.
        found_features: list of feature dicts; each may carry a `Vulnerabilities` list.
    """
    tag_names = [tag.name for tag in tags]

    # The repository descriptor is the same for every vulnerability, so it is built once, and
    # only when there is something to report.
    repository = None

    for feature in found_features:
      # Features without vulnerabilities (missing key or empty/None value) are skipped.
      for vulnerability in feature.get('Vulnerabilities') or []:
        event_data = {
          'tags': list(tag_names),
          'vulnerability': {
            'id': vulnerability['Name'],
            'description': vulnerability.get('Description', None),
            'link': vulnerability.get('Link', None),
            'has_fix': 'FixedBy' in vulnerability,

            # TODO: Change this key name if/when we change the event format.
            'priority': vulnerability.get('Severity', 'Unknown'),
          },
        }

        if repository is None:
          # TODO(jzelinskie): remove when more endpoints have been converted to using
          # interfaces
          repository = AttrDict({
            'namespace_name': tags[0].repository.namespace_user.username,
            'name': tags[0].repository.name,
          })

        spawn_notification(repository, 'vulnerability_found', event_data)