# quay/util/secscan/analyzer.py

import logging
import logging.config

from collections import defaultdict

import features

from data.database import ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION, Image
from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base
from notifications import spawn_notification
from util.secscan import PRIORITY_LEVELS
from util.secscan.api import (APIRequestFailure, AnalyzeLayerException, MissingParentLayerException,
                              InvalidLayerException, AnalyzeLayerRetryException)
from util.morecollections import AttrDict


logger = logging.getLogger(__name__)


class PreemptedException(Exception):
  """ Signals that a different worker analyzed the image first, so this worker's attempt to
      analyze it was preempted.
  """
  pass


class LayerAnalyzer(object):
  """ Helper class to perform analysis of a layer via the security scanner.

      Layers are submitted to the scanner through the supplied API client (parents first) and the
      outcome is recorded via set_secscan_status. When security notifications are enabled and a
      new or previously-unindexed layer is analyzed, a `vulnerability_found` notification is
      spawned for each repository that has matching tags on the layer.
  """
  def __init__(self, config, api):
    """ Creates an analyzer.

        config: object exposing `.get(key, default)`; the target engine version is read from
                SECURITY_SCANNER_ENGINE_VERSION_TARGET (defaults to 2).
        api: security scanner client; `analyze_layer` and `get_layer_data` are called on it.
    """
    self._api = api
    self._target_version = config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 2)

  def analyze_recursively(self, layer):
    """ Analyzes a layer and all its parents.

        If a parent layer is reported missing, the analysis is retried once with all parents
        forcibly reanalyzed; if the parent is still missing, the layer is marked as invalid.

        Raises a PreemptedException if the analysis was preempted by another worker, and an
        APIRequestFailure if the analysis should be retried later.
    """
    try:
      self._analyze_recursively_and_check(layer)
    except MissingParentLayerException:
      # The parent layer of this layer was missing. Force a reanalyze.
      try:
        self._analyze_recursively_and_check(layer, force_parents=True)
      except MissingParentLayerException:
        # Parent is still missing... mark the layer as invalid.
        self._mark_invalid(layer)

  def _mark_invalid(self, layer):
    """ Records the layer as not successfully indexed at the target engine version.

        Raises a PreemptedException if set_secscan_status reports that nothing was updated.
    """
    if not set_secscan_status(layer, False, self._target_version):
      raise PreemptedException

  def _analyze_recursively_and_check(self, layer, force_parents=False):
    """ Analyzes a layer and all its parents, optionally forcing parents to be reanalyzed,
        and checking for various exceptions that can occur during analysis.

        MissingParentLayerException is passed upward; AnalyzeLayerRetryException is converted
        into APIRequestFailure; other analysis failures mark the layer as invalid.
    """
    # NOTE(review): the specific handlers come before AnalyzeLayerException; presumably those
    # exception types derive from it, so keep this ordering -- confirm in util.secscan.api.
    try:
      self._analyze_recursively(layer, force_parents=force_parents)
    except InvalidLayerException:
      # One of the parent layers is invalid, so this layer is invalid as well.
      self._mark_invalid(layer)
    except AnalyzeLayerRetryException:
      # Something went wrong when trying to analyze the layer, but we should retry, so leave
      # the layer unindexed. Another worker will come along and handle it.
      raise APIRequestFailure
    except MissingParentLayerException:
      # Pass upward, as missing parent is handled in the analyze_recursively method.
      raise
    except AnalyzeLayerException:
      # Something went wrong when trying to analyze the layer and we cannot retry, so mark the
      # layer as invalid.
      logger.exception('Got exception when trying to analyze layer %s via security scanner',
                       layer.id)
      self._mark_invalid(layer)

  def _analyze_recursively(self, layer, force_parents=False):
    """ Analyzes any parent that still needs it (or every parent, when force_parents is set),
        then analyzes the layer itself.

        Raises an AnalyzeLayerException if the parent row referenced by the layer does not exist.
    """
    # Check if there is a parent layer that needs to be analyzed.
    if layer.parent_id and (force_parents or
                            layer.parent.security_indexed_engine < self._target_version):
      try:
        base_query = get_image_with_storage_and_parent_base()
        parent_layer = base_query.where(Image.id == layer.parent_id).get()
      except Image.DoesNotExist:
        logger.warning("Image %s has Image %s as parent but doesn't exist.", layer.id,
                       layer.parent_id)
        raise AnalyzeLayerException('Parent image not found')

      self._analyze_recursively(parent_layer, force_parents=force_parents)

    # Analyze the layer itself.
    self._analyze(layer, force_parents=force_parents)

  def _analyze(self, layer, force_parents=False):
    """ Analyzes a single layer and records the result.

        Returns nothing. Raises a PreemptedException when another worker pre-empted the analysis
        (the status update was rejected for a layer that was not already successfully indexed).
        Exceptions raised by the scanner API propagate to the caller.
    """
    # If the parent couldn't be analyzed with the target version or higher, we can't analyze
    # this image. Mark it as failed with the current target version.
    if not force_parents and (layer.parent_id and not layer.parent.security_indexed and
                              layer.parent.security_indexed_engine >= self._target_version):
      self._mark_invalid(layer)
      return

    # Make sure the image's storage is not marked as uploading. If so, nothing more to do.
    if layer.storage.uploading:
      self._mark_invalid(layer)
      return

    # Capture the pre-analysis state; it decides below whether notifications are warranted.
    previously_security_indexed_successfully = layer.security_indexed
    previous_security_indexed_engine = layer.security_indexed_engine

    logger.info('Analyzing layer %s', layer.docker_image_id)
    analyzed_version = self._api.analyze_layer(layer)

    logger.info('Analyzed layer %s successfully with version %s', layer.docker_image_id,
                analyzed_version)

    # Mark the image as analyzed. If the image was previously successfully marked as resolved,
    # then set_secscan_status might return False because we're not changing it (since this is a
    # fixup); only an unchanged, previously-unindexed image means we were preempted.
    if (not set_secscan_status(layer, True, analyzed_version) and
        not previously_security_indexed_successfully):
      raise PreemptedException

    # If we are the one who've done the job successfully first, then we need to decide if we should
    # send notifications. Notifications are sent if:
    #  1) This is a new layer
    #  2) This is an existing layer that previously did not index properly
    # We don't always send notifications as if we are re-indexing a successful layer for a newer
    # feature set in the security scanner, notifications will be spammy.
    is_new_image = previous_security_indexed_engine == IMAGE_NOT_SCANNED_ENGINE_VERSION
    is_existing_image_unindexed = not is_new_image and not previously_security_indexed_successfully
    if features.SECURITY_NOTIFICATIONS and (is_new_image or is_existing_image_unindexed):
      self._notify_vulnerabilities(layer)

  def _notify_vulnerabilities(self, layer):
    """ Spawns one batched `vulnerability_found` notification per repository for the layer.

        Nothing is sent when no tag matches the event, when the scanner returns no layer data,
        or when the layer data contains no vulnerabilities. Exceptions raised by
        `get_layer_data` (e.g. APIRequestFailure) propagate to the caller.
    """
    # Group the tags of the layer we analyzed by repository.
    event = ExternalNotificationEvent.get(name='vulnerability_found')
    repository_map = defaultdict(list)
    for tag in filter_tags_have_repository_event(get_tags_for_image(layer.id), event):
      repository_map[tag.repository_id].append(tag)

    # Only look up the vulnerabilities if there is at least one tag to notify about.
    if not repository_map:
      return

    logger.debug('Loading data for layer %s', layer.id)
    layer_data = self._api.get_layer_data(layer, include_vulnerabilities=True)
    if layer_data is None:
      return

    logger.debug('Got data for layer %s: %s', layer.id, layer_data)
    found_features = layer_data['Layer'].get('Features', [])

    # The vulnerabilities depend only on the layer, so collect and order them once rather than
    # once per repository.
    vulnerabilities = self._collect_vulnerabilities(found_features)
    if not vulnerabilities:
      return

    vulnerabilities.sort(key=lambda v: PRIORITY_LEVELS.get(v['priority'], {}).get('index', 100))

    for tags in repository_map.values():
      # TODO: remove when more endpoints have been converted to using
      # interfaces
      repository = AttrDict({
        'namespace_name': tags[0].repository.namespace_user.username,
        'name': tags[0].repository.name,
      })

      # Each notification gets its own copies so that payloads never share mutable state.
      repo_vulnerabilities = [dict(vuln) for vuln in vulnerabilities]
      event_data = {
        'tags': [tag.name for tag in tags],
        'vulnerabilities': repo_vulnerabilities,
        'vulnerability': repo_vulnerabilities[0], # For back-compat with existing events.
      }

      spawn_notification(repository, 'vulnerability_found', event_data)

  @staticmethod
  def _collect_vulnerabilities(found_features):
    """ Flattens the per-feature vulnerability lists into notification entries.

        Entries are keyed by vulnerability name, so a vulnerability listed under several features
        appears once (the last occurrence wins). Features without a `Vulnerabilities` entry, or
        with an empty/None one, are skipped.

        Returns a list of dicts with the keys `id`, `description`, `link`, `has_fix` and
        `priority`.
    """
    vulnerabilities = {}
    for feature in found_features:
      for vulnerability in feature.get('Vulnerabilities') or []:
        vulnerabilities[vulnerability['Name']] = {
          'id': vulnerability['Name'],
          'description': vulnerability.get('Description', None),
          'link': vulnerability.get('Link', None),
          'has_fix': 'FixedBy' in vulnerability,

          # TODO: Change this key name if/when we change the event format.
          'priority': vulnerability.get('Severity', 'Unknown'),
        }

    return list(vulnerabilities.values())
initial import for Open Source 🎉 2019-11-12 16:09:47 +00:00			`import logging`
			`import logging.config`

			`from collections import defaultdict`

			`import features`

			`from data.database import ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION, Image`
			`from data.model.tag import filter_tags_have_repository_event, get_tags_for_image`
			`from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base`
			`from notifications import spawn_notification`
			`from util.secscan import PRIORITY_LEVELS`
			`from util.secscan.api import (APIRequestFailure, AnalyzeLayerException, MissingParentLayerException,`
			`InvalidLayerException, AnalyzeLayerRetryException)`
			`from util.morecollections import AttrDict`


			`logger = logging.getLogger(__name__)`


			`class PreemptedException(Exception):`
			`""" Exception raised if another worker analyzed the image before this worker was able to do so.`
			`"""`


			`class LayerAnalyzer(object):`
			`""" Helper class to perform analysis of a layer via the security scanner. """`
			`def __init__(self, config, api):`
			`self._api = api`
			`self._target_version = config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 2)`

			`def analyze_recursively(self, layer):`
			`""" Analyzes a layer and all its parents. Raises a PreemptedException if the analysis was`
			`preempted by another worker.`
			`"""`
			`try:`
			`self._analyze_recursively_and_check(layer)`
			`except MissingParentLayerException:`
			`# The parent layer of this layer was missing. Force a reanalyze.`
			`try:`
			`self._analyze_recursively_and_check(layer, force_parents=True)`
			`except MissingParentLayerException:`
			`# Parent is still missing... mark the layer as invalid.`
			`if not set_secscan_status(layer, False, self._target_version):`
			`raise PreemptedException`

			`def _analyze_recursively_and_check(self, layer, force_parents=False):`
			`""" Analyzes a layer and all its parents, optionally forcing parents to be reanalyzed,`
			`and checking for various exceptions that can occur during analysis.`
			`"""`
			`try:`
			`self._analyze_recursively(layer, force_parents=force_parents)`
			`except InvalidLayerException:`
			`# One of the parent layers is invalid, so this layer is invalid as well.`
			`if not set_secscan_status(layer, False, self._target_version):`
			`raise PreemptedException`
			`except AnalyzeLayerRetryException:`
			`# Something went wrong when trying to analyze the layer, but we should retry, so leave`
			`# the layer unindexed. Another worker will come along and handle it.`
			`raise APIRequestFailure`
			`except MissingParentLayerException:`
			`# Pass upward, as missing parent is handled in the analyze_recursively method.`
			`raise`
			`except AnalyzeLayerException:`
			`# Something went wrong when trying to analyze the layer and we cannot retry, so mark the`
			`# layer as invalid.`
			`logger.exception('Got exception when trying to analyze layer %s via security scanner',`
			`layer.id)`
			`if not set_secscan_status(layer, False, self._target_version):`
			`raise PreemptedException`

			`def _analyze_recursively(self, layer, force_parents=False):`
			`# Check if there is a parent layer that needs to be analyzed.`
			`if layer.parent_id and (force_parents or`
			`layer.parent.security_indexed_engine < self._target_version):`
			`try:`
			`base_query = get_image_with_storage_and_parent_base()`
			`parent_layer = base_query.where(Image.id == layer.parent_id).get()`
			`except Image.DoesNotExist:`
			`logger.warning("Image %s has Image %s as parent but doesn't exist.", layer.id,`
			`layer.parent_id)`
			`raise AnalyzeLayerException('Parent image not found')`

			`self._analyze_recursively(parent_layer, force_parents=force_parents)`

			`# Analyze the layer itself.`
			`self._analyze(layer, force_parents=force_parents)`

			`def _analyze(self, layer, force_parents=False):`
			`""" Analyzes a single layer.`

			`Return a tuple of two bools:`
			`- The first one tells us if we should evaluate its children.`
			`- The second one is set to False when another worker pre-empted the candidate's analysis`
			`for us.`
			`"""`
			`# If the parent couldn't be analyzed with the target version or higher, we can't analyze`
			`# this image. Mark it as failed with the current target version.`
			`if not force_parents and (layer.parent_id and not layer.parent.security_indexed and`
			`layer.parent.security_indexed_engine >= self._target_version):`
			`if not set_secscan_status(layer, False, self._target_version):`
			`raise PreemptedException`

			`# Nothing more to do.`
			`return`

			`# Make sure the image's storage is not marked as uploading. If so, nothing more to do.`
			`if layer.storage.uploading:`
			`if not set_secscan_status(layer, False, self._target_version):`
			`raise PreemptedException`

			`# Nothing more to do.`
			`return`

			`# Analyze the image.`
			`previously_security_indexed_successfully = layer.security_indexed`
			`previous_security_indexed_engine = layer.security_indexed_engine`

			`logger.info('Analyzing layer %s', layer.docker_image_id)`
			`analyzed_version = self._api.analyze_layer(layer)`

			`logger.info('Analyzed layer %s successfully with version %s', layer.docker_image_id,`
			`analyzed_version)`

			`# Mark the image as analyzed.`
			`if not set_secscan_status(layer, True, analyzed_version):`
			`# If the image was previously successfully marked as resolved, then set_secscan_status`
			`# might return False because we're not changing it (since this is a fixup).`
			`if not previously_security_indexed_successfully:`
			`raise PreemptedException`

			`# If we are the one who've done the job successfully first, then we need to decide if we should`
			`# send notifications. Notifications are sent if:`
			`# 1) This is a new layer`
			`# 2) This is an existing layer that previously did not index properly`
			`# We don't always send notifications as if we are re-indexing a successful layer for a newer`
			`# feature set in the security scanner, notifications will be spammy.`
			`is_new_image = previous_security_indexed_engine == IMAGE_NOT_SCANNED_ENGINE_VERSION`
			`is_existing_image_unindexed = not is_new_image and not previously_security_indexed_successfully`
			`if (features.SECURITY_NOTIFICATIONS and (is_new_image or is_existing_image_unindexed)):`
			`# Get the tags of the layer we analyzed.`
			`repository_map = defaultdict(list)`
			`event = ExternalNotificationEvent.get(name='vulnerability_found')`
			`matching = list(filter_tags_have_repository_event(get_tags_for_image(layer.id), event))`

			`for tag in matching:`
			`repository_map[tag.repository_id].append(tag)`

			`# If there is at least one tag,`
			`# Lookup the vulnerabilities for the image, now that it is analyzed.`
			`if len(repository_map) > 0:`
			`logger.debug('Loading data for layer %s', layer.id)`
			`try:`
			`layer_data = self._api.get_layer_data(layer, include_vulnerabilities=True)`
			`except APIRequestFailure:`
			`raise`

			`if layer_data is not None:`
			`# Dispatch events for any detected vulnerabilities`
			`logger.debug('Got data for layer %s: %s', layer.id, layer_data)`
			`found_features = layer_data['Layer'].get('Features', [])`
			`for repository_id in repository_map:`
			`tags = repository_map[repository_id]`
			`vulnerabilities = dict()`

			`# Collect all the vulnerabilities found for the layer under each repository and send`
			`# as a batch notification.`
			`for feature in found_features:`
			`if 'Vulnerabilities' not in feature:`
			`continue`

			`for vulnerability in feature.get('Vulnerabilities', []):`
			`vuln_data = {`
			`'id': vulnerability['Name'],`
			`'description': vulnerability.get('Description', None),`
			`'link': vulnerability.get('Link', None),`
			`'has_fix': 'FixedBy' in vulnerability,`

			`# TODO: Change this key name if/when we change the event format.`
			`'priority': vulnerability.get('Severity', 'Unknown'),`
			`}`

			`vulnerabilities[vulnerability['Name']] = vuln_data`

			`# TODO: remove when more endpoints have been converted to using`
			`# interfaces`
			`repository = AttrDict({`
			`'namespace_name': tags[0].repository.namespace_user.username,`
			`'name': tags[0].repository.name,`
			`})`

			`repo_vulnerabilities = list(vulnerabilities.values())`
			`if not repo_vulnerabilities:`
			`continue`

			`priority_key = lambda v: PRIORITY_LEVELS.get(v['priority'], {}).get('index', 100)`
			`repo_vulnerabilities.sort(key=priority_key)`

			`event_data = {`
			`'tags': [tag.name for tag in tags],`
			`'vulnerabilities': repo_vulnerabilities,`
			`'vulnerability': repo_vulnerabilities[0], # For back-compat with existing events.`
			`}`

			`spawn_notification(repository, 'vulnerability_found', event_data)`