quay/util/secscan/analyzer.py
Kenny Lee Sin Cheong 203c0b76e0 Raise an APIRequestFailure exception when the security scanner is unavailable
An APIRequestFailure is raised when an API request from analyze_layer or
get_layer_data fails due to a connection error, timeout, or other ambiguous
error; when that happens, the worker is put to sleep for the duration of the
default indexing interval.
2017-05-24 11:04:44 -04:00
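
For context, here is a minimal sketch of how a polling worker could consume the exceptions raised by this module and back off when the scanner is unreachable. It is not the actual worker implementation: the loop itself, the next_candidate_layer helper, and the DEFAULT_INDEXING_INTERVAL name and value are hypothetical placeholders; only the sleep-on-APIRequestFailure behavior is taken from the commit message above.

import time

from util.secscan.analyzer import PreemptedException
from util.secscan.api import APIRequestFailure

DEFAULT_INDEXING_INTERVAL = 30  # seconds; hypothetical default, not taken from this repository


def index_forever(analyzer, next_candidate_layer):
  """ Repeatedly analyze candidate layers, sleeping whenever the scanner API is unavailable. """
  while True:
    layer = next_candidate_layer()
    if layer is None:
      # No work available; wait for the next indexing pass.
      time.sleep(DEFAULT_INDEXING_INTERVAL)
      continue

    try:
      analyzer.analyze_recursively(layer)
    except PreemptedException:
      # Another worker already analyzed this layer; move on to the next candidate.
      continue
    except APIRequestFailure:
      # The security scanner is unreachable; sleep for the default indexing interval.
      time.sleep(DEFAULT_INDEXING_INTERVAL)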


import logging
import logging.config

from collections import defaultdict

import features

from endpoints.notificationhelper import spawn_notification
from data.database import ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION, Image
from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base
from util.secscan import PRIORITY_LEVELS
from util.secscan.api import (APIRequestFailure, AnalyzeLayerException, MissingParentLayerException,
                              InvalidLayerException, AnalyzeLayerRetryException)
from util.morecollections import AttrDict

logger = logging.getLogger(__name__)


class PreemptedException(Exception):
  """ Exception raised if another worker analyzed the image before this worker was able to do so.
  """


class LayerAnalyzer(object):
  """ Helper class to perform analysis of a layer via the security scanner. """

  def __init__(self, config, api):
    self._api = api
    self._target_version = config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 2)

  def analyze_recursively(self, layer):
    """ Analyzes a layer and all its parents. Raises a PreemptedException if the analysis was
        preempted by another worker.
    """
    try:
      self._analyze_recursively_and_check(layer)
    except MissingParentLayerException:
      # The parent layer of this layer was missing. Force a reanalyze.
      try:
        self._analyze_recursively_and_check(layer, force_parents=True)
      except MissingParentLayerException:
        # Parent is still missing... mark the layer as invalid.
        if not set_secscan_status(layer, False, self._target_version):
          raise PreemptedException

  def _analyze_recursively_and_check(self, layer, force_parents=False):
    """ Analyzes a layer and all its parents, optionally forcing parents to be reanalyzed,
        and checking for various exceptions that can occur during analysis.
    """
    try:
      self._analyze_recursively(layer, force_parents=force_parents)
    except InvalidLayerException:
      # One of the parent layers is invalid, so this layer is invalid as well.
      if not set_secscan_status(layer, False, self._target_version):
        raise PreemptedException
    except AnalyzeLayerRetryException:
      # Something went wrong when trying to analyze the layer, but we should retry, so leave
      # the layer unindexed. Another worker will come along and handle it. Raising
      # APIRequestFailure here causes the calling worker to back off for the indexing interval.
      raise APIRequestFailure
    except MissingParentLayerException:
      # Pass upward, as a missing parent is handled in the analyze_recursively method.
      raise
    except AnalyzeLayerException:
      # Something went wrong when trying to analyze the layer and we cannot retry, so mark the
      # layer as invalid.
      logger.exception('Got exception when trying to analyze layer %s via security scanner',
                       layer.id)
      if not set_secscan_status(layer, False, self._target_version):
        raise PreemptedException

  def _analyze_recursively(self, layer, force_parents=False):
    # Check if there is a parent layer that needs to be analyzed.
    if layer.parent_id and (force_parents or
                            layer.parent.security_indexed_engine < self._target_version):
      try:
        base_query = get_image_with_storage_and_parent_base()
        parent_layer = base_query.where(Image.id == layer.parent_id).get()
      except Image.DoesNotExist:
        logger.warning("Image %s has Image %s as parent, but the parent does not exist.", layer.id,
                       layer.parent_id)
        raise AnalyzeLayerException('Parent image not found')

      self._analyze_recursively(parent_layer, force_parents=force_parents)

    # Analyze the layer itself.
    self._analyze(layer, force_parents=force_parents)

  def _analyze(self, layer, force_parents=False):
    """ Analyzes a single layer. Raises a PreemptedException if another worker marked the layer's
        security status before this worker could, and propagates APIRequestFailure if the
        security scanner cannot be reached.
    """
    # If the parent couldn't be analyzed with the target version or higher, we can't analyze
    # this image. Mark it as failed with the current target version.
    if not force_parents and (layer.parent_id and not layer.parent.security_indexed and
                              layer.parent.security_indexed_engine >= self._target_version):
      if not set_secscan_status(layer, False, self._target_version):
        raise PreemptedException

      # Nothing more to do.
      return

    # Analyze the image.
    previously_security_indexed_successfully = layer.security_indexed
    previous_security_indexed_engine = layer.security_indexed_engine
    logger.info('Analyzing layer %s', layer.docker_image_id)
    analyzed_version = self._api.analyze_layer(layer)
    logger.info('Analyzed layer %s successfully with version %s', layer.docker_image_id,
                analyzed_version)

    # Mark the image as analyzed.
    if not set_secscan_status(layer, True, analyzed_version):
      # If the image was previously successfully marked as resolved, then set_secscan_status
      # might return False because we're not changing it (since this is a fixup).
      if not previously_security_indexed_successfully:
        raise PreemptedException

    # If we were the first to finish the job successfully, we need to decide whether to send
    # notifications. Notifications are sent if:
    #  1) This is a new layer.
    #  2) This is an existing layer that previously did not index properly.
    # We don't always send notifications because, when re-indexing an already-successful layer for
    # a newer feature set in the security scanner, notifications would be spammy.
    is_new_image = previous_security_indexed_engine == IMAGE_NOT_SCANNED_ENGINE_VERSION
    is_existing_image_unindexed = not is_new_image and not previously_security_indexed_successfully
    if features.SECURITY_NOTIFICATIONS and (is_new_image or is_existing_image_unindexed):
      # Get the tags of the layer we analyzed.
      repository_map = defaultdict(list)
      event = ExternalNotificationEvent.get(name='vulnerability_found')
      matching = list(filter_tags_have_repository_event(get_tags_for_image(layer.id), event))

      for tag in matching:
        repository_map[tag.repository_id].append(tag)

      # If there is at least one tag, look up the vulnerabilities for the image now that it has
      # been analyzed.
      if len(repository_map) > 0:
        logger.debug('Loading data for layer %s', layer.id)
        try:
          layer_data = self._api.get_layer_data(layer, include_vulnerabilities=True)
        except APIRequestFailure:
          # Let the failure propagate so the calling worker can back off and retry later.
          raise
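
        # Abridged sketch of the layer data consumed below. This is an assumption based only on
        # the fields this code reads; the scanner's actual response may contain more:
        #
        #   {
        #     'Layer': {
        #       'Features': [{
        #         'Vulnerabilities': [{
        #           'Name': 'CVE-2017-XXXX',
        #           'Description': '...',
        #           'Link': 'https://...',
        #           'Severity': 'High',
        #           'FixedBy': '...',  # present only when a fix exists
        #         }],
        #       }],
        #     },
        #   }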
        if layer_data is not None:
          # Dispatch events for any detected vulnerabilities.
          logger.debug('Got data for layer %s: %s', layer.id, layer_data)
          found_features = layer_data['Layer'].get('Features', [])

          for repository_id in repository_map:
            tags = repository_map[repository_id]
            vulnerabilities = dict()

            # Collect all the vulnerabilities found for the layer under each repository and send
            # as a batch notification.
            for feature in found_features:
              if 'Vulnerabilities' not in feature:
                continue

              for vulnerability in feature.get('Vulnerabilities', []):
                vuln_data = {
                  'id': vulnerability['Name'],
                  'description': vulnerability.get('Description', None),
                  'link': vulnerability.get('Link', None),
                  'has_fix': 'FixedBy' in vulnerability,

                  # TODO: Change this key name if/when we change the event format.
                  'priority': vulnerability.get('Severity', 'Unknown'),
                }

                vulnerabilities[vulnerability['Name']] = vuln_data

            # TODO(jzelinskie): remove when more endpoints have been converted to using
            # interfaces
            repository = AttrDict({
              'namespace_name': tags[0].repository.namespace_user.username,
              'name': tags[0].repository.name,
            })

            repo_vulnerabilities = list(vulnerabilities.values())
            if not repo_vulnerabilities:
              continue

            priority_key = lambda v: PRIORITY_LEVELS.get(v['priority'], {}).get('index', 100)
            repo_vulnerabilities.sort(key=priority_key)

            event_data = {
              'tags': [tag.name for tag in tags],
              'vulnerabilities': repo_vulnerabilities,
              'vulnerability': repo_vulnerabilities[0],  # For back-compat with existing events.
            }

            spawn_notification(repository, 'vulnerability_found', event_data)
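
For reference, a minimal construction-and-call sketch of LayerAnalyzer. Only SECURITY_SCANNER_ENGINE_VERSION_TARGET is read from the config by __init__ above; the secscan_api and layer arguments are placeholders for the security scanner API client (anything providing the analyze_layer and get_layer_data calls used in this module) and the Image row to analyze.

from util.secscan.analyzer import LayerAnalyzer


def analyze_one(secscan_api, layer):
  """ Analyze a single candidate layer; illustrative wiring only. """
  # Only SECURITY_SCANNER_ENGINE_VERSION_TARGET is read by LayerAnalyzer.__init__.
  config = {'SECURITY_SCANNER_ENGINE_VERSION_TARGET': 2}
  analyzer = LayerAnalyzer(config, secscan_api)

  # May raise PreemptedException (another worker won the race) or APIRequestFailure
  # (scanner unreachable); callers decide whether to back off, retry, or skip.
  analyzer.analyze_recursively(layer)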