Refactor the security worker and API calls and add a bunch of tests

This commit is contained in:
Joseph Schorr 2016-02-24 16:01:27 -05:00
parent 0183c519f7
commit c0374d71c9
17 changed files with 811 additions and 456 deletions

View file

@ -0,0 +1,90 @@
# NOTE: These objects are used directly in the external-notification-data and
# vulnerability-service on the frontend, so be careful with changing their existing keys.
#
# Each entry is keyed by the priority name. 'index' is stored as a *string* ('0' is the most
# severe, '6' the least). Only the levels from Medium upward carry a 'value' key.
PRIORITY_LEVELS = {
    'Unknown': {
        'title': 'Unknown',
        'index': '6',
        'level': 'info',
        'description': (
            'Unknown is either a security problem that has not been assigned '
            'to a priority yet or a priority that our system did not recognize'
        ),
        'banner_required': False,
    },

    'Negligible': {
        'title': 'Negligible',
        'index': '5',
        'level': 'info',
        'description': (
            'Negligible is technically a security problem, but is only theoretical '
            'in nature, requires a very special situation, has almost no install base, '
            'or does no real damage.'
        ),
        'banner_required': False,
    },

    'Low': {
        'title': 'Low',
        'index': '4',
        'level': 'warning',
        'description': (
            'Low is a security problem, but is hard to exploit due to environment, '
            'requires a user-assisted attack, a small install base, or does very '
            'little damage.'
        ),
        'banner_required': False,
    },

    'Medium': {
        'title': 'Medium',
        'value': 'Medium',
        'index': '3',
        'level': 'warning',
        'description': (
            'Medium is a real security problem, and is exploitable for many people. '
            'Includes network daemon denial of service attacks, cross-site scripting, '
            'and gaining user privileges.'
        ),
        'banner_required': False,
    },

    'High': {
        'title': 'High',
        'value': 'High',
        'index': '2',
        'level': 'warning',
        'description': (
            'High is a real problem, exploitable for many people in a default installation. '
            'Includes serious remote denial of services, local root privilege escalations, '
            'or data loss.'
        ),
        'banner_required': False,
    },

    'Critical': {
        'title': 'Critical',
        'value': 'Critical',
        'index': '1',
        'level': 'error',
        'description': (
            'Critical is a world-burning problem, exploitable for nearly all people in '
            'a installation of the package. Includes remote root privilege escalations, '
            'or massive data loss.'
        ),
        'banner_required': True,
    },

    'Defcon1': {
        'title': 'Defcon 1',
        'value': 'Defcon1',
        'index': '0',
        'level': 'error',
        'description': (
            'Defcon1 is a Critical problem which has been manually highlighted '
            'by the Quay team. It requires immediate attention.'
        ),
        'banner_required': True,
    },
}
def get_priority_for_index(index):
    """ Returns the name of the priority level whose 'index' equals the given index.

        The 'index' values in PRIORITY_LEVELS are stored as strings, so the given index is
        converted to text before comparing: both 3 and '3' resolve to the same level. If no
        level has a matching index, 'Unknown' is returned.
    """
    # '%s' formatting (rather than str()) also accepts Python 2 unicode values unchanged.
    index_text = '%s' % (index,)

    for priority, details in PRIORITY_LEVELS.items():
        if details['index'] == index_text:
            return priority

    return 'Unknown'

138
util/secscan/analyzer.py Normal file
View file

@ -0,0 +1,138 @@
import logging
import logging.config
from collections import defaultdict
from endpoints.notificationhelper import spawn_notification
from data.database import Image, ExternalNotificationEvent
from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base
from util.secscan.api import APIRequestFailure
logger = logging.getLogger(__name__)
class LayerAnalyzer(object):
    """ Helper class to perform analysis of a layer via the security scanner. """

    def __init__(self, config, api):
        """ Stores the security scanner API client and reads the target engine version from
            the 'SECURITY_SCANNER' block of the given config.
        """
        secscan_config = config.get('SECURITY_SCANNER')
        self._api = api
        self._target_version = secscan_config['ENGINE_VERSION_TARGET']

    def analyze_recursively(self, layer):
        """ Analyzes a layer and all its parents.

            Return a tuple of two bools:
              - The first one tells us if the layer and its parents analyzed successfully.
              - The second one is set to False when another call pre-empted the candidate's
                analysis for us.
        """
        if layer.parent_id and layer.parent.security_indexed_engine < self._target_version:
            # The image has a parent that is not analyzed yet with this engine.
            # Get the parent to get its own parent and recurse.
            try:
                base_query = get_image_with_storage_and_parent_base()
                parent_layer = base_query.where(Image.id == layer.parent_id).get()
            except Image.DoesNotExist:
                logger.warning("Image %s has Image %s as parent but doesn't exist.", layer.id,
                               layer.parent_id)
                return False, set_secscan_status(layer, False, self._target_version)

            cont, _ = self.analyze_recursively(parent_layer)
            if not cont:
                # The analysis failed for some reason and did not mark the layer as failed,
                # thus we should not try to analyze the children of that layer.
                # Interrupt the recursive analysis and return as no-one pre-empted us.
                return False, True

        # Now we know all parents are analyzed.
        return self._analyze(layer)

    def _analyze(self, layer):
        """ Analyzes a single layer.

            Return a tuple of two bools:
              - The first one tells us if we should evaluate its children.
              - The second one is set to False when another worker pre-empted the candidate's
                analysis for us.
        """
        # If the parent couldn't be analyzed with the target version or higher, we can't
        # analyze this image. Mark it as failed with the current target version.
        if (layer.parent_id and not layer.parent.security_indexed and
                layer.parent.security_indexed_engine >= self._target_version):
            return True, set_secscan_status(layer, False, self._target_version)

        # Analyze the image.
        logger.info('Analyzing layer %s', layer.docker_image_id)
        (analyzed_version, should_requeue) = self._api.analyze_layer(layer)

        # If analysis failed, then determine whether we need to requeue.
        if not analyzed_version:
            if should_requeue:
                # If the layer needs to be requeued, return that the children cannot be
                # analyzed (at this time) and there was no collision with another worker.
                # NOTE(review): per the docstring a False second value means "pre-empted";
                # confirm that callers interpret this return as intended.
                return False, False
            else:
                # If the layer cannot be requeued, we allow the children to be analyzed,
                # because the code path above will mark them as not analyzable, and we mark
                # the image itself as not being analyzable.
                return True, set_secscan_status(layer, False, self._target_version)

        # Mark the image as analyzed.
        logger.info('Analyzed layer %s successfully with version %s', layer.docker_image_id,
                    analyzed_version)
        set_status = set_secscan_status(layer, True, analyzed_version)

        # If we are the one who've done the job successfully first, get the vulnerabilities
        # and send notifications to the repos that have a tag on that layer.
        if set_status:
            self._notify_vulnerabilities(layer)

        return True, set_status

    def _notify_vulnerabilities(self, layer):
        """ Spawns a 'vulnerability_found' notification for every vulnerability reported on the
            given (already analyzed) layer, once per repository that has a matching tag.
        """
        # Group the tags on the analyzed layer by repository.
        event = ExternalNotificationEvent.get(name='vulnerability_found')
        repository_map = defaultdict(list)
        for tag in filter_tags_have_repository_event(get_tags_for_image(layer.id), event):
            repository_map[tag.repository_id].append(tag)

        # Only look up the vulnerabilities if there is at least one tag to notify.
        if not repository_map:
            return

        logger.debug('Loading data for layer %s', layer.id)
        try:
            layer_data = self._api.get_layer_data(layer, include_vulnerabilities=True)
        except APIRequestFailure:
            layer_data = None

        if layer_data is None:
            return

        # Dispatch events for any detected vulnerabilities.
        logger.debug('Got data for layer %s: %s', layer.id, layer_data)
        for repository_id in repository_map:
            tags = repository_map[repository_id]
            for vulnerability in self._iter_vulnerabilities(layer_data):
                event_data = self._vulnerability_event_data(tags, vulnerability)
                spawn_notification(tags[0].repository, 'vulnerability_found', event_data)

    @staticmethod
    def _iter_vulnerabilities(layer_data):
        """ Yields every vulnerability dict listed under layer_data['Layer']['Features'].

            A layer without a 'Features' entry (or with an empty/null one) yields nothing
            instead of raising KeyError; features without 'Vulnerabilities' are skipped.
        """
        for feature in layer_data['Layer'].get('Features') or []:
            for vulnerability in feature.get('Vulnerabilities') or []:
                yield vulnerability

    @staticmethod
    def _vulnerability_event_data(tags, vulnerability):
        """ Builds the notification payload for one vulnerability affecting the given tags. """
        return {
            'tags': [tag.name for tag in tags],
            'vulnerability': {
                'id': vulnerability['Name'],
                'description': vulnerability.get('Description', None),
                'link': vulnerability.get('Link', None),
                'has_fix': 'FixedBy' in vulnerability,

                # TODO: Change this key name if/when we change the event format.
                'priority': vulnerability.get('Severity', 'Unknown'),
            },
        }

View file

@ -1,205 +1,180 @@
import features
import logging
import requests
from data.database import CloseForLongOperation
from data import model
from data.model.storage import get_storage_locations
from urlparse import urljoin
from util.secscan.validator import SecurityConfigValidator
logger = logging.getLogger(__name__)
# NOTE: These objects are used directly in the external-notification-data and vulnerability-service
# on the frontend, so be careful with changing their existing keys.
PRIORITY_LEVELS = {
'Unknown': {
'title': 'Unknown',
'index': '6',
'level': 'info',
class AnalyzeLayerException(Exception):
""" Exception raised when a layer fails to analyze due to a *client-side* issue. """
'description': 'Unknown is either a security problem that has not been assigned ' +
'to a priority yet or a priority that our system did not recognize',
'banner_required': False
},
'Negligible': {
'title': 'Negligible',
'index': '5',
'level': 'info',
'description': 'Negligible is technically a security problem, but is only theoretical ' +
'in nature, requires a very special situation, has almost no install base, ' +
'or does no real damage.',
'banner_required': False
},
'Low': {
'title': 'Low',
'index': '4',
'level': 'warning',
'description': 'Low is a security problem, but is hard to exploit due to environment, ' +
'requires a user-assisted attack, a small install base, or does very ' +
'little damage.',
'banner_required': False
},
'Medium': {
'title': 'Medium',
'value': 'Medium',
'index': '3',
'level': 'warning',
'description': 'Medium is a real security problem, and is exploitable for many people. ' +
'Includes network daemon denial of service attacks, cross-site scripting, ' +
'and gaining user privileges.',
'banner_required': False
},
'High': {
'title': 'High',
'value': 'High',
'index': '2',
'level': 'warning',
'description': 'High is a real problem, exploitable for many people in a default installation. ' +
'Includes serious remote denial of services, local root privilege escalations, ' +
'or data loss.',
'banner_required': False
},
'Critical': {
'title': 'Critical',
'value': 'Critical',
'index': '1',
'level': 'error',
'description': 'Critical is a world-burning problem, exploitable for nearly all people in ' +
'a installation of the package. Includes remote root privilege escalations, ' +
'or massive data loss.',
'banner_required': True
},
'Defcon1': {
'title': 'Defcon 1',
'value': 'Defcon1',
'index': '0',
'level': 'error',
'description': 'Defcon1 is a Critical problem which has been manually highlighted ' +
'by the Quay team. It requires immediate attention.',
'banner_required': True
}
}
class APIRequestFailure(Exception):
""" Exception raised when there is a failure to conduct an API request. """
def get_priority_for_index(index):
for priority in PRIORITY_LEVELS:
if PRIORITY_LEVELS[priority]['index'] == index:
return priority
return 'Unknown'
class SecurityConfigValidator(object):
def __init__(self, app, config_provider):
self._config_provider = config_provider
if not features.SECURITY_SCANNER:
return
self._security_config = app.config['SECURITY_SCANNER']
if self._security_config is None:
return
self._certificate = self._get_filepath('CA_CERTIFICATE_FILENAME') or False
self._public_key = self._get_filepath('PUBLIC_KEY_FILENAME')
self._private_key = self._get_filepath('PRIVATE_KEY_FILENAME')
if self._public_key and self._private_key:
self._keys = (self._public_key, self._private_key)
else:
self._keys = None
def _get_filepath(self, key):
config = self._security_config
if key in config:
with self._config_provider.get_volume_file(config[key]) as f:
return f.name
return None
def cert(self):
return self._certificate
def keypair(self):
return self._keys
def valid(self):
if not features.SECURITY_SCANNER:
return False
if not self._security_config:
logger.debug('Missing SECURITY_SCANNER block in configuration')
return False
if not 'ENDPOINT' in self._security_config:
logger.debug('Missing ENDPOINT field in SECURITY_SCANNER configuration')
return False
endpoint = self._security_config['ENDPOINT'] or ''
if not endpoint.startswith('http://') and not endpoint.startswith('https://'):
logger.debug('ENDPOINT field in SECURITY_SCANNER configuration must start with http or https')
return False
if endpoint.startswith('https://') and (self._certificate is False or self._keys is None):
logger.debug('Certificate and key pair required for talking to security worker over HTTPS')
return False
return True
# Relative API path to which a layer is POSTed for analysis.
_API_METHOD_INSERT = 'layers'
# Relative API path template for fetching a single layer; '%s' is filled with the layer name.
_API_METHOD_GET_LAYER = 'layers/%s'
# Query-string suffixes appended to the layer path to request extra data in the response.
_API_METHOD_GET_WITH_VULNERABILITIES_FLAG = '?vulnerabilities'
_API_METHOD_GET_WITH_FEATURES_FLAG = '?features'
class SecurityScannerAPI(object):
""" Helper class for talking to the Security Scan service (Clair). """
def __init__(self, app, config_provider):
self.app = app
def __init__(self, config, config_provider, storage):
self.config = config
self.config_provider = config_provider
self._storage = storage
self._security_config = None
config_validator = SecurityConfigValidator(app, config_provider)
config_validator = SecurityConfigValidator(config, config_provider)
if not config_validator.valid():
logger.warning('Invalid config provided to SecurityScannerAPI')
return
self._security_config = app.config.get('SECURITY_SCANNER')
self._default_storage_locations = config['DISTRIBUTED_STORAGE_PREFERENCE']
self._security_config = config.get('SECURITY_SCANNER')
self._target_version = self._security_config['ENGINE_VERSION_TARGET']
self._certificate = config_validator.cert()
self._keys = config_validator.keypair()
def check_layer_vulnerable(self, layer_id, cve_id):
""" Checks with Clair whether the given layer is vulnerable to the given CVE. """
try:
body = {
'LayersIDs': [layer_id]
def _get_image_url(self, image):
    """ Returns the URL from which the given image's layer can be downloaded.

        Returns None when the layer cannot be found in any storage location, or when the
        storage offers no direct download URL and no 'LocalStorage' engine is configured.
    """
    layer_path = model.storage.get_layer_path(image.storage)
    candidate_locations = self._default_storage_locations

    # The preferred locations don't hold the layer: fall back to whatever
    # get_storage_locations reports for this storage.
    if not self._storage.exists(candidate_locations, layer_path):
        candidate_locations = get_storage_locations(image.storage.uuid)

        if not (candidate_locations and self._storage.exists(candidate_locations, layer_path)):
            logger.warning('Could not find a valid location to download layer %s.%s out of %s',
                           image.docker_image_id, image.storage.uuid, candidate_locations)
            return None

    download_url = self._storage.get_direct_download_url(candidate_locations, layer_path)
    if download_url is not None:
        return download_url

    # Handle local storage: without a direct URL, the bare path is only returned when one of
    # the configured storage engines is of type 'LocalStorage'.
    storage_config = self.config.get('DISTRIBUTED_STORAGE_CONFIG', {})
    configured_types = [storage_type for storage_type, _ in storage_config.values()]
    if 'LocalStorage' not in configured_types:
        logger.warning('Could not get image URL and local storage was not enabled')
        return None

    # TODO: fix to use the proper local storage path.
    return layer_path
def _new_analyze_request(self, image):
""" Create the request body to submit the given image for analysis. If the image's URL cannot
be found, returns None.
"""
url = self._get_image_url(image)
if url is None:
return None
request = {
'Layer': {
'Name': '%s.%s' % (image.docker_image_id, image.storage.uuid),
'Path': url,
'Format': 'Docker'
}
response = self.call('vulnerabilities/%s/affected-layers', body, cve_id)
except requests.exceptions.RequestException:
logger.exception('Got exception when trying to call Clair endpoint')
return False
}
if response.status_code != 200:
return False
if image.parent.docker_image_id and image.parent.storage.uuid:
request['Layer']['ParentName'] = '%s.%s' % (image.parent.docker_image_id,
image.parent.storage.uuid)
return request
def analyze_layer(self, layer):
""" Posts the given layer to the security scanner for analysis, blocking until complete.
Returns a tuple containing the analysis version (on success, None on failure) and
whether the request should be retried.
"""
request = self._new_analyze_request(layer)
if not request:
return None, False
logger.info('Analyzing layer %s', request['Layer']['Name'])
try:
response_data = response.json()
except ValueError:
logger.exception('Got exception when trying to parse Clair response')
return False
response = self._call(_API_METHOD_INSERT, request)
json_response = response.json()
except requests.exceptions.Timeout:
logger.exception('Timeout when trying to post layer data response for %s', layer.id)
return None, True
except requests.exceptions.ConnectionError:
logger.exception('Connection error when trying to post layer data response for %s', layer.id)
return None, True
except (requests.exceptions.RequestException, ValueError):
logger.exception('Failed to post layer data response for %s', layer.id)
return None, False
if (not layer_id in response_data or
not response_data[layer_id].get('Vulnerable', False)):
return False
# Handle any errors from the security scanner.
if response.status_code != 201:
message = json_response.get('Error').get('Message', '')
logger.warning('A warning event occurred when analyzing layer %s (status code %s): %s',
request['Layer']['Name'], response.status_code, message)
return True
# 400 means the layer could not be analyzed due to a bad request.
if response.status_code == 400:
logger.error('Bad request when calling security scanner for layer %s: %s',
response.status_code, json_response)
raise AnalyzeLayerException('Bad request to security scanner')
def call(self, relative_url, body=None, *args, **kwargs):
# 422 means that the layer could not be analyzed:
# - the layer could not be extracted (manifest?)
# - the layer operating system / package manager is unsupported
return None, response.status_code != 422
api_version = json_response['Layer']['IndexedByVersion']
return api_version, False
def get_layer_data(self, layer, include_features=False, include_vulnerabilities=False):
    """ Returns the security scanner's data for the specified layer.

        The layer is looked up under the name '<docker_image_id>.<storage uuid>'. When
        include_features is set the features flag is added to the request; when
        include_vulnerabilities is set the vulnerabilities flag is used instead (it takes
        precedence if both are given).

        Returns the decoded JSON body of the response, or None if the scanner answers with
        a 404 for the layer.

        Raises APIRequestFailure if the request times out, cannot connect, fails in any other
        way, or if the response body cannot be decoded as JSON.
    """
    layer_id = '%s.%s' % (layer.docker_image_id, layer.storage.uuid)

    flag = ''
    if include_features:
        flag = _API_METHOD_GET_WITH_FEATURES_FLAG

    if include_vulnerabilities:
        flag = _API_METHOD_GET_WITH_VULNERABILITIES_FLAG

    try:
        response = self._call(_API_METHOD_GET_LAYER + flag, None, layer_id)
        logger.debug('Got response %s for vulnerabilities for layer %s',
                     response.status_code, layer_id)
    except requests.exceptions.Timeout:
        raise APIRequestFailure('API call timed out')
    except requests.exceptions.ConnectionError:
        raise APIRequestFailure('Could not connect to security service')
    except (requests.exceptions.RequestException, ValueError):
        logger.exception('Failed to get layer data response for %s', layer.id)
        raise APIRequestFailure()

    if response.status_code == 404:
        return None

    # Decoding happens after the request, so it needs its own guard: an undecodable body is
    # reported as an API failure rather than escaping as a raw ValueError.
    # NOTE(review): other non-200 statuses are still returned as decoded JSON -- confirm that
    # callers can cope with an error payload here.
    try:
        return response.json()
    except ValueError:
        logger.exception('Failed to decode layer data response for %s', layer.id)
        raise APIRequestFailure()
def _call(self, relative_url, body=None, *args, **kwargs):
""" Issues an HTTP call to the sec API at the given relative URL.
This function disconnects from the database while awaiting a response
from the API server.
@ -211,14 +186,16 @@ class SecurityScannerAPI(object):
api_url = urljoin(security_config['ENDPOINT'], '/' + security_config['API_VERSION']) + '/'
url = urljoin(api_url, relative_url % args)
client = self.app.config['HTTPCLIENT']
client = self.config['HTTPCLIENT']
timeout = security_config.get('API_TIMEOUT_SECONDS', 1)
logger.debug('Looking up sec information: %s', url)
with CloseForLongOperation(self.app.config):
with CloseForLongOperation(self.config):
if body is not None:
logger.debug('POSTing security URL %s', url)
return client.post(url, json=body, params=kwargs, timeout=timeout, cert=self._keys,
verify=self._certificate)
else:
logger.debug('GETing security URL %s', url)
return client.get(url, params=kwargs, timeout=timeout, cert=self._keys,
verify=self._certificate)

65
util/secscan/validator.py Normal file
View file

@ -0,0 +1,65 @@
import features
import logging
logger = logging.getLogger(__name__)
class SecurityConfigValidator(object):
    """ Helper class for validating the security scanner configuration. """

    def __init__(self, config, config_provider):
        """ Reads the 'SECURITY_SCANNER' block of the given config and resolves the configured
            certificate and key files through the config provider.

            Nothing is read when the SECURITY_SCANNER feature is disabled or the block is
            missing; valid() then reports False.
        """
        self._config_provider = config_provider

        # Set defaults first so that cert(), keypair() and valid() can always be called, even
        # when one of the early returns below is taken.
        self._security_config = None
        self._certificate = False
        self._public_key = None
        self._private_key = None
        self._keys = None

        if not features.SECURITY_SCANNER:
            return

        # A missing block is treated the same as an explicit None: not configured.
        self._security_config = config.get('SECURITY_SCANNER')
        if self._security_config is None:
            return

        self._certificate = self._get_filepath('CA_CERTIFICATE_FILENAME') or False
        self._public_key = self._get_filepath('PUBLIC_KEY_FILENAME')
        self._private_key = self._get_filepath('PRIVATE_KEY_FILENAME')

        if self._public_key and self._private_key:
            self._keys = (self._public_key, self._private_key)
        else:
            self._keys = None

    def _get_filepath(self, key):
        """ Returns the name of the volume file configured under the given key, or None if the
            key is not present in the security scanner configuration.
        """
        config = self._security_config
        if key in config:
            with self._config_provider.get_volume_file(config[key]) as f:
                return f.name

        return None

    def cert(self):
        """ Returns the path of the CA certificate file, or False if none is configured. """
        return self._certificate

    def keypair(self):
        """ Returns the (public key, private key) file path tuple, or None unless both keys
            are configured.
        """
        return self._keys

    def valid(self):
        """ Returns whether the security scanner is enabled and its configuration is usable:
            an http(s) ENDPOINT and, for https, both a certificate and a key pair.
        """
        if not features.SECURITY_SCANNER:
            return False

        if not self._security_config:
            logger.debug('Missing SECURITY_SCANNER block in configuration')
            return False

        if 'ENDPOINT' not in self._security_config:
            logger.debug('Missing ENDPOINT field in SECURITY_SCANNER configuration')
            return False

        endpoint = self._security_config['ENDPOINT'] or ''
        if not endpoint.startswith(('http://', 'https://')):
            logger.debug('ENDPOINT field in SECURITY_SCANNER configuration must start with http or https')
            return False

        if endpoint.startswith('https://') and (self._certificate is False or self._keys is None):
            logger.debug('Certificate and key pair required for talking to security worker over HTTPS')
            return False

        return True