Refactor the security worker and API calls and add a bunch of tests
This commit is contained in:
parent
0183c519f7
commit
c0374d71c9
17 changed files with 811 additions and 456 deletions
|
@ -1,98 +1,37 @@
|
|||
import logging
|
||||
import logging.config
|
||||
|
||||
import requests
|
||||
import features
|
||||
import time
|
||||
|
||||
from peewee import fn
|
||||
from collections import defaultdict
|
||||
|
||||
from app import app, config_provider, storage, secscan_api
|
||||
from endpoints.notificationhelper import spawn_notification
|
||||
from app import app, config_provider, secscan_api
|
||||
from workers.worker import Worker
|
||||
from data import model
|
||||
from data.database import (Image, UseThenDisconnect, ExternalNotificationEvent)
|
||||
from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
|
||||
from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base
|
||||
from data.model.storage import get_storage_locations
|
||||
from data.database import Image, UseThenDisconnect
|
||||
from data.model.image import get_image_with_storage_and_parent_base
|
||||
from util.secscan.api import SecurityConfigValidator
|
||||
from util.secscan.analyzer import LayerAnalyzer
|
||||
from util.migrate.allocator import yield_random_entries
|
||||
|
||||
BATCH_SIZE = 50
|
||||
INDEXING_INTERVAL = 30
|
||||
API_METHOD_INSERT = '/v1/layers'
|
||||
API_METHOD_GET_WITH_VULNERABILITIES = '/v1/layers/%s?vulnerabilities'
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class SecurityWorker(Worker):
|
||||
def __init__(self):
    """ Sets up the worker.

        Validates the security scanner configuration. When it is valid, the scanner settings
        read from the application config are cached on the worker, a layer analyzer is
        created and the periodic indexing operation is registered. When it is not valid,
        a warning is logged and no operation is registered. """
    super(SecurityWorker, self).__init__()

    # Build the validator exactly once, from the application config (the same object that
    # is handed to the LayerAnalyzer below).
    validator = SecurityConfigValidator(app.config, config_provider)
    if not validator.valid():
        logger.warning('Failed to validate security scan configuration')
        return

    secscan_config = app.config.get('SECURITY_SCANNER')
    self._api = secscan_config['ENDPOINT']
    self._target_version = secscan_config['ENGINE_VERSION_TARGET']
    self._default_storage_locations = app.config['DISTRIBUTED_STORAGE_PREFERENCE']
    self._cert = validator.cert()
    self._keys = validator.keypair()
    self._analyzer = LayerAnalyzer(app.config, secscan_api)

    # Run the indexing pass every INDEXING_INTERVAL (units defined by Worker.add_operation).
    self.add_operation(self._index_images, INDEXING_INTERVAL)
|
||||
|
||||
def _new_request(self, image):
    """ Create the request body to submit the given image for analysis.

        Returns None when no download URL could be resolved for the image. Otherwise returns
        a dict with a single 'Layer' entry holding the layer 'Name', 'Path' and 'Format',
        plus 'ParentName' when the image has a parent with both an image ID and a storage
        UUID. """
    url = self._get_image_url(image)
    if url is None:
        return None

    layer_info = {
        'Name': '%s.%s' % (image.docker_image_id, image.storage.uuid),
        'Path': url,
        'Format': 'Docker',
    }

    # An image may have no parent at all; only dereference the parent (and then its
    # storage) once each level is known to be present.
    parent = image.parent
    if parent is not None and parent.docker_image_id:
        parent_storage = parent.storage
        if parent_storage is not None and parent_storage.uuid:
            layer_info['ParentName'] = '%s.%s' % (parent.docker_image_id, parent_storage.uuid)

    return {'Layer': layer_info}
|
||||
|
||||
def _get_image_url(self, image):
    """ Resolves the location from which the given image's layer can be downloaded.

        Returns None when the layer cannot be found in any storage location (in that case
        the image's scan status is also set to failed for the target engine version), or
        when no direct download URL exists and local storage is not configured. """
    layer_path = model.storage.get_layer_path(image.storage)
    candidate_locations = self._default_storage_locations

    if not storage.exists(candidate_locations, layer_path):
        # Not present in the preferred locations: fall back to the locations recorded for
        # this particular storage entry.
        candidate_locations = get_storage_locations(image.storage.uuid)
        layer_found = candidate_locations and storage.exists(candidate_locations, layer_path)

        if not layer_found:
            logger.warning('Could not find a valid location to download layer %s.%s',
                           image.docker_image_id, image.storage.uuid)
            set_secscan_status(image, False, self._target_version)
            return None

    direct_url = storage.get_direct_download_url(candidate_locations, layer_path)
    if direct_url is not None:
        return direct_url

    # No direct download URL is available: the layer path itself is only usable when one of
    # the configured storage engines is local storage.
    storage_config = app.config.get('DISTRIBUTED_STORAGE_CONFIG', {})
    configured_types = [storage_type for storage_type, _ in storage_config.values()]
    if 'LocalStorage' in configured_types:
        return layer_path

    logger.warning('Could not get image URL and local storage was not enabled')
    return None
|
||||
|
||||
def _index_images(self):
|
||||
def batch_query():
|
||||
base_query = get_image_with_storage_and_parent_base()
|
||||
|
@ -106,144 +45,11 @@ class SecurityWorker(Worker):
|
|||
|
||||
with UseThenDisconnect(app.config):
|
||||
for candidate, abt in yield_random_entries(batch_query, Image.id, BATCH_SIZE, max_id, min_id):
|
||||
_, continue_batch = self._analyze_recursively(candidate)
|
||||
_, continue_batch = self._analyzer.analyze_recursively(candidate)
|
||||
if not continue_batch:
|
||||
logger.info('Another worker pre-empted us for layer: %s', candidate.id)
|
||||
abt.set()
|
||||
|
||||
def _analyze_recursively(self, layer):
    """ Analyzes a layer after first analyzing every ancestor that has not yet been indexed
        by the target engine version.

        Returns the same pair of bools as _analyze: whether the children of this layer
        should be evaluated, and whether we were NOT pre-empted by another worker. """
    parent_is_stale = (layer.parent_id and
                       layer.parent.security_indexed_engine < self._target_version)
    if not parent_is_stale:
        # Every parent is already analyzed (or there is none): analyze the layer itself.
        return self._analyze(layer)

    # The parent has not been analyzed with this engine yet. Load it together with its own
    # parent so that the recursion can keep walking up the chain.
    try:
        parent_query = get_image_with_storage_and_parent_base()
        parent_layer = parent_query.where(Image.id == layer.parent_id).get()
    except Image.DoesNotExist:
        logger.warning("Image %s has Image %s as parent but doesn't exist.", layer.id,
                       layer.parent_id)
        return False, set_secscan_status(layer, False, self._target_version)

    should_continue, _ = self._analyze_recursively(parent_layer)
    if not should_continue:
        # The parent's analysis failed without the parent being marked as failed, so its
        # children must not be analyzed. Stop here and report that no-one pre-empted us.
        return False, True

    # All parents are analyzed at this point.
    return self._analyze(layer)
|
||||
|
||||
def _analyze(self, layer):
    """ Analyzes a single layer.

        Returns two bools: the first one tells us if we should evaluate its children, the
        second one is set to False when another worker pre-empted the candidate's analysis
        for us.

        Transport failures, undecodable responses and malformed response bodies are logged
        and reported as (False, True) without changing the layer's scan status. """

    # If the parent couldn't be analyzed with the target version or higher, we can't analyze
    # this image. Mark it as failed with the current target version.
    if (layer.parent_id and not layer.parent.security_indexed and
            layer.parent.security_indexed_engine >= self._target_version):
        return True, set_secscan_status(layer, False, self._target_version)

    request = self._new_request(layer)
    if request is None:
        return False, True

    layer_name = request['Layer']['Name']

    # Analyze the image.
    try:
        logger.info('Analyzing layer %s', layer_name)
        # Using invalid certificates doesn't return proper errors because of
        # https://github.com/shazow/urllib3/issues/556
        http_response = requests.post(self._api + API_METHOD_INSERT, json=request,
                                      cert=self._keys, verify=self._cert)
        json_response = http_response.json()
    except (requests.exceptions.RequestException, ValueError):
        logger.exception('An exception occurred when analyzing layer %s', layer_name)
        return False, True

    # Handle any errors from the security scanner.
    if http_response.status_code != 201:
        # An error body is not guaranteed to be an object carrying an 'Error' entry; fall
        # back to an empty message instead of raising while reporting the failure.
        error = json_response.get('Error') if isinstance(json_response, dict) else None
        message = error.get('Message', '') if isinstance(error, dict) else ''
        logger.warning('A warning event occurred when analyzing layer %s (status code %s): %s',
                       layer_name, http_response.status_code, message)

        # 422 means that the layer could not be analyzed:
        # - the layer could not be extracted (manifest?)
        # - the layer operating system / package manager is unsupported
        # Set the layer as failed.
        if http_response.status_code == 422:
            return True, set_secscan_status(layer, False, self._target_version)

        return False, True

    # Verify that the version matches.
    try:
        api_version = json_response['Layer']['IndexedByVersion']
    except (KeyError, TypeError):
        # A 201 without the expected payload cannot be recorded as a successful analysis.
        logger.warning('Unexpected response body when analyzing layer %s', layer_name)
        return False, True

    if api_version < self._target_version:
        logger.warning('An engine runs on version %d but the target version is %d', api_version,
                       self._target_version)

    # Mark the image as analyzed.
    logger.info('Analyzed layer %s successfully', layer_name)
    set_status = set_secscan_status(layer, True, api_version)

    # If we are the one who've done the job successfully first, get the vulnerabilities and
    # send notifications to the repos that have a tag on that layer.
    # TODO(josephschorr): Adapt this depending on the new notification format we adopt.
    # if set_status:
    #   # Get the tags of the layer we analyzed.
    #   repository_map = defaultdict(list)
    #   event = ExternalNotificationEvent.get(name='vulnerability_found')
    #   matching = list(filter_tags_have_repository_event(get_tags_for_image(layer.id), event))
    #
    #   for tag in matching:
    #     repository_map[tag.repository_id].append(tag)
    #
    #   # If there is at least one tag,
    #   # Lookup the vulnerabilities for the image, now that it is analyzed.
    #   if len(repository_map) > 0:
    #     logger.debug('Loading vulnerabilities for layer %s', layer.id)
    #     sec_data = self._get_vulnerabilities(layer)
    #
    #     if sec_data is not None:
    #       # Dispatch events for any detected vulnerabilities
    #       logger.debug('Got vulnerabilities for layer %s: %s', layer.id, sec_data)
    #
    #       for repository_id in repository_map:
    #         tags = repository_map[repository_id]
    #
    #         for vuln in sec_data['Vulnerabilities']:
    #           event_data = {
    #             'tags': [tag.name for tag in tags],
    #             'vulnerability': {
    #               'id': vuln['Name'],
    #               'description': vuln['Description'],
    #               'link': vuln['Link'],
    #               'priority': vuln['Priority'],
    #             },
    #           }
    #
    #           spawn_notification(tags[0].repository, 'vulnerability_found', event_data)

    return True, set_status
|
||||
|
||||
def _get_vulnerabilities(self, layer):
    """ Returns the vulnerabilities detected (if any) or None on error.

        None is returned when the scanner answers with a 404, when the request fails, and
        when the response body cannot be decoded as JSON. """
    try:
        response = secscan_api.call(self._api + API_METHOD_GET_WITH_VULNERABILITIES, None,
                                    '%s.%s' % (layer.docker_image_id, layer.storage.uuid))

        logger.debug('Got response %s for vulnerabilities for layer %s',
                     response.status_code, layer.id)

        if response.status_code == 404:
            return None

        # Decode inside the try block: a body that is not valid JSON raises ValueError,
        # which must be reported as None like every other failure.
        return response.json()
    except (requests.exceptions.RequestException, ValueError):
        logger.exception('Failed to get vulnerability response for %s', layer.id)
        return None
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if not features.SECURITY_SCANNER:
|
||||
|
|
Reference in a new issue