This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/workers/securityworker.py

313 lines
11 KiB
Python
Raw Normal View History

import logging
2015-10-27 21:38:48 +00:00
import logging.config
import requests
import features
import time
import os
import random
2015-10-27 21:38:48 +00:00
from endpoints.notificationhelper import spawn_notification
from collections import defaultdict
from peewee import JOIN_LEFT_OUTER
2015-11-11 20:41:46 +00:00
from app import app, config_provider, storage, OVERRIDE_CONFIG_DIRECTORY, secscan_api
from workers.worker import Worker
2015-10-27 21:38:48 +00:00
from data.database import (Image, ImageStorage, ImageStorageLocation, ImageStoragePlacement,
db_random_func, UseThenDisconnect, RepositoryTag, Repository,
ExternalNotificationEvent, RepositoryNotification)
2015-11-11 20:41:46 +00:00
from util.secscan.api import SecurityConfigValidator
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Number of images selected per lookup query; each candidate pool is capped at ten times
# this value before the random selection is made.
BATCH_SIZE = 20
# Interval passed to Worker.add_operation for the indexing operation
# (presumably in seconds -- TODO confirm against workers.worker.Worker).
INDEXING_INTERVAL = 10
2015-10-27 21:38:48 +00:00
# Path appended to the security scanner endpoint when POSTing a layer for analysis.
API_METHOD_INSERT = '/v1/layers'
# Presumably the scanner path reporting the engine version; it is not referenced by the
# code visible in this file -- TODO confirm whether it is still needed.
API_METHOD_VERSION = '/v1/versions/engine'
2015-11-11 20:41:46 +00:00
def _get_images_to_export_list(version):
    """ Returns a shuffled list of dicts describing images that still need to be indexed by
        an engine of at least the given version.

        Two kinds of images are selected, each limited to BATCH_SIZE randomly chosen rows out
        of a pool of at most BATCH_SIZE*10 candidates:
          - images without a parent;
          - images whose parent is already indexed by an engine >= version.

        Every dict carries the keys 'image_id', 'docker_image_id', 'storage_uuid',
        'parent_docker_image_id' and 'parent_storage_uuid' (the last two are None for
        parentless images).
    """
    # First pass: images that have no parent at all.
    # NOTE: `== False` is intentional; peewee builds the SQL expression from the operator.
    root_pool = (Image
                 .select(Image.id, Image.docker_image_id, ImageStorage.uuid)
                 .join(ImageStorage)
                 .where(Image.security_indexed_engine < version,
                        Image.parent_id >> None,
                        ImageStorage.uploading == False)
                 .limit(BATCH_SIZE*10)
                 .alias('candidates'))

    root_rows = (Image
                 .select(root_pool.c.id, root_pool.c.docker_image_id, root_pool.c.uuid)
                 .from_(root_pool)
                 .order_by(db_random_func())
                 .tuples()
                 .limit(BATCH_SIZE))

    to_export = [{'image_id': image_id,
                  'docker_image_id': docker_image_id,
                  'storage_uuid': storage_uuid,
                  'parent_docker_image_id': None,
                  'parent_storage_uuid': None}
                 for image_id, docker_image_id, storage_uuid in root_rows]

    # Second pass: images whose parent has already been analyzed by a recent enough engine.
    Parent = Image.alias()
    ParentImageStorage = ImageStorage.alias()

    child_pool = (Image
                  .select(Image.id,
                          Image.docker_image_id,
                          ImageStorage.uuid,
                          Parent.docker_image_id.alias('parent_docker_image_id'),
                          ParentImageStorage.uuid.alias('parent_storage_uuid'))
                  .join(Parent, on=(Image.parent_id == Parent.id))
                  .join(ParentImageStorage, on=(ParentImageStorage.id == Parent.storage))
                  .switch(Image)
                  .join(ImageStorage)
                  .where(Image.security_indexed_engine < version,
                         Parent.security_indexed == True,
                         Parent.security_indexed_engine >= version,
                         ImageStorage.uploading == False)
                  .limit(BATCH_SIZE*10)
                  .alias('candidates'))

    child_rows = (Image
                  .select(child_pool.c.id,
                          child_pool.c.docker_image_id,
                          child_pool.c.uuid,
                          child_pool.c.parent_docker_image_id,
                          child_pool.c.parent_storage_uuid)
                  .from_(child_pool)
                  .order_by(db_random_func())
                  .tuples()
                  .limit(BATCH_SIZE))

    to_export.extend({'image_id': image_id,
                      'docker_image_id': docker_image_id,
                      'storage_uuid': storage_uuid,
                      'parent_docker_image_id': parent_docker_image_id,
                      'parent_storage_uuid': parent_storage_uuid}
                     for (image_id, docker_image_id, storage_uuid,
                          parent_docker_image_id, parent_storage_uuid) in child_rows)

    # Mix both groups so that parentless images are not always handled first.
    random.shuffle(to_export)
    return to_export
def _get_storage_locations(uuid):
    """ Returns the names of the storage locations in which the image storage with the given
        uuid has a placement.
    """
    placements = (ImageStoragePlacement
                  .select()
                  .join(ImageStorageLocation)
                  .switch(ImageStoragePlacement)
                  .join(ImageStorage, JOIN_LEFT_OUTER)
                  .where(ImageStorage.uuid == uuid))

    location_names = []
    for placement in placements:
        location_names.append(placement.location.name)
    return location_names
def _update_image(image, indexed, version):
    """ Sets security_indexed and security_indexed_engine on every Image row that shares the
        given image's docker image ID and storage uuid. Does nothing when no row matches.
    """
    matching = (Image
                .select()
                .join(ImageStorage)
                .where(Image.docker_image_id == image['docker_image_id'],
                       ImageStorage.uuid == image['storage_uuid']))

    image_ids = [match.id for match in matching]
    if image_ids:
        # Only issue the UPDATE when there is at least one row to touch.
        (Image
         .update(security_indexed=indexed, security_indexed_engine=version)
         .where(Image.id << image_ids)
         .execute())
class SecurityWorker(Worker):
    """ Worker that submits not-yet-indexed image layers to the security scanner and spawns
        'vulnerability_found' notifications for the vulnerabilities reported for them.
    """

    def __init__(self):
        """ Validates the security scanner configuration and, only if it is valid, stores the
            scanner settings and registers the periodic indexing operation.
        """
        super(SecurityWorker, self).__init__()

        validator = SecurityConfigValidator(app, config_provider)
        if validator.valid():
            secscan_config = app.config.get('SECURITY_SCANNER')
            self._api = secscan_config['ENDPOINT']
            self._target_version = secscan_config['ENGINE_VERSION_TARGET']
            self._default_storage_locations = app.config['DISTRIBUTED_STORAGE_PREFERENCE']
            self._cert = validator.cert()
            self._keys = validator.keypair()

            self.add_operation(self._index_images, INDEXING_INTERVAL)
        else:
            # Without a valid configuration no operation is registered at all.
            logger.warning('Failed to validate security scan configuration')

    def _get_image_url(self, image):
        """ Gets the download URL for an image and if the storage doesn't exist,
            marks the image as unindexed.

            Returns None when no valid location holds the layer, or when no direct download
            URL is available and local storage is not configured.
        """
        path = storage.image_layer_path(image['storage_uuid'])
        locations = self._default_storage_locations

        if not storage.exists(locations, path):
            # Fall back to the locations recorded in the database for this storage.
            locations = _get_storage_locations(image['storage_uuid'])

            if not locations or not storage.exists(locations, path):
                logger.warning('Could not find a valid location to download layer %s',
                               image['docker_image_id']+'.'+image['storage_uuid'])
                _update_image(image, False, self._target_version)
                return None

        uri = storage.get_direct_download_url(locations, path)
        if uri is None:
            # Handle local storage: the scanner is handed the raw path instead of a URL.
            storage_config = app.config.get('DISTRIBUTED_STORAGE_CONFIG', {})
            local_storage_enabled = any(storage_type == 'LocalStorage'
                                        for storage_type, _ in storage_config.values())
            if not local_storage_enabled:
                logger.warning('Could not get image URL and local storage was not enabled')
                return None

            uri = path

        return uri

    def _new_request(self, image):
        """ Builds the JSON payload describing the layer for the security scanner, or returns
            None if no download URL could be determined for the image.
        """
        url = self._get_image_url(image)
        if url is None:
            return None

        request = {
            'ID': '%s.%s' % (image['docker_image_id'], image['storage_uuid']),
            'Path': url,
        }

        has_parent = (image['parent_docker_image_id'] is not None and
                      image['parent_storage_uuid'] is not None)
        if has_parent:
            request['ParentID'] = '%s.%s' % (image['parent_docker_image_id'],
                                             image['parent_storage_uuid'])

        return request

    def _analyze_image(self, image):
        """ Analyzes an image by passing it to Clair. Returns the vulnerabilities detected
            (if any) or None on error.
        """
        request = self._new_request(image)
        if request is None:
            return None

        # Analyze the image.
        try:
            logger.info('Analyzing %s', request['ID'])
            # Using invalid certificates doesn't return proper errors because of
            # https://github.com/shazow/urllib3/issues/556
            http_response = requests.post(self._api + API_METHOD_INSERT, json=request,
                                          cert=self._keys, verify=self._cert)
            json_response = http_response.json()
        except (requests.exceptions.RequestException, ValueError):
            logger.exception('An exception occurred when analyzing layer ID %s', request['ID'])
            return None

        # Handle any errors from the security scanner.
        if http_response.status_code != 201:
            if 'OS and/or package manager are not supported' in json_response.get('Message', ''):
                # The current engine could not index this layer
                logger.warning('A warning event occurred when analyzing layer ID %s : %s',
                               request['ID'], json_response['Message'])

                # Hopefully, there is no version lower than the target one running
                _update_image(image, False, self._target_version)
            else:
                logger.warning('Got non-201 when analyzing layer ID %s: %s', request['ID'],
                               json_response)

            return None

        # Verify that the version matches.
        api_version = json_response['Version']
        if api_version < self._target_version:
            # The format string has two placeholders, so both versions must be supplied.
            logger.warning('An engine runs on version %d but the target version is %d',
                           api_version, self._target_version)

        # Mark the image as analyzed.
        logger.debug('Layer %s analyzed successfully; Loading vulnerabilities for layer',
                     image['image_id'])
        _update_image(image, True, api_version)

        # Lookup the vulnerabilities for the image, now that it is analyzed.
        try:
            response = secscan_api.call('layers/%s/vulnerabilities', None, request['ID'])
            logger.debug('Got response %s for vulnerabilities for layer %s',
                         response.status_code, image['image_id'])
            if response.status_code == 404:
                return None

            # Decoded inside the try so that a malformed body (ValueError) yields None
            # instead of propagating out of the worker operation.
            return response.json()
        except (requests.exceptions.RequestException, ValueError):
            logger.exception('Failed to get vulnerability response for %s', image['image_id'])
            return None

    def _notify_vulnerabilities(self, image, sec_data):
        """ Spawns one 'vulnerability_found' notification per (repository, vulnerability) pair
            for every repository that has a visible, live tag pointing at the image and a
            notification registered for that event.
        """
        event = ExternalNotificationEvent.get(name='vulnerability_found')
        matching = (RepositoryTag
                    .select(RepositoryTag, Repository)
                    .distinct()
                    .join(Repository)
                    .join(RepositoryNotification)
                    .where(RepositoryNotification.event == event,
                           RepositoryTag.image == image['image_id'],
                           RepositoryTag.hidden == False,
                           RepositoryTag.lifetime_end_ts >> None))

        # Group the matching tags by repository so each notification lists all of them.
        repository_map = defaultdict(list)
        for tag in matching:
            repository_map[tag.repository_id].append(tag)

        for tags in repository_map.values():
            for vuln in sec_data['Vulnerabilities']:
                event_data = {
                    'tags': [tag.name for tag in tags],
                    'vulnerability': {
                        'id': vuln['ID'],
                        'description': vuln['Description'],
                        'link': vuln['Link'],
                        'priority': vuln['Priority'],
                    },
                }

                spawn_notification(tags[0].repository, 'vulnerability_found', event_data)

    def _index_images(self):
        """ Repeatedly looks up batches of images to index, analyzes each of them and
            dispatches notifications for detected vulnerabilities, until a lookup returns no
            images.
        """
        logger.debug('Started indexing')

        with UseThenDisconnect(app.config):
            # NOTE(review): an image whose analysis fails without a database update stays
            # eligible, so this loop can keep re-selecting it without pausing -- confirm
            # whether that is acceptable.
            while True:
                # Lookup the images to index.
                images = []
                try:
                    logger.debug('Looking up images to index')
                    images = _get_images_to_export_list(self._target_version)
                except Image.DoesNotExist:
                    # Treated the same as an empty result: nothing to index.
                    pass

                if not images:
                    logger.debug('No more images left to analyze')
                    return

                logger.debug('Found %d images to index', len(images))
                for image in images:
                    # Analyze the image, retrieving the vulnerabilities (if any).
                    sec_data = self._analyze_image(image)
                    if sec_data is None:
                        continue

                    if not sec_data.get('Vulnerabilities'):
                        continue

                    # Dispatch events for any detected vulnerabilities
                    logger.debug('Got vulnerabilities for layer %s: %s', image['image_id'],
                                 sec_data)
                    self._notify_vulnerabilities(image, sec_data)
if __name__ == '__main__':
    if not features.SECURITY_SCANNER:
        logger.debug('Security scanner disabled; skipping SecurityWorker')
        # Sleep forever rather than exit (presumably so that a process supervisor does not
        # keep restarting the disabled worker -- TODO confirm).
        while True:
            time.sleep(100000)

    logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
    SecurityWorker().start()