# quay/workers/securityworker.py

import logging
import os
import random
import time

from sys import exc_info

import requests
from peewee import JOIN_LEFT_OUTER

import features
from app import app, storage, OVERRIDE_CONFIG_DIRECTORY
from data.database import (Image, ImageStorage, ImageStorageLocation, ImageStoragePlacement,
                           db_random_func, UseThenDisconnect)
from workers.worker import Worker

logger = logging.getLogger(__name__)

BATCH_SIZE = 20
INDEXING_INTERVAL = 10
API_METHOD_INSERT = '/layers'
API_METHOD_VERSION = '/versions/engine'
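
# The worker wakes up every INDEXING_INTERVAL seconds and submits batches of up
# to BATCH_SIZE unanalyzed layers to the scanner's ENDPOINT via
# API_METHOD_INSERT. API_METHOD_VERSION names the scanner's engine-version
# endpoint, though only the 'Version' field of each /layers response is
# actually read below.
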
def _get_image_to_export(version):
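  """Return up to BATCH_SIZE images that still need to be analyzed.

  Each entry is a dict carrying the image's docker_image_id, storage uuid and
  storage checksum, plus the parent image's id and storage uuid when an
  already-analyzed parent exists (None otherwise).
  """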
  Parent = Image.alias()
  ParentImageStorage = ImageStorage.alias()
  rimages = []

  # Without parent
  candidates = (Image
                .select(Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum)
                .join(ImageStorage)
                .where(Image.security_indexed_engine < version,
                       Image.parent >> None,
                       ImageStorage.uploading == False,
                       ImageStorage.checksum != '')
                .limit(BATCH_SIZE * 10)
                .alias('candidates'))

  images = (Image
            .select(candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum)
            .distinct()
            .from_(candidates)
            .order_by(db_random_func())
            .tuples()
            .limit(BATCH_SIZE))

  for image in images:
    rimages.append({'docker_image_id': image[0],
                    'storage_uuid': image[1],
                    'storage_checksum': image[2],
                    'parent_docker_image_id': None,
                    'parent_storage_uuid': None})

  # With analyzed parent
  candidates = (Image
                .select(Image.docker_image_id,
                        ImageStorage.uuid,
                        ImageStorage.checksum,
                        Parent.docker_image_id.alias('parent_docker_image_id'),
                        ParentImageStorage.uuid.alias('parent_storage_uuid'))
                .join(Parent, on=(Image.parent == Parent.id))
                .join(ParentImageStorage, on=(ParentImageStorage.id == Parent.storage))
                .switch(Image)
                .join(ImageStorage)
                .where(Image.security_indexed_engine < version,
                       Parent.security_indexed == True,
                       Parent.security_indexed_engine >= version,
                       ImageStorage.uploading == False,
                       ImageStorage.checksum != '')
                .limit(BATCH_SIZE * 10)
                .alias('candidates'))

  images = (Image
            .select(candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum,
                    candidates.c.parent_docker_image_id, candidates.c.parent_storage_uuid)
            .distinct()
            .from_(candidates)
            .order_by(db_random_func())
            .tuples()
            .limit(BATCH_SIZE))

  for image in images:
    # Unlike the parentless batch above, propagate the parent's id and uuid so
    # the scanner can link the layers.
    rimages.append({'docker_image_id': image[0],
                    'storage_uuid': image[1],
                    'storage_checksum': image[2],
                    'parent_docker_image_id': image[3],
                    'parent_storage_uuid': image[4]})

  # Re-shuffle, otherwise the images without parents will always be on top
  random.shuffle(rimages)

  return rimages


def _get_storage_locations(uuid):
  """Return the names of the storage locations holding the given storage uuid."""
  query = (ImageStoragePlacement
           .select()
           .join(ImageStorageLocation)
           .switch(ImageStoragePlacement)
           .join(ImageStorage, JOIN_LEFT_OUTER)
           .where(ImageStorage.uuid == uuid))

  return [placement.location.name for placement in query]


def _update_image(image, indexed, version):
  """Flag every row matching the image's id and storage uuid with the given
  security_indexed state and engine version."""
  query = (Image
           .select()
           .join(ImageStorage)
           .where(Image.docker_image_id == image['docker_image_id'],
                  ImageStorage.uuid == image['storage_uuid']))

  updated_images = [candidate.id for candidate in query]

  query = (Image
           .update(security_indexed=indexed, security_indexed_engine=version)
           .where(Image.id << updated_images))
  query.execute()


class SecurityWorker(Worker):
  def __init__(self):
    super(SecurityWorker, self).__init__()
    if self._load_configuration():
      self.add_operation(self._index_images, INDEXING_INTERVAL)

  def _load_configuration(self):
    # Load configuration
    config = app.config.get('SECURITY_SCANNER')
    if (not config or 'ENDPOINT' not in config or 'ENGINE_VERSION_TARGET' not in config
        or 'DISTRIBUTED_STORAGE_PREFERENCE' not in app.config):
      logger.error('No configuration found for the security worker')
      return False

    self._api = config['ENDPOINT']
    self._target_version = config['ENGINE_VERSION_TARGET']
    self._default_storage_locations = app.config['DISTRIBUTED_STORAGE_PREFERENCE']

    self._ca_verification = False
    self._cert = None
    if 'CA_CERTIFICATE_FILENAME' in config:
      self._ca_verification = os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['CA_CERTIFICATE_FILENAME'])
      if not os.path.isfile(self._ca_verification):
        logger.error('Could not find configured CA file')
        return False

    if 'PRIVATE_KEY_FILENAME' in config and 'PUBLIC_KEY_FILENAME' in config:
      self._cert = (
        os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['PUBLIC_KEY_FILENAME']),
        os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['PRIVATE_KEY_FILENAME']),
      )
      if not os.path.isfile(self._cert[0]) or not os.path.isfile(self._cert[1]):
        logger.error('Could not find configured key pair files')
        return False

    return True
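
  # An illustrative configuration stanza satisfying the checks above (all
  # values are hypothetical; the key names are the ones this method reads):
  #
  #   SECURITY_SCANNER = {
  #     'ENDPOINT': 'https://scanner.internal:6060',
  #     'ENGINE_VERSION_TARGET': 1,
  #     'CA_CERTIFICATE_FILENAME': 'scanner-ca.pem',    # optional
  #     'PUBLIC_KEY_FILENAME': 'scanner-client.crt',    # optional, used as a
  #     'PRIVATE_KEY_FILENAME': 'scanner-client.key',   # client cert pair
  #   }
  #   DISTRIBUTED_STORAGE_PREFERENCE = ['local_us']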

  def _index_images(self):
    with UseThenDisconnect(app.config):
      while True:
        # Get images to analyze
        images = _get_image_to_export(self._target_version)
        if not images:
          logger.debug('No more images to analyze')
          return

        for img in images:
          # Get layer storage URL
          path = storage.image_layer_path(img['storage_uuid'])
          locations = self._default_storage_locations
          if not storage.exists(locations, path):
            locations = _get_storage_locations(img['storage_uuid'])
            if not storage.exists(locations, path):
              logger.warning('Could not find a valid location to download layer %s',
                             img['docker_image_id'] + '.' + img['storage_uuid'])
              # Mark the image as analyzed anyway: this error most likely occurs
              # during pre-production runs against a copy of the database, for
              # images that have already been removed from the real database
              # (and therefore from S3).
              _update_image(img, False, self._target_version)
              continue

          uri = storage.get_direct_download_url(locations, path)
          if uri is None:
            # Local storage hack
            uri = path

          # Forge request
          request = {
            'ID': img['docker_image_id'] + '.' + img['storage_uuid'],
            'TarSum': img['storage_checksum'],
            'Path': uri,
          }

          if img['parent_docker_image_id'] is not None:
            request['ParentID'] = img['parent_docker_image_id'] + '.' + img['parent_storage_uuid']
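
          # For illustration, a forged payload might look like this (all
          # values hypothetical):
          #
          #   {'ID': '8dbd9e392a96.a087f84c-...',
          #    'TarSum': 'tarsum+sha256:68fae4...',
          #    'Path': 'https://signed.download.url/...',
          #    'ParentID': '2ce2e90b0bc7.77756722-...'}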
          # Post request
          try:
            logger.info('Analyzing %s', request['ID'])
            # Using invalid certificates doesn't return proper errors because of
            # https://github.com/shazow/urllib3/issues/556
            httpResponse = requests.post(self._api + API_METHOD_INSERT, json=request,
                                         cert=self._cert, verify=self._ca_verification)
          except Exception:
            logger.exception('An exception occurred when analyzing layer ID %s : %s',
                             request['ID'], exc_info()[0])
            return

          try:
            jsonResponse = httpResponse.json()
          except ValueError:
            logger.exception('An exception occurred when analyzing layer ID %s : the response '
                             'is not valid JSON (%s)', request['ID'], httpResponse.text)
            return

          if httpResponse.status_code == 201:
            # The layer has been successfully indexed
            api_version = jsonResponse['Version']
            if api_version < self._target_version:
              logger.warning('An engine runs on version %d but the target version is %d',
                             api_version, self._target_version)

            _update_image(img, True, api_version)
            logger.info('Layer ID %s : analyzed successfully', request['ID'])
          else:
            if 'Message' in jsonResponse:
              if 'OS and/or package manager are not supported' in jsonResponse['Message']:
                # The current engine could not index this layer
                logger.warning('A warning event occurred when analyzing layer ID %s : %s',
                               request['ID'], jsonResponse['Message'])
                # Hopefully, there is no version lower than the target one running
                _update_image(img, False, self._target_version)
              else:
                logger.error('An error occurred when analyzing layer ID %s : %d %s',
                             request['ID'], httpResponse.status_code, jsonResponse['Message'])
                return
            else:
              logger.error('An error occurred when analyzing layer ID %s : %d',
                           request['ID'], httpResponse.status_code)
              return


if __name__ == '__main__':
  logging.getLogger('requests').setLevel(logging.WARNING)
  logging.getLogger('apscheduler').setLevel(logging.CRITICAL)

  if not features.SECURITY_SCANNER:
    logger.debug('Security scanner disabled; skipping')
    while True:
      time.sleep(100000)

  worker = SecurityWorker()
  worker.start()
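
# ---------------------------------------------------------------------------
# For local testing, a minimal stand-in for the scanner API this worker talks
# to could look like the sketch below. This is a hypothetical illustration
# based only on the calls made above (POST <ENDPOINT>/layers answering 201
# with a JSON 'Version', or an error status with a JSON 'Message'); the
# /versions/engine response shape is likewise an assumption.
#
#   from flask import Flask, jsonify, request
#
#   stub = Flask(__name__)
#
#   @stub.route('/layers', methods=['POST'])
#   def insert_layer():
#     payload = request.get_json()
#     if not payload or 'ID' not in payload:
#       return jsonify({'Message': 'malformed request'}), 400
#     # Pretend every layer indexes successfully on engine version 1.
#     return jsonify({'Version': 1}), 201
#
#   @stub.route('/versions/engine', methods=['GET'])
#   def engine_version():
#     return jsonify({'Version': 1}), 200
#
#   if __name__ == '__main__':
#     stub.run(port=6060)
# ---------------------------------------------------------------------------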