This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/workers/securityworker.py
Joseph Schorr 2d2662f53f Fix deleting repos and images under MySQL
MySQL doesn't handle constraints at the end of transactions, so deleting images currently fails. This removes the constraint and just leaves parent_id as an int
2015-11-09 14:42:05 -05:00

217 lines
8.7 KiB
Python

import logging
import requests
import features
import time
import os
import random
from sys import exc_info
from peewee import JOIN_LEFT_OUTER
from app import app, storage, OVERRIDE_CONFIG_DIRECTORY
from workers.worker import Worker
from data.database import Image, ImageStorage, ImageStorageLocation, ImageStoragePlacement, db_random_func, UseThenDisconnect
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Maximum number of rows kept from each candidate query in _get_image_to_export
# (the candidate pools themselves are capped at BATCH_SIZE*10).
BATCH_SIZE = 20
# Period passed to Worker.add_operation for the indexing job
# (presumably seconds -- TODO confirm against workers.worker.Worker).
INDEXING_INTERVAL = 10
# Path appended to the configured scanner endpoint when POSTing a layer.
API_METHOD_INSERT = '/layers'
# Not referenced in the visible part of this file
# (NOTE(review): presumably the engine-version endpoint -- confirm).
API_METHOD_VERSION = '/versions/engine'
def _get_image_to_export(version):
    """Pick a random batch of images still to be indexed at engine `version`.

    Two groups are considered, each limited to BATCH_SIZE rows drawn at
    random from a pool of at most BATCH_SIZE*10 candidates:

    * images without a parent;
    * images whose parent is already indexed at `version` or newer.

    In both groups the image's storage must be fully uploaded and carry a
    non-empty checksum.

    Returns a shuffled list of dicts with the keys 'docker_image_id',
    'storage_uuid', 'storage_checksum', 'parent_docker_image_id' and
    'parent_storage_uuid' (the two parent keys are None for root images).
    """
    ParentImage = Image.alias()
    ParentStorage = ImageStorage.alias()

    # --- Images without a parent -------------------------------------------
    # NOTE: `== False` is deliberate; it builds a peewee SQL expression.
    orphan_pool = (Image
                   .select(Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum)
                   .join(ImageStorage)
                   .where(Image.security_indexed_engine < version,
                          Image.parent_id >> None,
                          ImageStorage.uploading == False,
                          ImageStorage.checksum != '')
                   .limit(BATCH_SIZE*10)
                   .alias('candidates'))
    orphan_rows = (Image
                   .select(orphan_pool.c.docker_image_id,
                           orphan_pool.c.uuid,
                           orphan_pool.c.checksum)
                   .from_(orphan_pool)
                   .order_by(db_random_func())
                   .tuples()
                   .limit(BATCH_SIZE))
    selected = [
        {
            'docker_image_id': docker_id,
            'storage_uuid': storage_uuid,
            'storage_checksum': checksum,
            'parent_docker_image_id': None,
            'parent_storage_uuid': None,
        }
        for docker_id, storage_uuid, checksum in orphan_rows
    ]

    # --- Images whose parent has already been analyzed ---------------------
    child_pool = (Image
                  .select(Image.docker_image_id,
                          ImageStorage.uuid,
                          ImageStorage.checksum,
                          ParentImage.docker_image_id.alias('parent_docker_image_id'),
                          ParentStorage.uuid.alias('parent_storage_uuid'))
                  .join(ParentImage, on=(Image.parent_id == ParentImage.id))
                  .join(ParentStorage, on=(ParentStorage.id == ParentImage.storage))
                  .switch(Image)
                  .join(ImageStorage)
                  .where(Image.security_indexed_engine < version,
                         ParentImage.security_indexed == True,
                         ParentImage.security_indexed_engine >= version,
                         ImageStorage.uploading == False,
                         ImageStorage.checksum != '')
                  .limit(BATCH_SIZE*10)
                  .alias('candidates'))
    child_rows = (Image
                  .select(child_pool.c.docker_image_id,
                          child_pool.c.uuid,
                          child_pool.c.checksum,
                          child_pool.c.parent_docker_image_id,
                          child_pool.c.parent_storage_uuid)
                  .from_(child_pool)
                  .order_by(db_random_func())
                  .tuples()
                  .limit(BATCH_SIZE))
    selected.extend(
        {
            'docker_image_id': docker_id,
            'storage_uuid': storage_uuid,
            'storage_checksum': checksum,
            'parent_docker_image_id': parent_docker_id,
            'parent_storage_uuid': parent_storage_uuid,
        }
        for docker_id, storage_uuid, checksum, parent_docker_id, parent_storage_uuid in child_rows
    )

    # Mix both groups so that parentless images are not always handled first.
    random.shuffle(selected)
    return selected
def _get_storage_locations(uuid):
    """Return the names of the storage locations holding the storage `uuid`.

    The result is a list with one location name per placement row returned
    by the query; it is empty when no placement matches.
    """
    placements = (ImageStoragePlacement
                  .select()
                  .join(ImageStorageLocation)
                  .switch(ImageStoragePlacement)
                  .join(ImageStorage, JOIN_LEFT_OUTER)
                  .where(ImageStorage.uuid == uuid))
    return [placement.location.name for placement in placements]
def _update_image(image, indexed, version):
    """Record the outcome of a security scan on every matching Image row.

    Args:
      image: dict with the keys 'docker_image_id' and 'storage_uuid'
        identifying the layer.
      indexed: value stored in Image.security_indexed.
      version: value stored in Image.security_indexed_engine.

    All Image rows that share the docker image ID and are backed by the given
    storage are updated. Nothing is written when no row matches.
    """
    matching_rows = (Image
                     .select()
                     .join(ImageStorage)
                     .where(Image.docker_image_id == image['docker_image_id'],
                            ImageStorage.uuid == image['storage_uuid']))

    # Collect the IDs first and update in a second statement, rather than
    # updating through the join.
    image_ids = [row.id for row in matching_rows]
    if not image_ids:
        # No row matched (e.g. the image was removed in the meantime). Skip the
        # UPDATE: an empty IN list is rejected by some peewee/database
        # combinations and would update nothing anyway.
        return

    (Image
     .update(security_indexed=indexed, security_indexed_engine=version)
     .where(Image.id << image_ids)
     .execute())
class SecurityWorker(Worker):
    """Worker that periodically submits unindexed image layers to the
    security scanner API and records the outcome on the Image rows."""

    def __init__(self):
        super(SecurityWorker, self).__init__()
        # Only schedule the indexing job when the configuration is usable.
        if self._load_configuration():
            self.add_operation(self._index_images, INDEXING_INTERVAL)

    def _load_configuration(self):
        """Read the scanner settings from the app configuration.

        Sets self._api, self._target_version, self._default_storage_locations,
        self._ca_verification and self._cert.

        Returns True when the configuration is complete and every referenced
        certificate/key file exists, False otherwise.
        """
        config = app.config.get('SECURITY_SCANNER')
        if (not config
                or 'ENDPOINT' not in config
                or 'ENGINE_VERSION_TARGET' not in config
                or 'DISTRIBUTED_STORAGE_PREFERENCE' not in app.config):
            # Not in an exception handler: use error(), not exception().
            logger.error('No configuration found for the security worker')
            return False

        self._api = config['ENDPOINT']
        self._target_version = config['ENGINE_VERSION_TARGET']
        self._default_storage_locations = app.config['DISTRIBUTED_STORAGE_PREFERENCE']

        # Passed as `verify=` to requests: False disables verification, a path
        # selects the CA bundle to verify against.
        self._ca_verification = False
        # Passed as `cert=` to requests: None, or a (public, private) path pair.
        self._cert = None

        if 'CA_CERTIFICATE_FILENAME' in config:
            self._ca_verification = os.path.join(OVERRIDE_CONFIG_DIRECTORY,
                                                 config['CA_CERTIFICATE_FILENAME'])
            if not os.path.isfile(self._ca_verification):
                logger.error('Could not find configured CA file')
                return False

        if 'PRIVATE_KEY_FILENAME' in config and 'PUBLIC_KEY_FILENAME' in config:
            self._cert = (
                os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['PUBLIC_KEY_FILENAME']),
                os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['PRIVATE_KEY_FILENAME']),
            )
            if not os.path.isfile(self._cert[0]) or not os.path.isfile(self._cert[1]):
                logger.error('Could not find configured key pair files')
                return False

        return True

    def _index_images(self):
        """Submit batches of images to the scanner until none are left.

        Returns as soon as there is nothing left to analyze or an
        unrecoverable error occurs for one layer.
        """
        with UseThenDisconnect(app.config):
            while True:
                # Get images to analyze
                try:
                    images = _get_image_to_export(self._target_version)
                except Image.DoesNotExist:
                    logger.debug('No more image to analyze')
                    return

                # An empty batch means the work is done. Without this check the
                # loop would spin forever, re-running the queries back to back.
                if not images:
                    logger.debug('No more image to analyze')
                    return

                for img in images:
                    if not self._analyze_image(img):
                        return

    def _analyze_image(self, img):
        """Send one layer to the scanner and record the result.

        Args:
          img: dict as produced by _get_image_to_export.

        Returns True when indexing may continue with the next image, False
        when the whole indexing run must be aborted.
        """
        layer_id = img['docker_image_id']+'.'+img['storage_uuid']

        # Get layer storage URL
        path = storage.image_layer_path(img['storage_uuid'])
        locations = self._default_storage_locations
        if not storage.exists(locations, path):
            # Fall back to the locations recorded in the database.
            locations = _get_storage_locations(img['storage_uuid'])
            if not storage.exists(locations, path):
                logger.warning('Could not find a valid location to download layer %s', layer_id)
                # Mark as analyzed because that error is most likely to occur during the
                # pre-process, with the database copy when images are actually removed on
                # the real database (and therefore in S3)
                _update_image(img, False, self._target_version)
                return True

        uri = storage.get_direct_download_url(locations, path)
        if uri is None:
            # Local storage hack
            uri = path

        # Forge request
        request = {
            'ID': layer_id,
            'TarSum': img['storage_checksum'],
            'Path': uri
        }
        if img['parent_docker_image_id'] is not None and img['parent_storage_uuid'] is not None:
            request['ParentID'] = img['parent_docker_image_id']+'.'+img['parent_storage_uuid']

        # Post request
        try:
            logger.info('Analyzing %s', request['ID'])
            # Using invalid certificates doesn't return proper errors because of
            # https://github.com/shazow/urllib3/issues/556
            http_response = requests.post(self._api + API_METHOD_INSERT, json=request,
                                          cert=self._cert, verify=self._ca_verification)
        except Exception:
            logger.exception('An exception occurred when analyzing layer ID %s : %s',
                             request['ID'], exc_info()[0])
            return False

        try:
            json_response = http_response.json()
        except Exception:
            logger.exception('An exception occurred when analyzing layer ID %s : the response is not valid JSON (%s)',
                             request['ID'], http_response.text)
            return False

        if http_response.status_code == 201:
            # The layer has been successfully indexed
            api_version = json_response['Version']
            if api_version < self._target_version:
                logger.warning('An engine runs on version %d but the target version is %d',
                               api_version, self._target_version)
            _update_image(img, True, api_version)
            logger.info('Layer ID %s : analyzed successfully', request['ID'])
            return True

        if 'Message' not in json_response:
            logger.error('An exception occurred when analyzing layer ID %s : %d',
                         request['ID'], http_response.status_code)
            return False

        if 'OS and/or package manager are not supported' in json_response['Message']:
            # The current engine could not index this layer
            logger.warning('A warning event occurred when analyzing layer ID %s : %s',
                           request['ID'], json_response['Message'])
            # Hopefully, there is no version lower than the target one running
            _update_image(img, False, self._target_version)
            return True

        logger.error('An exception occurred when analyzing layer ID %s : %d %s',
                     request['ID'], http_response.status_code, json_response['Message'])
        return False
if __name__ == '__main__':
    # Tone down the third-party loggers before the worker starts.
    for noisy_logger, level in (('requests', logging.WARNING),
                                ('apscheduler', logging.CRITICAL)):
        logging.getLogger(noisy_logger).setLevel(level)

    if not features.SECURITY_SCANNER:
        logger.debug('Security scanner disabled; skipping')
        # Park the process instead of exiting
        # (NOTE(review): presumably so a supervisor does not keep restarting it -- confirm).
        while True:
            time.sleep(100000)

    SecurityWorker().start()