"""Worker that submits image layers to the configured security scanner API."""

import logging
import os
import random
import time
from sys import exc_info

import requests
from peewee import JOIN_LEFT_OUTER

import features
from app import app, storage, OVERRIDE_CONFIG_DIRECTORY
from workers.worker import Worker
from data.database import (Image, ImageStorage, ImageStorageLocation,
                           ImageStoragePlacement, db_random_func,
                           UseThenDisconnect)

logger = logging.getLogger(__name__)

BATCH_SIZE = 20
INDEXING_INTERVAL = 10
API_METHOD_INSERT = '/layers'
API_METHOD_VERSION = '/versions/engine'


def _layer_id(docker_image_id, storage_uuid):
    """Return the layer identifier sent to the scanner: '<image id>.<uuid>'."""
    return docker_image_id + '.' + storage_uuid


def _get_image_to_export(version):
    """Return a shuffled list of images that still need indexing.

    Two groups are collected, each limited to BATCH_SIZE randomly ordered rows
    drawn from at most BATCH_SIZE * 10 candidates:

    * images without a parent whose indexed engine version is below `version`;
    * images whose parent is already indexed with an engine version of at
      least `version`.

    Only storages that are not uploading and have a non-empty checksum are
    considered.

    Args:
      version: engine version an image must reach to be considered up to date.

    Returns:
      A list of dicts with the keys 'docker_image_id', 'storage_uuid',
      'storage_checksum', 'parent_docker_image_id' and 'parent_storage_uuid'
      (the two parent keys are None for parentless images).
    """
    Parent = Image.alias()
    ParentImageStorage = ImageStorage.alias()
    rimages = []

    # Without parent.
    # NOTE: `== False` / `>> None` are peewee query operators, not Python
    # comparisons; they must not be rewritten as `is False` / `is None`.
    candidates = (Image
                  .select(Image.docker_image_id, ImageStorage.uuid,
                          ImageStorage.checksum)
                  .join(ImageStorage)
                  .where(Image.security_indexed_engine < version,
                         Image.parent_id >> None,
                         ImageStorage.uploading == False,
                         ImageStorage.checksum != '')
                  .limit(BATCH_SIZE * 10)
                  .alias('candidates'))

    images = (Image
              .select(candidates.c.docker_image_id, candidates.c.uuid,
                      candidates.c.checksum)
              .from_(candidates)
              .order_by(db_random_func())
              .tuples()
              .limit(BATCH_SIZE))

    for image in images:
        rimages.append({'docker_image_id': image[0],
                        'storage_uuid': image[1],
                        'storage_checksum': image[2],
                        'parent_docker_image_id': None,
                        'parent_storage_uuid': None})

    # With analyzed parent.
    candidates = (Image
                  .select(Image.docker_image_id, ImageStorage.uuid,
                          ImageStorage.checksum,
                          Parent.docker_image_id.alias('parent_docker_image_id'),
                          ParentImageStorage.uuid.alias('parent_storage_uuid'))
                  .join(Parent, on=(Image.parent_id == Parent.id))
                  .join(ParentImageStorage,
                        on=(ParentImageStorage.id == Parent.storage))
                  .switch(Image)
                  .join(ImageStorage)
                  .where(Image.security_indexed_engine < version,
                         Parent.security_indexed == True,
                         Parent.security_indexed_engine >= version,
                         ImageStorage.uploading == False,
                         ImageStorage.checksum != '')
                  .limit(BATCH_SIZE * 10)
                  .alias('candidates'))

    images = (Image
              .select(candidates.c.docker_image_id, candidates.c.uuid,
                      candidates.c.checksum,
                      candidates.c.parent_docker_image_id,
                      candidates.c.parent_storage_uuid)
              .from_(candidates)
              .order_by(db_random_func())
              .tuples()
              .limit(BATCH_SIZE))

    for image in images:
        rimages.append({'docker_image_id': image[0],
                        'storage_uuid': image[1],
                        'storage_checksum': image[2],
                        'parent_docker_image_id': image[3],
                        'parent_storage_uuid': image[4]})

    # Re-shuffle, otherwise the images without parents will always be on top.
    random.shuffle(rimages)
    return rimages


def _get_storage_locations(uuid):
    """Return the names of the storage locations holding storage `uuid`."""
    query = (ImageStoragePlacement
             .select()
             .join(ImageStorageLocation)
             .switch(ImageStoragePlacement)
             .join(ImageStorage, JOIN_LEFT_OUTER)
             .where(ImageStorage.uuid == uuid))

    return [placement.location.name for placement in query]


def _update_image(image, indexed, version):
    """Record the indexing outcome on every row matching `image`.

    Args:
      image: dict with at least 'docker_image_id' and 'storage_uuid'.
      indexed: value stored in `security_indexed`.
      version: value stored in `security_indexed_engine`.
    """
    query = (Image
             .select()
             .join(ImageStorage)
             .where(Image.docker_image_id == image['docker_image_id'],
                    ImageStorage.uuid == image['storage_uuid']))

    updated_images = [row.id for row in query]
    if not updated_images:
        # Nothing matched: the UPDATE would be a no-op, and an empty IN ()
        # clause is not accepted by every SQL backend.
        return

    (Image
     .update(security_indexed=indexed, security_indexed_engine=version)
     .where(Image.id << updated_images)
     .execute())


class SecurityWorker(Worker):
    """Periodically posts unindexed image layers to the security scanner."""

    def __init__(self):
        super(SecurityWorker, self).__init__()
        # Only schedule the indexing operation when the configuration is valid.
        if self._load_configuration():
            self.add_operation(self._index_images, INDEXING_INTERVAL)

    def _load_configuration(self):
        """Read the scanner settings from `app.config`.

        Sets `_api`, `_target_version`, `_default_storage_locations`,
        `_ca_verification` and `_cert` on the instance.

        Returns:
          True when the configuration is complete and every referenced
          certificate/key file exists, False otherwise.
        """
        config = app.config.get('SECURITY_SCANNER')

        if (not config
                or 'ENDPOINT' not in config
                or 'ENGINE_VERSION_TARGET' not in config
                or 'DISTRIBUTED_STORAGE_PREFERENCE' not in app.config):
            # logger.error, not logger.exception: no exception is being handled.
            logger.error('No configuration found for the security worker')
            return False

        self._api = config['ENDPOINT']
        self._target_version = config['ENGINE_VERSION_TARGET']
        self._default_storage_locations = app.config['DISTRIBUTED_STORAGE_PREFERENCE']

        # Passed as `verify=` / `cert=` to requests.post.
        self._ca_verification = False
        self._cert = None

        if 'CA_CERTIFICATE_FILENAME' in config:
            self._ca_verification = os.path.join(OVERRIDE_CONFIG_DIRECTORY,
                                                 config['CA_CERTIFICATE_FILENAME'])
            if not os.path.isfile(self._ca_verification):
                logger.error('Could not find configured CA file')
                return False

        if 'PRIVATE_KEY_FILENAME' in config and 'PUBLIC_KEY_FILENAME' in config:
            self._cert = (
                os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['PUBLIC_KEY_FILENAME']),
                os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['PRIVATE_KEY_FILENAME']),
            )
            if not os.path.isfile(self._cert[0]) or not os.path.isfile(self._cert[1]):
                logger.error('Could not find configured key pair files')
                return False

        return True

    def _index_images(self):
        """Index batches of images until none are left or an error occurs."""
        with UseThenDisconnect(app.config):
            while True:
                # Get images to analyze.
                try:
                    images = _get_image_to_export(self._target_version)
                except Image.DoesNotExist:
                    logger.debug('No more image to analyze')
                    return

                if not images:
                    # An empty batch means there is nothing left to do; without
                    # this exit the loop would re-query the database forever.
                    logger.debug('No more image to analyze')
                    return

                for img in images:
                    if not self._analyze_image(img):
                        return

    def _analyze_image(self, img):
        """Submit one image layer to the scanner and record the outcome.

        Args:
          img: dict as produced by `_get_image_to_export`.

        Returns:
          True when indexing should carry on with the next image, False when
          the current indexing run must be aborted.
        """
        layer_id = _layer_id(img['docker_image_id'], img['storage_uuid'])

        # Get layer storage URL.
        path = storage.image_layer_path(img['storage_uuid'])
        locations = self._default_storage_locations
        if not storage.exists(locations, path):
            locations = _get_storage_locations(img['storage_uuid'])
            if not storage.exists(locations, path):
                logger.warning('Could not find a valid location to download layer %s',
                               layer_id)
                # Mark as analyzed because that error is most likely to occur
                # during the pre-process, with the database copy, when images
                # are actually removed on the real database (and therefore in
                # S3).
                _update_image(img, False, self._target_version)
                return True

        uri = storage.get_direct_download_url(locations, path)
        if uri is None:
            # Local storage hack.
            uri = path

        # Forge request.
        request = {
            'ID': layer_id,
            'TarSum': img['storage_checksum'],
            'Path': uri,
        }
        if (img['parent_docker_image_id'] is not None
                and img['parent_storage_uuid'] is not None):
            request['ParentID'] = _layer_id(img['parent_docker_image_id'],
                                            img['parent_storage_uuid'])

        # Post request.
        try:
            logger.info('Analyzing %s', request['ID'])
            # Using invalid certificates doesn't return proper errors because of
            # https://github.com/shazow/urllib3/issues/556
            httpResponse = requests.post(self._api + API_METHOD_INSERT, json=request,
                                         cert=self._cert, verify=self._ca_verification)
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate.
            logger.exception('An exception occurred when analyzing layer ID %s : %s',
                             request['ID'], exc_info()[0])
            return False

        try:
            jsonResponse = httpResponse.json()
        except ValueError:
            logger.exception('An exception occurred when analyzing layer ID %s : the response is not valid JSON (%s)',
                             request['ID'], httpResponse.text)
            return False

        if httpResponse.status_code == 201:
            # The layer has been successfully indexed.
            api_version = jsonResponse['Version']
            if api_version < self._target_version:
                logger.warning('An engine runs on version %d but the target version is %d',
                               api_version, self._target_version)
            _update_image(img, True, api_version)
            logger.info('Layer ID %s : analyzed successfully', request['ID'])
            return True

        if 'Message' not in jsonResponse:
            logger.error('An exception occurred when analyzing layer ID %s : %d',
                         request['ID'], httpResponse.status_code)
            return False

        if 'OS and/or package manager are not supported' in jsonResponse['Message']:
            # The current engine could not index this layer.
            logger.warning('A warning event occurred when analyzing layer ID %s : %s',
                           request['ID'], jsonResponse['Message'])
            # Hopefully, there is no version lower than the target one running.
            _update_image(img, False, self._target_version)
            return True

        logger.error('An exception occurred when analyzing layer ID %s : %d %s',
                     request['ID'], httpResponse.status_code, jsonResponse['Message'])
        return False


if __name__ == '__main__':
    logging.getLogger('requests').setLevel(logging.WARNING)
    logging.getLogger('apscheduler').setLevel(logging.CRITICAL)

    if not features.SECURITY_SCANNER:
        logger.debug('Security scanner disabled; skipping')
        # Sleep forever instead of exiting.
        # NOTE(review): presumably so a process supervisor does not keep
        # restarting the worker - confirm.
        while True:
            time.sleep(100000)

    worker = SecurityWorker()
    worker.start()