From 407eaae137ec4ccf4819d8faa1c81690c19fb546 Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Tue, 27 Oct 2015 17:38:48 -0400
Subject: [PATCH] WIP: Towards sec demo

---
 data/model/image.py       | 13 ++++++
 data/model/tag.py         | 10 ++++
 endpoints/sec.py          | 55 ++++++++++++++++++++++
 web.py                    |  2 +
 workers/securityworker.py | 96 +++++++++++++++++++++++++++++++++------
 workers/worker.py         |  2 +-
 6 files changed, 164 insertions(+), 14 deletions(-)
 create mode 100644 endpoints/sec.py

diff --git a/data/model/image.py b/data/model/image.py
index 7b673ee2f..207887235 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -12,6 +12,19 @@ from data.database import (Image, Repository, ImageStoragePlacement, Namespace,
 logger = logging.getLogger(__name__)
 
 
+def get_repository_images_recursive(docker_image_ids):
+  """ Returns a query matching the given docker image IDs, along with any images that have
+      them as parents.
+
+      Note: This is a DB-intensive operation and should be used sparingly.
+  """
+  inner_images = Image.select('%/' + Image.id + '/%').where(Image.docker_image_id << docker_image_ids)
+
+  images = Image.select(Image.id).where(Image.docker_image_id << docker_image_ids)
+  recursive_images = Image.select(Image.id).where(Image.ancestors ** inner_images)
+  return recursive_images | images
+
+
 def get_parent_images(namespace_name, repository_name, image_obj):
   """ Returns a list of parent Image objects in chronological order. """
   parents = image_obj.ancestors
diff --git a/data/model/tag.py b/data/model/tag.py
index cf6ddf3c6..535d6c533 100644
--- a/data/model/tag.py
+++ b/data/model/tag.py
@@ -12,6 +12,16 @@ def _tag_alive(query, now_ts=None):
                          (RepositoryTag.lifetime_end_ts > now_ts))
 
 
+def get_matching_tags(docker_image_ids, *args):
+  """ Returns a query pointing to all tags that contain the given image(s). """
+  return (RepositoryTag
+          .select(*args)
+          .distinct()
+          .join(Image)
+          .where(Image.id << image.get_repository_images_recursive(docker_image_ids),
+                 RepositoryTag.lifetime_end_ts >> None))
+
+
 def list_repository_tags(namespace_name, repository_name, include_hidden=False,
                          include_storage=False):
   to_select = (RepositoryTag, Image)
diff --git a/endpoints/sec.py b/endpoints/sec.py
new file mode 100644
index 000000000..c8b0e342b
--- /dev/null
+++ b/endpoints/sec.py
@@ -0,0 +1,55 @@
+import logging
+
+from flask import request, make_response, Blueprint
+from data import model
+from data.database import RepositoryNotification, Repository, ExternalNotificationEvent, RepositoryTag, Image
+from endpoints.notificationhelper import spawn_notification
+from collections import defaultdict
+
+logger = logging.getLogger(__name__)
+
+sec = Blueprint('sec', __name__)
+
+@sec.route('/notification', methods=['POST'])
+def sec_notification():
+  data = request.get_json()
+  logger.debug('Received security scanner notification: %s', data)
+
+  # Find all tags that contain the layer(s) introducing the vulnerability.
+  # TODO: remove this check once fixed.
+  if 'IntroducingLayersIDs' not in data['Content']:
+    return make_response('Okay')
+
+  layer_ids = data['Content']['IntroducingLayersIDs']
+  tags = model.tag.get_matching_tags(layer_ids, RepositoryTag, Repository, Image)
+
+  # For any repository that has a notification configured, issue a notification.
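+  # Only repositories with at least one notification registered for the
+  # 'vulnerability_found' event are considered; the joins below filter the
+  # matching tags down to exactly that set.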
+  event = ExternalNotificationEvent.get(name='vulnerability_found')
+
+  matching = (tags.switch(RepositoryTag)
+              .join(Repository)
+              .join(RepositoryNotification)
+              .where(RepositoryNotification.event == event))
+
+  repository_map = defaultdict(list)
+
+  for tag in matching:
+    repository_map[tag.repository_id].append(tag)
+
+  for repository_id in repository_map:
+    repo_tags = repository_map[repository_id]
+
+    # TODO(jschorr): Pull out the other metadata once added.
+    event_data = {
+      'tags': [tag.name for tag in repo_tags],
+      'vulnerability': {
+        'id': data['Name'],
+        'description': 'Some description',
+        'link': 'https://security-tracker.debian.org/tracker/CVE-FAKE-CVE',
+        'priority': 'Medium',
+      },
+    }
+
+    spawn_notification(repo_tags[0].repository, 'vulnerability_found', event_data)
+
+  return make_response('Okay')
diff --git a/web.py b/web.py
index 96457d5c9..5430e7b93 100644
--- a/web.py
+++ b/web.py
@@ -11,6 +11,7 @@ from endpoints.oauthlogin import oauthlogin
 from endpoints.githubtrigger import githubtrigger
 from endpoints.gitlabtrigger import gitlabtrigger
 from endpoints.bitbuckettrigger import bitbuckettrigger
+from endpoints.sec import sec
 
 if os.environ.get('DEBUGLOG') == 'true':
   logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
@@ -23,3 +24,4 @@ application.register_blueprint(bitbuckettrigger, url_prefix='/oauth1')
 application.register_blueprint(api_bp, url_prefix='/api')
 application.register_blueprint(webhooks, url_prefix='/webhooks')
 application.register_blueprint(realtime, url_prefix='/realtime')
+application.register_blueprint(sec, url_prefix='/sec')
diff --git a/workers/securityworker.py b/workers/securityworker.py
index 29fdd3dcf..c6a940b4a 100644
--- a/workers/securityworker.py
+++ b/workers/securityworker.py
@@ -1,22 +1,29 @@
 import logging
+import logging.config
+
 import requests
 import features
 import time
 import os
 import random
+import json
 
+from endpoints.notificationhelper import spawn_notification
+from collections import defaultdict
 from sys import exc_info
 from peewee import JOIN_LEFT_OUTER
 
-from app import app, storage, OVERRIDE_CONFIG_DIRECTORY
+from app import app, storage, OVERRIDE_CONFIG_DIRECTORY, sec_endpoint
 from workers.worker import Worker
-from data.database import Image, ImageStorage, ImageStorageLocation, ImageStoragePlacement, db_random_func, UseThenDisconnect
+from data.database import (Image, ImageStorage, ImageStorageLocation, ImageStoragePlacement,
+                           db_random_func, UseThenDisconnect, RepositoryTag, Repository,
+                           ExternalNotificationEvent, RepositoryNotification)
 
 logger = logging.getLogger(__name__)
 
 BATCH_SIZE = 20
 INDEXING_INTERVAL = 10
-API_METHOD_INSERT = '/layers'
-API_METHOD_VERSION = '/versions/engine'
+API_METHOD_INSERT = '/v1/layers'
+API_METHOD_VERSION = '/v1/versions/engine'
 
 def _get_image_to_export(version):
   Parent = Image.alias()
@@ -25,14 +32,14 @@
 
   # Without parent
   candidates = (Image
-                .select(Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum)
+                .select(Image.id, Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum)
                 .join(ImageStorage)
                 .where(Image.security_indexed_engine < version, Image.parent >> None, ImageStorage.uploading == False, ImageStorage.checksum != '')
                 .limit(BATCH_SIZE*10)
                 .alias('candidates'))
 
   images = (Image
-            .select(candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum)
+            .select(candidates.c.id, candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum)
             .distinct()
             .from_(candidates)
             .order_by(db_random_func())
@@ -40,11 +47,11 @@
             .limit(BATCH_SIZE))
 
   for image in images:
-    rimages.append({'docker_image_id': image[0], 'storage_uuid': image[1], 'storage_checksum': image[2], 'parent_docker_image_id': None, 'parent_storage_uuid': None})
+    rimages.append({'image_id': image[0], 'docker_image_id': image[1], 'storage_uuid': image[2], 'storage_checksum': image[3], 'parent_docker_image_id': None, 'parent_storage_uuid': None})
 
   # With analyzed parent
   candidates = (Image
-                .select(Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum, Parent.docker_image_id.alias('parent_docker_image_id'), ParentImageStorage.uuid.alias('parent_storage_uuid'))
+                .select(Image.id, Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum, Parent.docker_image_id.alias('parent_docker_image_id'), ParentImageStorage.uuid.alias('parent_storage_uuid'))
                 .join(Parent, on=(Image.parent == Parent.id))
                 .join(ParentImageStorage, on=(ParentImageStorage.id == Parent.storage))
                 .switch(Image)
@@ -54,7 +61,7 @@
                 .alias('candidates'))
 
   images = (Image
-            .select(candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum, candidates.c.parent_docker_image_id, candidates.c.parent_storage_uuid)
+            .select(candidates.c.id, candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum, candidates.c.parent_docker_image_id, candidates.c.parent_storage_uuid)
             .distinct()
             .from_(candidates)
             .order_by(db_random_func())
@@ -62,7 +69,7 @@
             .limit(BATCH_SIZE))
 
   for image in images:
-    rimages.append({'docker_image_id': image[0], 'storage_uuid': image[1], 'storage_checksum': image[2], 'parent_docker_image_id': None, 'parent_storage_uuid': None})
+    rimages.append({'image_id': image[0], 'docker_image_id': image[1], 'storage_uuid': image[2], 'storage_checksum': image[3], 'parent_docker_image_id': image[4], 'parent_storage_uuid': image[5]})
 
   # Re-shuffle, otherwise the images without parents will always be on top
   random.shuffle(rimages)
@@ -134,15 +141,24 @@ class SecurityWorker(Worker):
     return True
 
   def _index_images(self):
+    logger.debug('Starting indexing')
+
     with UseThenDisconnect(app.config):
       while True:
+        logger.debug('Looking up images to index')
+
        # Get images to analyze
        try:
          images = _get_image_to_export(self._target_version)
+          if not images:
+            logger.debug('No more images to analyze')
+            return
+
        except Image.DoesNotExist:
          logger.debug('No more images to analyze')
          return
 
+        logger.debug('Found images to index: %s', images)
        for img in images:
          # Get layer storage URL
          path = storage.image_layer_path(img['storage_uuid'])
@@ -191,6 +207,62 @@ class SecurityWorker(Worker):
              logger.warning('An engine runs on version %d but the target version is %d', api_version, self._target_version)
            _update_image(img, True, api_version)
            logger.info('Layer ID %s : analyzed successfully', request['ID'])
+
+
+            # TODO(jschorr): Put this in a proper place, properly comment, unify with the
+            # callback code, etc.
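+            # Poll the security scanner for vulnerabilities affecting the
+            # just-indexed layer and, for each one found, fan a
+            # 'vulnerability_found' notification out to every repository with
+            # a tag containing the layer (mirroring endpoints/sec.py above).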
+            try:
+              logger.debug('Loading vulnerabilities for layer %s', img['image_id'])
+              response = sec_endpoint.call_api('layers/%s/vulnerabilities', request['ID'])
+            except requests.exceptions.Timeout:
+              logger.debug('Timeout when calling Sec')
+              continue
+            except requests.exceptions.ConnectionError:
+              logger.debug('Connection error when calling Sec')
+              continue
+
+            logger.debug('Got response %s for vulnerabilities for layer %s', response.status_code, img['image_id'])
+
+            if response.status_code == 404:
+              continue
+
+            sec_data = json.loads(response.text)
+            logger.debug('Got response vulnerabilities for layer %s: %s', img['image_id'], sec_data)
+
+            if not sec_data['Vulnerabilities']:
+              continue
+
+            event = ExternalNotificationEvent.get(name='vulnerability_found')
+            matching = (RepositoryTag
+                        .select(RepositoryTag, Repository)
+                        .distinct()
+                        .where(RepositoryTag.image_id == img['image_id'])
+                        .join(Repository)
+                        .join(RepositoryNotification)
+                        .where(RepositoryNotification.event == event))
+
+            repository_map = defaultdict(list)
+
+            for tag in matching:
+              repository_map[tag.repository_id].append(tag)
+
+            for repository_id in repository_map:
+              repo_tags = repository_map[repository_id]
+
+              for vuln in sec_data['Vulnerabilities']:
+                event_data = {
+                  'tags': [tag.name for tag in repo_tags],
+                  'vulnerability': {
+                    'id': vuln['ID'],
+                    'description': vuln['Description'],
+                    'link': vuln['Link'],
+                    'priority': vuln['Priority'],
+                  },
+                }
+
+                spawn_notification(repo_tags[0].repository, 'vulnerability_found', event_data)
+
          else:
            if 'Message' in jsonResponse:
              if 'OS and/or package manager are not supported' in jsonResponse['Message']:
@@ -206,13 +278,11 @@ class SecurityWorker(Worker):
     return
 
 if __name__ == '__main__':
-  logging.getLogger('requests').setLevel(logging.WARNING)
-  logging.getLogger('apscheduler').setLevel(logging.CRITICAL)
-
   if not features.SECURITY_SCANNER:
     logger.debug('Security scanner disabled; skipping')
     while True:
       time.sleep(100000)
 
+  logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
   worker = SecurityWorker()
   worker.start()
diff --git a/workers/worker.py b/workers/worker.py
index a9ea5d219..47dcaf9ef 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -61,7 +61,7 @@ class Worker(object):
     pass
 
   def start(self):
-    logging.config.fileConfig('conf/logging.conf', disable_existing_loggers=False)
+    logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
 
     if not app.config.get('SETUP_COMPLETE', False):
       logger.info('Product setup is not yet complete; skipping worker startup')
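
Note: to exercise the new endpoint end to end, one option is to POST a fake
scanner notification at it. A minimal sketch follows (not part of the patch;
the host/port and the CVE values are assumptions for illustration, and only
the 'Name' and 'Content.IntroducingLayersIDs' keys are what sec_notification()
actually reads):

  import json
  import requests

  # Payload shape mirrors the checks in endpoints/sec.py; values are made up.
  payload = {
      'Name': 'CVE-FAKE-CVE',
      'Content': {
          'IntroducingLayersIDs': ['somedockerimageid'],
      },
  }

  # The blueprint is registered under /sec, so the full route is /sec/notification.
  resp = requests.post('http://localhost:5000/sec/notification',
                       data=json.dumps(payload),
                       headers={'Content-Type': 'application/json'})
  print resp.status_code, resp.text  # expect 200 and 'Okay'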