diff --git a/data/model/image.py b/data/model/image.py index 8fcddc032..9421e29ab 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -12,18 +12,23 @@ from data.database import (Image, Repository, ImageStoragePlacement, Namespace, logger = logging.getLogger(__name__) -def get_repository_images_recursive(docker_image_ids): - """ Returns a query matching the given docker image IDs, along with any which have the image IDs - as parents. - - Note: This is a DB intensive operation and should be used sparingly. +def get_repository_image_and_deriving(docker_image_id, storage_uuid): + """ Returns all matching images with the given docker image ID and storage uuid, along with any + images which have the image ID as parents. """ - # TODO: test this on MySQL and Postgres - inner_images = Image.select(SQL('"%/" || id || "/%"')).where(Image.docker_image_id << docker_image_ids) + try: + image_found = (Image + .select() + .join(ImageStorage) + .where(Image.docker_image_id == docker_image_id, + ImageStorage.uuid == storage_uuid) + .get()) + except Image.DoesNotExist: + return Image.select().where(Image.id < 0) # Empty query - images = Image.select(Image.id).where(Image.docker_image_id << docker_image_ids) - recursive_images = Image.select(Image.id).where(Image.ancestors ** inner_images) - return recursive_images | images + ancestors_pattern = '%s%s/%%' % (image_found.ancestors, image_found.id) + return Image.select().where((Image.ancestors ** ancestors_pattern) | + (Image.id == image_found.id)) def get_parent_images(namespace_name, repository_name, image_obj): diff --git a/data/model/tag.py b/data/model/tag.py index 535d6c533..fcaa7f342 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -12,14 +12,17 @@ def _tag_alive(query, now_ts=None): (RepositoryTag.lifetime_end_ts > now_ts)) -def get_matching_tags(docker_image_ids, *args): - """ Returns a query pointing to all tags that contain the given image(s). """ +def get_matching_tags(docker_image_id, storage_uuid, *args): + """ Returns a query pointing to all tags that contain the image with the + given docker_image_id and storage_uuid. """ + image_query = image.get_repository_image_and_deriving(docker_image_id, storage_uuid) + return (RepositoryTag .select(*args) .distinct() .join(Image) - .where(Image.id << image.get_repository_images_recursive(docker_image_ids), - RepositoryTag.lifetime_end_ts >> None)) + .join(ImageStorage) + .where(Image.id << image_query, RepositoryTag.lifetime_end_ts >> None)) def list_repository_tags(namespace_name, repository_name, include_hidden=False, diff --git a/endpoints/api/secscan.py b/endpoints/api/secscan.py index ab3f73051..9a1773ccb 100644 --- a/endpoints/api/secscan.py +++ b/endpoints/api/secscan.py @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) def _call_security_api(relative_url, *args, **kwargs): """ Issues an HTTP call to the sec API at the given relative URL. """ try: - response = secscan_endpoint.call_api(relative_url, *args, **kwargs) + response = secscan_endpoint.call_api(relative_url, body=None, *args, **kwargs) except requests.exceptions.Timeout: raise DownstreamIssue(payload=dict(message='API call timed out')) except requests.exceptions.ConnectionError: diff --git a/endpoints/sec.py b/endpoints/sec.py deleted file mode 100644 index 2548e615c..000000000 --- a/endpoints/sec.py +++ /dev/null @@ -1,58 +0,0 @@ -import logging - -from flask import request, make_response, Blueprint -from data import model -from data.database import RepositoryNotification, Repository, ExternalNotificationEvent, RepositoryTag, Image -from endpoints.notificationhelper import spawn_notification -from collections import defaultdict - -logger = logging.getLogger(__name__) - -sec = Blueprint('sec', __name__) - -@sec.route('/notification', methods=['POST']) -def sec_notification(): - data = request.get_json() - - # Find all tags that contain the layer(s) introducing the vulnerability. - # TODO: remove this check once fixed. - if not 'IntroducingLayersIDs' in data['Content']: - return make_response('Okay') - - # TODO: fix this for the image_id.storage thing properly. - layer_ids = [full_id.split('.')[0] for full_id in data['Content']['IntroducingLayersIDs']] - if not layer_ids: - return make_response('Okay') - - tags = model.tag.get_matching_tags(layer_ids, RepositoryTag, Repository, Image) - - # For any repository that has a notification setup, issue a notification. - event = ExternalNotificationEvent.get(name='vulnerability_found') - - matching = (tags.switch(RepositoryTag) - .join(Repository) - .join(RepositoryNotification) - .where(RepositoryNotification.event == event)) - - repository_map = defaultdict(list) - - for tag in matching: - repository_map[tag.repository_id].append(tag) - - for repository_id in repository_map: - tags = repository_map[repository_id] - - # TODO(jschorr): Pull out the other metadata once added. - event_data = { - 'tags': [tag.name for tag in tags], - 'vulnerability': { - 'id': data['Name'], - 'description': 'Some description', - 'link': 'https://security-tracker.debian.org/tracker/CVE-FAKE-CVE', - 'priority': 'High', - }, - } - - spawn_notification(tags[0].repository, 'vulnerability_found', event_data) - - return make_response('Okay') diff --git a/endpoints/secscan.py b/endpoints/secscan.py new file mode 100644 index 000000000..874aec243 --- /dev/null +++ b/endpoints/secscan.py @@ -0,0 +1,88 @@ +import logging +import features + +from app import secscan_endpoint +from flask import request, make_response, Blueprint +from data import model +from data.database import (RepositoryNotification, Repository, ExternalNotificationEvent, + RepositoryTag, Image, ImageStorage) +from endpoints.common import route_show_if +from endpoints.notificationhelper import spawn_notification +from collections import defaultdict + +logger = logging.getLogger(__name__) +secscan = Blueprint('secscan', __name__) + +@route_show_if(features.SECURITY_SCANNER) +@secscan.route('/notification', methods=['POST']) +def secscan_notification(): + data = request.get_json() + logger.debug('Got notification from Clair: %s', data) + + # Find all tags that contain the layer(s) introducing the vulnerability. + content = data['Content'] + layer_ids = content.get('NewIntroducingLayersIDs', content.get('IntroducingLayersIDs', [])) + if not layer_ids: + return make_response('Okay') + + # TODO(jzelinkskie): Write a queueitem for these layer ids, and do the rest of this + # in a worker. + cve_id = data['Name'] + vulnerability = data['Content']['Vulnerability'] + priority = vulnerability['Priority'] + + # Lookup the external event for when we have vulnerabilities. + event = ExternalNotificationEvent.get(name='vulnerability_found') + + # For each layer, retrieving the matching tags and join with repository to determine which + # require new notifications. + tag_map = defaultdict(set) + repository_map = {} + + for layer_id in layer_ids: + (docker_image_id, storage_uuid) = layer_id.split('.', 2) + tags = model.tag.get_matching_tags(docker_image_id, storage_uuid, RepositoryTag, + Repository, Image, ImageStorage) + + # Additionally filter to tags only in repositories that have the event setup. + matching = (tags.switch(RepositoryTag) + .join(Repository) + .join(RepositoryNotification) + .where(RepositoryNotification.event == event)) + + check_map = {} + for tag in matching: + # Verify that the tag's root image has the vulnerability. + tag_layer_id = '%s.%s' % (tag.image.docker_image_id, tag.image.storage.uuid) + logger.debug('Checking if layer %s is vulnerable to %s', tag_layer_id, cve_id) + + if not tag_layer_id in check_map: + is_vulerable = secscan_endpoint.check_layer_vulnerable(tag_layer_id, cve_id) + check_map[tag_layer_id] = is_vulerable + + logger.debug('Result of layer %s is vulnerable to %s check: %s', tag_layer_id, cve_id, + check_map[tag_layer_id]) + + if check_map[tag_layer_id]: + # Add the vulnerable tag to the list. + tag_map[tag.repository_id].add(tag.name) + repository_map[tag.repository_id] = tag.repository + + # For each of the tags found, issue a notification. + for repository_id in tag_map: + tags = tag_map[repository_id] + event_data = { + 'tags': list(tags), + 'vulnerability': { + 'id': data['Name'], + 'description': vulnerability['Description'], + 'link': vulnerability['Link'], + 'priority': priority, + }, + } + + # TODO: only add this notification if the repository's event(s) defined meet the priority + # minimum. + spawn_notification(repository_map[repository_id], 'vulnerability_found', event_data) + + return make_response('Okay') diff --git a/util/secscan/secscanendpoint.py b/util/secscan/secscanendpoint.py index 7f759219b..2cd24fab1 100644 --- a/util/secscan/secscanendpoint.py +++ b/util/secscan/secscanendpoint.py @@ -1,7 +1,6 @@ import features import logging import requests -import json from urlparse import urljoin @@ -36,7 +35,33 @@ class SecurityScanEndpoint(object): return None - def call_api(self, relative_url, *args, **kwargs): + def check_layer_vulnerable(self, layer_id, cve_id): + """ Checks with Clair whether the given layer is vulnerable to the given CVE. """ + try: + body = { + 'LayersIDs': [layer_id] + } + response = self.call_api('vulnerabilities/%s/affected-layers', body, cve_id) + except requests.exceptions.RequestException: + logger.exception('Got exception when trying to call Clair endpoint') + return False + + if response.status_code != 200: + return False + + try: + response_data = response.json() + except ValueError: + logger.exception('Got exception when trying to parse Clair response') + return False + + if (not layer_id in response_data or + not response_data[layer_id].get('Vulnerable', False)): + return False + + return True + + def call_api(self, relative_url, body=None, *args, **kwargs): """ Issues an HTTP call to the sec API at the given relative URL. """ security_config = self.security_config api_url = urljoin(security_config['ENDPOINT'], '/' + security_config['API_VERSION']) + '/' @@ -46,5 +71,9 @@ class SecurityScanEndpoint(object): timeout = security_config.get('API_TIMEOUT_SECONDS', 1) logger.debug('Looking up sec information: %s', url) - return client.get(url, params=kwargs, timeout=timeout, cert=self.keys, - verify=self.certificate) \ No newline at end of file + if body is not None: + return client.post(url, json=body, params=kwargs, timeout=timeout, cert=self.keys, + verify=self.certificate) + else: + return client.get(url, params=kwargs, timeout=timeout, cert=self.keys, + verify=self.certificate) \ No newline at end of file diff --git a/web.py b/web.py index 5430e7b93..445c2fa5b 100644 --- a/web.py +++ b/web.py @@ -11,7 +11,7 @@ from endpoints.oauthlogin import oauthlogin from endpoints.githubtrigger import githubtrigger from endpoints.gitlabtrigger import gitlabtrigger from endpoints.bitbuckettrigger import bitbuckettrigger -from endpoints.sec import sec +from endpoints.secscan import secscan if os.environ.get('DEBUGLOG') == 'true': logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False) @@ -24,4 +24,4 @@ application.register_blueprint(bitbuckettrigger, url_prefix='/oauth1') application.register_blueprint(api_bp, url_prefix='/api') application.register_blueprint(webhooks, url_prefix='/webhooks') application.register_blueprint(realtime, url_prefix='/realtime') -application.register_blueprint(sec, url_prefix='/sec') +application.register_blueprint(secscan, url_prefix='/secscan')