Security scanner flow changes and auto-retry

Changes the security scanner code to raise exceptions for unsuccessful operations instead of returning a (version, should-retry) tuple. One of the new exceptions is MissingParentLayerException; when it is raised, the security worker performs a full rescan of all parent images of the current layer and then tries once more to scan the current layer. This should allow the system to be "self-healing" in the case where the security scanner engine somehow loses or corrupts a parent layer.
This commit is contained in:
Joseph Schorr 2016-12-15 16:27:24 -05:00
parent 9fa16679f8
commit 405eca074c
5 changed files with 228 additions and 82 deletions

View file

@ -15,12 +15,24 @@ from util import get_app_url
TOKEN_VALIDITY_LIFETIME_S = 60 # Amount of time the security scanner has to call the layer URL
# Error text that analyze_layer compares against `message` when the scanner answers with HTTP 400;
# a match raises MissingParentLayerException. NOTE(review): `message` is presumably extracted from
# the scanner's JSON response -- that line is not visible here, confirm.
UNKNOWN_PARENT_LAYER_ERROR_MSG = 'worker: parent layer is unknown, it must be processed first'
logger = logging.getLogger(__name__)
class AnalyzeLayerException(Exception):
    """ Exception raised when a layer fails to analyze due to a request issue. """


class AnalyzeLayerRetryException(Exception):
    """ Exception raised when a layer fails to analyze due to a request issue, and the request
        should be retried.

        Derives directly from Exception rather than from AnalyzeLayerException, so an
        `except AnalyzeLayerException` clause does not catch it; it must be handled separately.
    """


class MissingParentLayerException(AnalyzeLayerException):
    """ Exception raised when the parent of the layer is missing from the security scanner. """


class InvalidLayerException(AnalyzeLayerException):
    """ Exception raised when the layer itself cannot be handled by the security scanner. """


class APIRequestFailure(Exception):
    """ Exception raised when there is a failure to conduct an API request. """
@ -142,12 +154,12 @@ class SecurityScannerAPI(object):
def analyze_layer(self, layer):
""" Posts the given layer to the security scanner for analysis, blocking until complete.
Returns a tuple containing the analysis version (on success, None on failure) and
whether the request should be retried.
Returns the analysis version on success or raises an exception deriving from
AnalyzeLayerException on failure. Callers should handle all cases of AnalyzeLayerException.
"""
request = self._new_analyze_request(layer)
if not request:
return None, False
raise AnalyzeLayerException
logger.info('Analyzing layer %s', request['Layer']['Name'])
try:
@ -155,13 +167,13 @@ class SecurityScannerAPI(object):
json_response = response.json()
except requests.exceptions.Timeout:
logger.exception('Timeout when trying to post layer data response for %s', layer.id)
return None, True
raise AnalyzeLayerRetryException
except requests.exceptions.ConnectionError:
logger.exception('Connection error when trying to post layer data response for %s', layer.id)
return None, True
raise AnalyzeLayerRetryException
except (requests.exceptions.RequestException, ValueError) as re:
logger.exception('Failed to post layer data response for %s', layer.id)
return None, False
logger.exception('Failed to post layer data response for %s: %s', layer.id, re)
raise AnalyzeLayerException
# Handle any errors from the security scanner.
if response.status_code != 201:
@ -171,17 +183,23 @@ class SecurityScannerAPI(object):
# 400 means the layer could not be analyzed due to a bad request.
if response.status_code == 400:
logger.error('Bad request when calling security scanner for layer %s: %s',
response.status_code, json_response)
raise AnalyzeLayerException('Bad request to security scanner')
if message == UNKNOWN_PARENT_LAYER_ERROR_MSG:
raise MissingParentLayerException('Bad request to security scanner: %s' % message)
else:
raise AnalyzeLayerException('Bad request to security scanner: %s' % message)
# 422 means that the layer could not be analyzed:
# - the layer could not be extracted (manifest?)
# - the layer could not be extracted (might be a manifest or an invalid .tar.gz)
# - the layer operating system / package manager is unsupported
return None, response.status_code != 422
elif response.status_code == 422:
raise InvalidLayerException
api_version = json_response['Layer']['IndexedByVersion']
return api_version, False
# Otherwise, it is some other error and we should retry.
else:
raise AnalyzeLayerRetryException
# Return the parsed API version.
return json_response['Layer']['IndexedByVersion']
def check_layer_vulnerable(self, layer_id, cve_name):