Merge pull request #2330 from jzelinskie/two-clair

Generic failover library and QSS read-only failover
2017-02-06 15:45:21 -05:00 · 2017-02-06 15:45:21 -05:00 · c5441923f7
commit c5441923f7
parent 94cf7a5065 c2c6bc1e90
10 changed files with 287 additions and 117 deletions
--- a/config.py
+++ b/config.py
@ -310,6 +310,9 @@ class DefaultConfig(object):
  # If specified, the endpoint to be used for all POST calls to the security scanner.
  SECURITY_SCANNER_ENDPOINT_BATCH = None
  # If specified, GET requests that return non-200 will be retried at the following instances.
  SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS = []
  # The indexing engine version running inside the security scanner.
  SECURITY_SCANNER_ENGINE_VERSION_TARGET = 2
--- a/test/test_api_usage.py
+++ b/test/test_api_usage.py
@ -117,6 +117,16 @@ FAKE_APPLICATION_CLIENT_ID = 'deadbeef'
 CSRF_TOKEN_KEY = '_csrf_token'
 CSRF_TOKEN = '123csrfforme'
 class ConfigForTesting(object):
  def __enter__(self):
    config_provider.reset_for_test()
    return config_provider
  def __exit__(self, type, value, traceback):
    config_provider.reset_for_test()
 class ApiTestCase(unittest.TestCase):
  maxDiff = None
@ -4299,6 +4309,36 @@ class TestRepositoryImageSecurity(ApiTestCase):
      self.assertEquals('scanned', image_response['status'])
      self.assertEquals(1, image_response['data']['Layer']['IndexedByVersion'])
  def test_get_vulnerabilities_read_failover(self):
    with ConfigForTesting():
      self.login(ADMIN_ACCESS_USER)
      # Get a layer and mark it as indexed.
      layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, 'simple', 'latest')
      layer.security_indexed = True
      layer.security_indexed_engine = app.config['SECURITY_SCANNER_ENGINE_VERSION_TARGET']
      layer.save()
      with fake_security_scanner(hostname='failoverscanner') as security_scanner:
        # Query the wrong security scanner URL without failover.
        self.getResponse(RepositoryImageSecurity,
                         params=dict(repository=ADMIN_ACCESS_USER + '/simple',
                                     imageid=layer.docker_image_id, vulnerabilities='true'),
                         expected_code=520)
        # Set the failover URL.
        app.config['SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS'] = ['http://failoverscanner']
        # Configure the API to return 200 for this layer.
        layer_id = security_scanner.layer_id(layer)
        security_scanner.set_ok_layer_id(layer_id)
        # Call the API and succeed on failover.
        self.getResponse(RepositoryImageSecurity,
                         params=dict(repository=ADMIN_ACCESS_USER + '/simple',
                                     imageid=layer.docker_image_id, vulnerabilities='true'),
                         expected_code=200)
 class TestSuperUserCustomCertificates(ApiTestCase):
  def test_custom_certificates(self):
--- a/test/test_suconfig_api.py
+++ b/test/test_suconfig_api.py
@ -1,4 +1,4 @@
-from test.test_api_usage import ApiTestCase, READ_ACCESS_USER, ADMIN_ACCESS_USER
+from test.test_api_usage import ApiTestCase, READ_ACCESS_USER, ADMIN_ACCESS_USER, ConfigForTesting
 from endpoints.api.suconfig import (SuperUserRegistryStatus, SuperUserConfig, SuperUserConfigFile,
                                    SuperUserCreateInitialSuperUser, SuperUserConfigValidate)
 from app import config_provider, all_queues
@ -8,16 +8,6 @@ from data import model
 import unittest
 class ConfigForTesting(object):
  def __enter__(self):
    config_provider.reset_for_test()
    return config_provider
  def __exit__(self, type, value, traceback):
    config_provider.reset_for_test()
 class TestSuperUserRegistryStatus(ApiTestCase):
  def test_registry_status(self):
    with ConfigForTesting():
@ -190,4 +180,4 @@ class TestSuperUserConfig(ApiTestCase):
 if __name__ == '__main__':
-  unittest.main()
+  unittest.main()
--- a/util/failover.py
+++ b/util/failover.py
@ -0,0 +1,46 @@
 import logging
 from functools import wraps
 logger = logging.getLogger(__name__)
 class FailoverException(Exception):
  """ Exception raised when an operation should be retried by the failover decorator. """
  def __init__(self, message):
    super(FailoverException, self).__init__()
    self.message = message
 def failover(func):
  """ Wraps a function such that it can be retried on specified failures.
      Raises FailoverException when all failovers are exhausted.
      Example:
      @failover
      def get_google(scheme, use_www=False):
        www = 'www.' if use_www else ''
        r = requests.get(scheme + '://' + www + 'google.com')
        if r.status_code != 200:
          raise FailoverException('non 200 response from Google' )
        return r
      def GooglePingTest():
        r = get_google(
          (('http'), {'use_www': False}),
          (('http'), {'use_www': True}),
          (('https'), {'use_www': False}),
          (('https'), {'use_www': True}),
        )
        print('Successfully contacted ' + r.url)
  """
  @wraps(func)
  def wrapper(*args_sets):
    for arg_set in args_sets:
      try:
        return func(*arg_set[0], **arg_set[1])
      except FailoverException as ex:
        logger.debug('failing over: %s', ex.message)
        continue
    raise FailoverException('exhausted all possible failovers')
  return wrapper
--- a/util/secscan/init.py
+++ b/util/secscan/init.py
@ -1,98 +1,98 @@
 # NOTE: This objects are used directly in the external-notification-data and vulnerability-service
 # on the frontend, so be careful with changing their existing keys.
 PRIORITY_LEVELS = {
- 'Unknown': {
+  'Unknown': {
-   'title': 'Unknown',
+    'title': 'Unknown',
-   'index': 6,
+    'index': 6,
-   'level': 'info',
+    'level': 'info',
-   'color': '#9B9B9B',
+    'color': '#9B9B9B',
-   'score': 0,
+    'score': 0,
-   'description': 'Unknown is either a security problem that has not been assigned ' +
+    'description': 'Unknown is either a security problem that has not been assigned to a priority' +
-                  'to a priority yet or a priority that our system did not recognize',
+                   ' yet or a priority that our system did not recognize',
-   'banner_required': False
+    'banner_required': False
- },
+  },
- 'Negligible': {
+  'Negligible': {
-   'title': 'Negligible',
+    'title': 'Negligible',
-   'index': 5,
+    'index': 5,
-   'level': 'info',
+    'level': 'info',
-   'color': '#9B9B9B',
+    'color': '#9B9B9B',
-   'score': 1,
+    'score': 1,
-   'description': 'Negligible is technically a security problem, but is only theoretical ' +
+    'description': 'Negligible is technically a security problem, but is only theoretical ' +
-                  'in nature, requires a very special situation, has almost no install base, ' +
+                   'in nature, requires a very special situation, has almost no install base, ' +
-                  'or does no real damage.',
+                   'or does no real damage.',
-   'banner_required': False
+    'banner_required': False
- },
+  },
- 'Low': {
+  'Low': {
-   'title': 'Low',
+    'title': 'Low',
-   'index': 4,
+    'index': 4,
-   'level': 'warning',
+    'level': 'warning',
-   'color': '#F8CA1C',
+    'color': '#F8CA1C',
-   'score': 3,
+    'score': 3,
-   'description': 'Low is a security problem, but is hard to exploit due to environment, ' +
+    'description': 'Low is a security problem, but is hard to exploit due to environment, ' +
-                  'requires a user-assisted attack, a small install base, or does very ' +
+                   'requires a user-assisted attack, a small install base, or does very little' +
-                  'little damage.',
+                   ' damage.',
-   'banner_required': False
+    'banner_required': False
- },
+  },
- 'Medium': {
+  'Medium': {
-   'title': 'Medium',
+    'title': 'Medium',
-   'value': 'Medium',
+    'value': 'Medium',
-   'index': 3,
+    'index': 3,
-   'level': 'warning',
+    'level': 'warning',
-   'color': '#FCA657',
+    'color': '#FCA657',
-   'score': 6,
+    'score': 6,
-   'description': 'Medium is a real security problem, and is exploitable for many people. ' +
+    'description': 'Medium is a real security problem, and is exploitable for many people. ' +
-                  'Includes network daemon denial of service attacks, cross-site scripting, ' +
+                   'Includes network daemon denial of service attacks, cross-site scripting, and ' +
-                  'and gaining user privileges.',
+                   'gaining user privileges.',
-   'banner_required': False
+    'banner_required': False
- },
+  },
- 'High': {
+  'High': {
-   'title': 'High',
+    'title': 'High',
-   'value': 'High',
+    'value': 'High',
-   'index': 2,
+    'index': 2,
-   'level': 'warning',
+    'level': 'warning',
-   'color': '#F77454',
+    'color': '#F77454',
-   'score': 9,
+    'score': 9,
-   'description': 'High is a real problem, exploitable for many people in a default installation. ' +
+    'description': 'High is a real problem, exploitable for many people in a default ' +
-                  'Includes serious remote denial of services, local root privilege escalations, ' +
+                   'installation. Includes serious remote denial of services, local root ' +
-                  'or data loss.',
+                   'privilege escalations, or data loss.',
-   'banner_required': False
+    'banner_required': False
- },
+  },
- 'Critical': {
+  'Critical': {
-   'title': 'Critical',
+    'title': 'Critical',
-   'value': 'Critical',
+    'value': 'Critical',
-   'index': 1,
+    'index': 1,
-   'level': 'error',
+    'level': 'error',
-   'color': '#D64456',
+    'color': '#D64456',
-   'score': 10,
+    'score': 10,
-   'description': 'Critical is a world-burning problem, exploitable for nearly all people in ' +
+    'description': 'Critical is a world-burning problem, exploitable for nearly all people in ' +
-                  'a installation of the package. Includes remote root privilege escalations, ' +
+                   'a installation of the package. Includes remote root privilege escalations, ' +
-                  'or massive data loss.',
+                   'or massive data loss.',
-   'banner_required': False
+    'banner_required': False
- },
+  },
- 'Defcon1': {
+  'Defcon1': {
-   'title': 'Defcon 1',
+    'title': 'Defcon 1',
-   'value': 'Defcon1',
+    'value': 'Defcon1',
-   'index': 0,
+    'index': 0,
-   'level': 'error',
+    'level': 'error',
-   'color': 'black',
+    'color': 'black',
-   'score': 11,
+    'score': 11,
-   'description': 'Defcon1 is a Critical problem which has been manually highlighted ' +
+    'description': 'Defcon1 is a Critical problem which has been manually highlighted by the Quay' +
-                  'by the Quay team. It requires immediate attention.',
+                   ' team. It requires immediate attention.',
-   'banner_required': True
+    'banner_required': True
- }
+  }
 }
--- a/util/secscan/analyzer.py
+++ b/util/secscan/analyzer.py
@ -1,10 +1,10 @@
 import logging
 import logging.config
 import features
 from collections import defaultdict
 import features
 from endpoints.notificationhelper import spawn_notification
 from data.database import ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION, Image
 from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
@ -13,6 +13,7 @@ from util.secscan.api import (APIRequestFailure, AnalyzeLayerException, MissingP
                              InvalidLayerException, AnalyzeLayerRetryException)
 from util.morecollections import AttrDict
 logger = logging.getLogger(__name__)
--- a/util/secscan/api.py
+++ b/util/secscan/api.py
@ -9,6 +9,7 @@ from flask import url_for
 from data.database import CloseForLongOperation
 from data import model
 from data.model.storage import get_storage_locations
 from util.failover import failover, FailoverException
 from util.secscan.validator import SecurityConfigValidator
 from util.security.instancekeys import InstanceKeys
 from util.security.registry_jwt import generate_bearer_token, build_context_and_subject
@ -19,6 +20,9 @@ TOKEN_VALIDITY_LIFETIME_S = 60  # Amount of time the security scanner has to cal
 UNKNOWN_PARENT_LAYER_ERROR_MSG = 'worker: parent layer is unknown, it must be processed first'
 MITM_CERT_PATH = '/conf/mitm.cert'
 DEFAULT_HTTP_HEADERS = {'Connection': 'close'}
 logger = logging.getLogger(__name__)
@ -309,32 +313,56 @@ class SecurityScannerAPI(object):
    return json_response
-  def _call(self, method, relative_url, params=None, body=None):
+  def _request(self, method, endpoint, path, body, params, timeout):
-    """ Issues an HTTP call to the sec API at the given relative URL.
+    """ Issues an HTTP request to the security endpoint. """
-        This function disconnects from the database while awaiting a response
+    if self._config is None:
-        from the API server.
+      raise Exception('Cannot call unconfigured security system')
    url = _join_api_url(endpoint, self._config.get('SECURITY_SCANNER_API_VERSION', 'v1'), path)
    signer_proxy_url = self._config.get('JWTPROXY_SIGNER', 'localhost:8080')
    logger.debug('%sing security URL %s', method.upper(), url)
    return self._client.request(method, url, json=body, params=params, timeout=timeout,
                                verify=MITM_CERT_PATH, headers=DEFAULT_HTTP_HEADERS,
                                proxies={'https': 'https://' + signer_proxy_url,
                                         'http': 'http://' + signer_proxy_url})
  def _call(self, method, path, params=None, body=None):
    """ Issues an HTTP request to the security endpoint handling the logic of using an alternative
        BATCH endpoint for non-GET requests and failover for GET requests.
    """
    if self._config is None:
      raise Exception('Cannot call unconfigured security system')
-    client = self._client
+    timeout = self._config['SECURITY_SCANNER_API_TIMEOUT_SECONDS']
    headers = {'Connection': 'close'}
    timeout = self._config.get('SECURITY_SCANNER_API_TIMEOUT_SECONDS', 10)
    endpoint = self._config['SECURITY_SCANNER_ENDPOINT']
    if method != 'GET':
      timeout = self._config.get('SECURITY_SCANNER_API_BATCH_TIMEOUT_SECONDS', timeout)
      endpoint = self._config.get('SECURITY_SCANNER_ENDPOINT_BATCH') or endpoint
    api_url = urljoin(endpoint, '/' + self._config.get('SECURITY_SCANNER_API_VERSION', 'v1')) + '/'
    url = urljoin(api_url, relative_url)
    signer_proxy_url = self._config.get('JWTPROXY_SIGNER', 'localhost:8080')
    with CloseForLongOperation(self._config):
-      logger.debug('%sing security URL %s', method.upper(), url)
+      # If the request isn't a read, attempt to use a batch stack and do not fail over.
-      return client.request(method, url, json=body, params=params, timeout=timeout,
+      if method != 'GET':
-                            verify='/conf/mitm.cert', headers=headers,
+        if self._config.get('SECURITY_SCANNER_ENDPOINT_BATCH') is not None:
-                            proxies={
+          endpoint = self._config['SECURITY_SCANNER_ENDPOINT_BATCH']
-                              'https': 'https://' + signer_proxy_url,
+          timeout = self._config.get('SECURITY_SCANNER_API_BATCH_TIMEOUT_SECONDS') or timeout
-                              'http': 'http://' + signer_proxy_url
+        return self._request(method, endpoint, path, body, params, timeout)
-                            })
+
      # The request is read-only and can failover.
      all_endpoints = [endpoint] + self._config['SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS']
      try:
        return  _failover_read_request(*[((self._request, endpoint, path, body, params, timeout), {})
                                        for endpoint in all_endpoints])
      except FailoverException:
        raise APIRequestFailure()
 def _join_api_url(endpoint, api_version, path):
  pathless_url = urljoin(endpoint, '/' + api_version) + '/'
  return urljoin(pathless_url, path)
@failover
 def _failover_read_request(request_fn, endpoint, path, body, params, timeout):
  """ This function auto-retries read-only requests until they return a 2xx status code. """
  resp = request_fn('GET', endpoint, path, body, params, timeout)
  if resp.status_code / 100 != 2:
    raise FailoverException('status code was not 2xx')
  return resp
--- a/util/secscan/fake.py
+++ b/util/secscan/fake.py
@ -28,10 +28,17 @@ class FakeSecurityScanner(object):
    self.notifications = {}
    self.layer_vulns = {}
    self.ok_layer_id = None
    self.fail_layer_id = None
    self.internal_error_layer_id = None
    self.error_layer_id = None
  def set_ok_layer_id(self, ok_layer_id):
    """ Sets a layer ID that, if encountered when the analyze call is made, causes a 200
        to be immediately returned.
    """
    self.ok_layer_id = ok_layer_id
  def set_fail_layer_id(self, fail_layer_id):
    """ Sets a layer ID that, if encountered when the analyze call is made, causes a 422
        to be raised.
@ -167,6 +174,12 @@ class FakeSecurityScanner(object):
    @urlmatch(netloc=r'(.*\.)?' + self.hostname, path=r'/v1/layers/(.+)', method='GET')
    def get_layer_mock(url, request):
      layer_id = url.path[len('/v1/layers/'):]
      if layer_id == self.ok_layer_id:
        return {
          'status_code': 200,
          'content': json.dumps({'Layer': {}}),
        }
      if layer_id == self.internal_error_layer_id:
        return {
          'status_code': 500,
@ -305,7 +318,10 @@ class FakeSecurityScanner(object):
    @all_requests
    def response_content(url, _):
-      raise Exception('Unknown endpoint: ' + str(url))
+      return {
        'status_code': 500,
        'content': '',
      }
    return [get_layer_mock, post_layer_mock, remove_layer_mock, get_notification,
            delete_notification, response_content]
--- a/util/secscan/validator.py
+++ b/util/secscan/validator.py
@ -1,6 +1,8 @@
 import features
 import logging
 import features
 logger = logging.getLogger(__name__)
--- a/util/test/test_failover.py
+++ b/util/test/test_failover.py
@ -0,0 +1,44 @@
 import pytest
 from util.failover import failover, FailoverException
 class Counter(object):
  """ Wraps a counter in an object so that it'll be passed by reference. """
  def __init__(self):
    self.calls = 0
  def increment(self):
    self.calls += 1
@failover
 def my_failover_func(i, should_raise=None):
  """ Increments a counter and raises an exception when told. """
  i.increment()
  if should_raise is not None:
    raise should_raise()
  raise FailoverException('incrementing')
@pytest.mark.parametrize('stop_on,exception', [
  (10, None),
  (5, IndexError),
 ])
 def test_readonly_failover(stop_on, exception):
  """ Generates failover arguments and checks against a counter to ensure that
      the failover function has been called the proper amount of times and stops
      at unhandled exceptions.
  """
  counter = Counter()
  arg_sets = []
  for i in xrange(stop_on):
    should_raise = exception if exception is not None and i == stop_on-1 else None
    arg_sets.append(((counter,), {'should_raise': should_raise}))
  if exception is not None:
    with pytest.raises(exception):
      my_failover_func(*arg_sets)
  else:
    my_failover_func(*arg_sets)
  assert counter.calls == stop_on