Merge pull request #2330 from jzelinskie/two-clair

Generic failover library and QSS read-only failover
This commit is contained in:
Jimmy Zelinskie 2017-02-06 15:45:21 -05:00 committed by GitHub
commit c5441923f7
10 changed files with 287 additions and 117 deletions

View file

@ -310,6 +310,9 @@ class DefaultConfig(object):
# If specified, the endpoint to be used for all POST calls to the security scanner.
SECURITY_SCANNER_ENDPOINT_BATCH = None
# If specified, GET requests that return non-200 will be retried at the following instances.
SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS = []
# The indexing engine version running inside the security scanner.
SECURITY_SCANNER_ENGINE_VERSION_TARGET = 2

View file

@ -117,6 +117,16 @@ FAKE_APPLICATION_CLIENT_ID = 'deadbeef'
CSRF_TOKEN_KEY = '_csrf_token'
CSRF_TOKEN = '123csrfforme'
class ConfigForTesting(object):
def __enter__(self):
config_provider.reset_for_test()
return config_provider
def __exit__(self, type, value, traceback):
config_provider.reset_for_test()
class ApiTestCase(unittest.TestCase):
maxDiff = None
@ -4299,6 +4309,36 @@ class TestRepositoryImageSecurity(ApiTestCase):
self.assertEquals('scanned', image_response['status'])
self.assertEquals(1, image_response['data']['Layer']['IndexedByVersion'])
def test_get_vulnerabilities_read_failover(self):
with ConfigForTesting():
self.login(ADMIN_ACCESS_USER)
# Get a layer and mark it as indexed.
layer = model.tag.get_tag_image(ADMIN_ACCESS_USER, 'simple', 'latest')
layer.security_indexed = True
layer.security_indexed_engine = app.config['SECURITY_SCANNER_ENGINE_VERSION_TARGET']
layer.save()
with fake_security_scanner(hostname='failoverscanner') as security_scanner:
# Query the wrong security scanner URL without failover.
self.getResponse(RepositoryImageSecurity,
params=dict(repository=ADMIN_ACCESS_USER + '/simple',
imageid=layer.docker_image_id, vulnerabilities='true'),
expected_code=520)
# Set the failover URL.
app.config['SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS'] = ['http://failoverscanner']
# Configure the API to return 200 for this layer.
layer_id = security_scanner.layer_id(layer)
security_scanner.set_ok_layer_id(layer_id)
# Call the API and succeed on failover.
self.getResponse(RepositoryImageSecurity,
params=dict(repository=ADMIN_ACCESS_USER + '/simple',
imageid=layer.docker_image_id, vulnerabilities='true'),
expected_code=200)
class TestSuperUserCustomCertificates(ApiTestCase):
def test_custom_certificates(self):

View file

@ -1,4 +1,4 @@
from test.test_api_usage import ApiTestCase, READ_ACCESS_USER, ADMIN_ACCESS_USER
from test.test_api_usage import ApiTestCase, READ_ACCESS_USER, ADMIN_ACCESS_USER, ConfigForTesting
from endpoints.api.suconfig import (SuperUserRegistryStatus, SuperUserConfig, SuperUserConfigFile,
SuperUserCreateInitialSuperUser, SuperUserConfigValidate)
from app import config_provider, all_queues
@ -8,16 +8,6 @@ from data import model
import unittest
class ConfigForTesting(object):
def __enter__(self):
config_provider.reset_for_test()
return config_provider
def __exit__(self, type, value, traceback):
config_provider.reset_for_test()
class TestSuperUserRegistryStatus(ApiTestCase):
def test_registry_status(self):
with ConfigForTesting():

46
util/failover.py Normal file
View file

@ -0,0 +1,46 @@
import logging
from functools import wraps
logger = logging.getLogger(__name__)
class FailoverException(Exception):
""" Exception raised when an operation should be retried by the failover decorator. """
def __init__(self, message):
super(FailoverException, self).__init__()
self.message = message
def failover(func):
""" Wraps a function such that it can be retried on specified failures.
Raises FailoverException when all failovers are exhausted.
Example:
@failover
def get_google(scheme, use_www=False):
www = 'www.' if use_www else ''
r = requests.get(scheme + '://' + www + 'google.com')
if r.status_code != 200:
raise FailoverException('non 200 response from Google' )
return r
def GooglePingTest():
r = get_google(
(('http'), {'use_www': False}),
(('http'), {'use_www': True}),
(('https'), {'use_www': False}),
(('https'), {'use_www': True}),
)
print('Successfully contacted ' + r.url)
"""
@wraps(func)
def wrapper(*args_sets):
for arg_set in args_sets:
try:
return func(*arg_set[0], **arg_set[1])
except FailoverException as ex:
logger.debug('failing over: %s', ex.message)
continue
raise FailoverException('exhausted all possible failovers')
return wrapper

View file

@ -1,98 +1,98 @@
# NOTE: This objects are used directly in the external-notification-data and vulnerability-service
# on the frontend, so be careful with changing their existing keys.
PRIORITY_LEVELS = {
'Unknown': {
'title': 'Unknown',
'index': 6,
'level': 'info',
'color': '#9B9B9B',
'score': 0,
'Unknown': {
'title': 'Unknown',
'index': 6,
'level': 'info',
'color': '#9B9B9B',
'score': 0,
'description': 'Unknown is either a security problem that has not been assigned ' +
'to a priority yet or a priority that our system did not recognize',
'banner_required': False
},
'description': 'Unknown is either a security problem that has not been assigned to a priority' +
' yet or a priority that our system did not recognize',
'banner_required': False
},
'Negligible': {
'title': 'Negligible',
'index': 5,
'level': 'info',
'color': '#9B9B9B',
'score': 1,
'Negligible': {
'title': 'Negligible',
'index': 5,
'level': 'info',
'color': '#9B9B9B',
'score': 1,
'description': 'Negligible is technically a security problem, but is only theoretical ' +
'in nature, requires a very special situation, has almost no install base, ' +
'or does no real damage.',
'banner_required': False
},
'description': 'Negligible is technically a security problem, but is only theoretical ' +
'in nature, requires a very special situation, has almost no install base, ' +
'or does no real damage.',
'banner_required': False
},
'Low': {
'title': 'Low',
'index': 4,
'level': 'warning',
'color': '#F8CA1C',
'score': 3,
'Low': {
'title': 'Low',
'index': 4,
'level': 'warning',
'color': '#F8CA1C',
'score': 3,
'description': 'Low is a security problem, but is hard to exploit due to environment, ' +
'requires a user-assisted attack, a small install base, or does very ' +
'little damage.',
'banner_required': False
},
'description': 'Low is a security problem, but is hard to exploit due to environment, ' +
'requires a user-assisted attack, a small install base, or does very little' +
' damage.',
'banner_required': False
},
'Medium': {
'title': 'Medium',
'value': 'Medium',
'index': 3,
'level': 'warning',
'color': '#FCA657',
'score': 6,
'Medium': {
'title': 'Medium',
'value': 'Medium',
'index': 3,
'level': 'warning',
'color': '#FCA657',
'score': 6,
'description': 'Medium is a real security problem, and is exploitable for many people. ' +
'Includes network daemon denial of service attacks, cross-site scripting, ' +
'and gaining user privileges.',
'banner_required': False
},
'description': 'Medium is a real security problem, and is exploitable for many people. ' +
'Includes network daemon denial of service attacks, cross-site scripting, and ' +
'gaining user privileges.',
'banner_required': False
},
'High': {
'title': 'High',
'value': 'High',
'index': 2,
'level': 'warning',
'color': '#F77454',
'score': 9,
'High': {
'title': 'High',
'value': 'High',
'index': 2,
'level': 'warning',
'color': '#F77454',
'score': 9,
'description': 'High is a real problem, exploitable for many people in a default installation. ' +
'Includes serious remote denial of services, local root privilege escalations, ' +
'or data loss.',
'banner_required': False
},
'description': 'High is a real problem, exploitable for many people in a default ' +
'installation. Includes serious remote denial of services, local root ' +
'privilege escalations, or data loss.',
'banner_required': False
},
'Critical': {
'title': 'Critical',
'value': 'Critical',
'index': 1,
'level': 'error',
'color': '#D64456',
'score': 10,
'Critical': {
'title': 'Critical',
'value': 'Critical',
'index': 1,
'level': 'error',
'color': '#D64456',
'score': 10,
'description': 'Critical is a world-burning problem, exploitable for nearly all people in ' +
'a installation of the package. Includes remote root privilege escalations, ' +
'or massive data loss.',
'banner_required': False
},
'description': 'Critical is a world-burning problem, exploitable for nearly all people in ' +
'a installation of the package. Includes remote root privilege escalations, ' +
'or massive data loss.',
'banner_required': False
},
'Defcon1': {
'title': 'Defcon 1',
'value': 'Defcon1',
'index': 0,
'level': 'error',
'color': 'black',
'score': 11,
'Defcon1': {
'title': 'Defcon 1',
'value': 'Defcon1',
'index': 0,
'level': 'error',
'color': 'black',
'score': 11,
'description': 'Defcon1 is a Critical problem which has been manually highlighted ' +
'by the Quay team. It requires immediate attention.',
'banner_required': True
}
'description': 'Defcon1 is a Critical problem which has been manually highlighted by the Quay' +
' team. It requires immediate attention.',
'banner_required': True
}
}

View file

@ -1,10 +1,10 @@
import logging
import logging.config
import features
from collections import defaultdict
import features
from endpoints.notificationhelper import spawn_notification
from data.database import ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION, Image
from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
@ -13,6 +13,7 @@ from util.secscan.api import (APIRequestFailure, AnalyzeLayerException, MissingP
InvalidLayerException, AnalyzeLayerRetryException)
from util.morecollections import AttrDict
logger = logging.getLogger(__name__)

View file

@ -9,6 +9,7 @@ from flask import url_for
from data.database import CloseForLongOperation
from data import model
from data.model.storage import get_storage_locations
from util.failover import failover, FailoverException
from util.secscan.validator import SecurityConfigValidator
from util.security.instancekeys import InstanceKeys
from util.security.registry_jwt import generate_bearer_token, build_context_and_subject
@ -19,6 +20,9 @@ TOKEN_VALIDITY_LIFETIME_S = 60 # Amount of time the security scanner has to cal
UNKNOWN_PARENT_LAYER_ERROR_MSG = 'worker: parent layer is unknown, it must be processed first'
MITM_CERT_PATH = '/conf/mitm.cert'
DEFAULT_HTTP_HEADERS = {'Connection': 'close'}
logger = logging.getLogger(__name__)
@ -309,32 +313,56 @@ class SecurityScannerAPI(object):
return json_response
def _call(self, method, relative_url, params=None, body=None):
""" Issues an HTTP call to the sec API at the given relative URL.
This function disconnects from the database while awaiting a response
from the API server.
def _request(self, method, endpoint, path, body, params, timeout):
""" Issues an HTTP request to the security endpoint. """
if self._config is None:
raise Exception('Cannot call unconfigured security system')
url = _join_api_url(endpoint, self._config.get('SECURITY_SCANNER_API_VERSION', 'v1'), path)
signer_proxy_url = self._config.get('JWTPROXY_SIGNER', 'localhost:8080')
logger.debug('%sing security URL %s', method.upper(), url)
return self._client.request(method, url, json=body, params=params, timeout=timeout,
verify=MITM_CERT_PATH, headers=DEFAULT_HTTP_HEADERS,
proxies={'https': 'https://' + signer_proxy_url,
'http': 'http://' + signer_proxy_url})
def _call(self, method, path, params=None, body=None):
""" Issues an HTTP request to the security endpoint handling the logic of using an alternative
BATCH endpoint for non-GET requests and failover for GET requests.
"""
if self._config is None:
raise Exception('Cannot call unconfigured security system')
client = self._client
headers = {'Connection': 'close'}
timeout = self._config.get('SECURITY_SCANNER_API_TIMEOUT_SECONDS', 10)
timeout = self._config['SECURITY_SCANNER_API_TIMEOUT_SECONDS']
endpoint = self._config['SECURITY_SCANNER_ENDPOINT']
if method != 'GET':
timeout = self._config.get('SECURITY_SCANNER_API_BATCH_TIMEOUT_SECONDS', timeout)
endpoint = self._config.get('SECURITY_SCANNER_ENDPOINT_BATCH') or endpoint
api_url = urljoin(endpoint, '/' + self._config.get('SECURITY_SCANNER_API_VERSION', 'v1')) + '/'
url = urljoin(api_url, relative_url)
signer_proxy_url = self._config.get('JWTPROXY_SIGNER', 'localhost:8080')
with CloseForLongOperation(self._config):
logger.debug('%sing security URL %s', method.upper(), url)
return client.request(method, url, json=body, params=params, timeout=timeout,
verify='/conf/mitm.cert', headers=headers,
proxies={
'https': 'https://' + signer_proxy_url,
'http': 'http://' + signer_proxy_url
})
# If the request isn't a read, attempt to use a batch stack and do not fail over.
if method != 'GET':
if self._config.get('SECURITY_SCANNER_ENDPOINT_BATCH') is not None:
endpoint = self._config['SECURITY_SCANNER_ENDPOINT_BATCH']
timeout = self._config.get('SECURITY_SCANNER_API_BATCH_TIMEOUT_SECONDS') or timeout
return self._request(method, endpoint, path, body, params, timeout)
# The request is read-only and can failover.
all_endpoints = [endpoint] + self._config['SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS']
try:
return _failover_read_request(*[((self._request, endpoint, path, body, params, timeout), {})
for endpoint in all_endpoints])
except FailoverException:
raise APIRequestFailure()
def _join_api_url(endpoint, api_version, path):
pathless_url = urljoin(endpoint, '/' + api_version) + '/'
return urljoin(pathless_url, path)
@failover
def _failover_read_request(request_fn, endpoint, path, body, params, timeout):
""" This function auto-retries read-only requests until they return a 2xx status code. """
resp = request_fn('GET', endpoint, path, body, params, timeout)
if resp.status_code / 100 != 2:
raise FailoverException('status code was not 2xx')
return resp

View file

@ -28,10 +28,17 @@ class FakeSecurityScanner(object):
self.notifications = {}
self.layer_vulns = {}
self.ok_layer_id = None
self.fail_layer_id = None
self.internal_error_layer_id = None
self.error_layer_id = None
def set_ok_layer_id(self, ok_layer_id):
""" Sets a layer ID that, if encountered when the analyze call is made, causes a 200
to be immediately returned.
"""
self.ok_layer_id = ok_layer_id
def set_fail_layer_id(self, fail_layer_id):
""" Sets a layer ID that, if encountered when the analyze call is made, causes a 422
to be raised.
@ -167,6 +174,12 @@ class FakeSecurityScanner(object):
@urlmatch(netloc=r'(.*\.)?' + self.hostname, path=r'/v1/layers/(.+)', method='GET')
def get_layer_mock(url, request):
layer_id = url.path[len('/v1/layers/'):]
if layer_id == self.ok_layer_id:
return {
'status_code': 200,
'content': json.dumps({'Layer': {}}),
}
if layer_id == self.internal_error_layer_id:
return {
'status_code': 500,
@ -305,7 +318,10 @@ class FakeSecurityScanner(object):
@all_requests
def response_content(url, _):
raise Exception('Unknown endpoint: ' + str(url))
return {
'status_code': 500,
'content': '',
}
return [get_layer_mock, post_layer_mock, remove_layer_mock, get_notification,
delete_notification, response_content]

View file

@ -1,6 +1,8 @@
import features
import logging
import features
logger = logging.getLogger(__name__)

View file

@ -0,0 +1,44 @@
import pytest
from util.failover import failover, FailoverException
class Counter(object):
""" Wraps a counter in an object so that it'll be passed by reference. """
def __init__(self):
self.calls = 0
def increment(self):
self.calls += 1
@failover
def my_failover_func(i, should_raise=None):
""" Increments a counter and raises an exception when told. """
i.increment()
if should_raise is not None:
raise should_raise()
raise FailoverException('incrementing')
@pytest.mark.parametrize('stop_on,exception', [
(10, None),
(5, IndexError),
])
def test_readonly_failover(stop_on, exception):
""" Generates failover arguments and checks against a counter to ensure that
the failover function has been called the proper amount of times and stops
at unhandled exceptions.
"""
counter = Counter()
arg_sets = []
for i in xrange(stop_on):
should_raise = exception if exception is not None and i == stop_on-1 else None
arg_sets.append(((counter,), {'should_raise': should_raise}))
if exception is not None:
with pytest.raises(exception):
my_failover_func(*arg_sets)
else:
my_failover_func(*arg_sets)
assert counter.calls == stop_on