Add feature flag to force all direct download URLs to be proxied

Fixes #1667
This commit is contained in:
Joseph Schorr 2016-08-24 12:55:33 -04:00
parent 2b00c644b5
commit dd2e086a20
12 changed files with 350 additions and 34 deletions

View file

@ -3,7 +3,9 @@ from storage.cloud import S3Storage, GoogleCloudStorage, RadosGWStorage
from storage.fakestorage import FakeStorage
from storage.distributedstorage import DistributedStorage
from storage.swift import SwiftStorage
from storage.downloadproxy import DownloadProxy
from urlparse import urlparse, parse_qs
STORAGE_DRIVER_CLASSES = {
'LocalStorage': LocalStorage,
@ -23,14 +25,14 @@ def get_storage_driver(metric_queue, storage_params):
class Storage(object):
def __init__(self, app=None, metric_queue=None):
def __init__(self, app=None, metric_queue=None, instance_keys=None):
self.app = app
if app is not None and metric_queue is not None:
self.state = self.init_app(app, metric_queue)
if app is not None:
self.state = self.init_app(app, metric_queue, instance_keys)
else:
self.state = None
def init_app(self, app, metric_queue):
def init_app(self, app, metric_queue, instance_keys):
storages = {}
for location, storage_params in app.config.get('DISTRIBUTED_STORAGE_CONFIG').items():
storages[location] = get_storage_driver(metric_queue, storage_params)
@ -40,7 +42,12 @@ class Storage(object):
preference = storages.keys()
default_locations = app.config.get('DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS') or []
d_storage = DistributedStorage(storages, preference, default_locations)
download_proxy = None
if app.config.get('FEATURE_PROXY_STORAGE', False) and instance_keys is not None:
download_proxy = DownloadProxy(app, instance_keys)
d_storage = DistributedStorage(storages, preference, default_locations, download_proxy)
# register extension with app
app.extensions = getattr(app, 'extensions', {})

View file

@ -5,10 +5,8 @@ from functools import wraps
from storage.basestorage import StoragePaths, BaseStorage, BaseStorageV2
logger = logging.getLogger(__name__)
def _location_aware(unbound_func):
@wraps(unbound_func)
def wrapper(self, locations, *args, **kwargs):
@ -27,17 +25,19 @@ def _location_aware(unbound_func):
class DistributedStorage(StoragePaths):
def __init__(self, storages, preferred_locations=None, default_locations=None):
def __init__(self, storages, preferred_locations=None, default_locations=None, proxy=None):
self._storages = dict(storages)
self.preferred_locations = list(preferred_locations or [])
self.default_locations = list(default_locations or [])
self.proxy = proxy
@property
def locations(self):
    """ Returns a new list holding the name of every storage location that is configured. """
    # Iterating the mapping yields its keys, i.e. the location names.
    return list(self._storages)
get_direct_download_url = _location_aware(BaseStorage.get_direct_download_url)
_get_direct_download_url = _location_aware(BaseStorage.get_direct_download_url)
get_direct_upload_url = _location_aware(BaseStorage.get_direct_upload_url)
get_content = _location_aware(BaseStorage.get_content)
put_content = _location_aware(BaseStorage.put_content)
@ -55,6 +55,19 @@ class DistributedStorage(StoragePaths):
complete_chunked_upload = _location_aware(BaseStorageV2.complete_chunked_upload)
cancel_chunked_upload = _location_aware(BaseStorageV2.cancel_chunked_upload)
def get_direct_download_url(self, locations, path, expires_in=600, requires_cors=False,
                            head=False):
    """ Returns the direct download URL for the given path, rewritten to go through the download
        proxy when one is configured.

        Returns None when the underlying storage lookup yields no direct download URL. When no
        proxy was supplied to this instance, the storage URL is returned unchanged.
    """
    direct_url = self._get_direct_download_url(locations, path, expires_in, requires_cors, head)

    # Nothing to rewrite: either there is no URL at all, or proxying is not enabled.
    if direct_url is None or self.proxy is None:
        return direct_url

    return self.proxy.proxy_download_url(direct_url)
def copy_between(self, path, source_location, destination_location):
""" Copies a file between the source location and the destination location. """
source_storage = self._storages[source_location]

167
storage/downloadproxy.py Normal file
View file

@ -0,0 +1,167 @@
import urllib
from urlparse import urlparse, parse_qs
from util.security.registry_jwt import (generate_bearer_token, decode_bearer_token,
InvalidBearerTokenException)
from flask import abort, request
from jsonschema import validate, ValidationError
import logging
logger = logging.getLogger(__name__)

# Lifetime, in seconds, handed to generate_bearer_token when minting a proxy token.
PROXY_STORAGE_MAX_LIFETIME_S = 30

# Subject and access type placed into every storage proxy token.
STORAGE_PROXY_SUBJECT = 'storageproxy'
STORAGE_PROXY_ACCESS_TYPE = 'storageproxy'

# JSON schema for the `access` claim of a storage proxy token: a list of entries, each of which
# names the access type plus the scheme, host and URI of the storage URL being proxied.
ACCESS_SCHEMA = {
    'type': 'array',
    'description': 'List of access granted to the subject',
    'items': {
        'type': 'object',
        'required': ['type', 'scheme', 'host', 'uri'],
        'properties': {
            'type': {
                'type': 'string',
                'description': 'We only allow storage proxy permissions',
                'enum': [STORAGE_PROXY_ACCESS_TYPE],
            },
            'scheme': {
                'type': 'string',
                'description': 'The scheme for the storage URL being proxied',
            },
            'host': {
                'type': 'string',
                'description': 'The hostname for the storage URL being proxied',
            },
            'uri': {
                'type': 'string',
                'description': 'The URI path for the storage URL being proxied',
            },
        },
    },
}
class DownloadProxy(object):
    """ Helper class to enable proxying of direct download URLs for storage via the registry's
        local NGINX.

        Direct download URLs are rewritten to point at `/_storage_proxy/...` on the registry's
        own hostname and carry a short-lived signed token. The `/_storage_proxy_auth` endpoint
        registered by this class checks that token against the URL being requested.
    """

    def __init__(self, app, instance_keys):
        """ Stores the app and signing keys and registers the proxy auth endpoint on the app.

            app: the Flask application; its config must provide SERVER_HOSTNAME and
                 PREFERRED_URL_SCHEME.
            instance_keys: the key material handed to generate_bearer_token/decode_bearer_token.
        """
        self.app = app
        self.instance_keys = instance_keys

        # NOTE(review): presumably invoked as an auth subrequest by the fronting NGINX, which
        # supplies the X-Original-URI header -- confirm against the NGINX configuration.
        app.add_url_rule('/_storage_proxy_auth', '_storage_proxy_auth', self._validate_proxy_url)

    def proxy_download_url(self, download_url):
        """ Returns a URL to proxy the specified blob download URL.

            The returned URL has the form:
              http(s)://registry_server/_storage_proxy/{token}/{scheme}/{hostname}/rest/of/path
            where {token} is a bearer token signing access to exactly that scheme, hostname and
            path (including any query string).
        """
        # Parse the URL to be downloaded into its components (host, path, scheme).
        parsed = urlparse(download_url)

        path = parsed.path
        if parsed.query:
            path = path + '?' + parsed.query

        # The leading slash is supplied by the proxy URL format itself.
        if path.startswith('/'):
            path = path[1:]

        access = {
            'type': STORAGE_PROXY_ACCESS_TYPE,
            'uri': path,
            'host': parsed.netloc,
            'scheme': parsed.scheme,
        }

        # Generate a JWT that signs access to this URL. This JWT will be passed back to the
        # registry code when the download commences. Note that we don't add any context here,
        # as it isn't needed.
        server_hostname = self.app.config['SERVER_HOSTNAME']
        token = generate_bearer_token(server_hostname, STORAGE_PROXY_SUBJECT, {}, [access],
                                      PROXY_STORAGE_MAX_LIFETIME_S, self.instance_keys)

        url_scheme = self.app.config['PREFERRED_URL_SCHEME']
        encoded_token = urllib.quote(token)
        proxy_url = '%s://%s/_storage_proxy/%s/%s/%s/%s' % (url_scheme, server_hostname,
                                                            encoded_token, parsed.scheme,
                                                            parsed.netloc, path)
        logger.debug('Proxying via URL %s', proxy_url)
        return proxy_url

    def _validate_proxy_url(self):
        """ View function for /_storage_proxy_auth.

            Reads the proxied request's path from the X-Original-URI header, decodes the token
            embedded in it and verifies that the token grants access to exactly the scheme, host
            and URI found in that path. Returns 'OK' on success; every failure aborts the request
            with a 401.
        """
        original_uri = request.headers.get('X-Original-URI', None)
        if not original_uri:
            logger.error('Missing original URI: %s', request.headers)
            abort(401)

        if not original_uri.startswith('/_storage_proxy/'):
            logger.error('Unknown storage proxy path: %s', original_uri)
            abort(401)

        # The proxy path is of the form:
        # /_storage_proxy/{token}/{scheme}/{hostname}/rest/of/path/here
        without_prefix = original_uri[len('/_storage_proxy/'):]
        parts = without_prefix.split('/', 3)
        if len(parts) != 4:
            logger.error('Invalid storage proxy path (found %s parts): %s', len(parts),
                         without_prefix)
            abort(401)

        encoded_token, scheme, host, uri = parts
        token = urllib.unquote(encoded_token)

        logger.debug('Got token %s for storage proxy auth request %s with parts %s', token,
                     original_uri, parts)

        # Decode the bearer token.
        try:
            decoded = decode_bearer_token(token, self.instance_keys)
        except InvalidBearerTokenException:
            logger.exception('Invalid token for storage proxy')
            abort(401)

        # Ensure it is for the proxy. The claim is read with .get() and logged from the same
        # value, so a token with a missing or unexpected subject is rejected with a 401 rather
        # than failing with a KeyError while building the log message.
        subject = decoded.get('sub')
        if subject != STORAGE_PROXY_SUBJECT:
            logger.error('Invalid subject %s for storage proxy auth', subject)
            abort(401)

        # Validate that the access matches the token format.
        access = decoded.get('access', {})
        try:
            validate(access, ACCESS_SCHEMA)
        except ValidationError:
            logger.exception('We should not be minting invalid credentials: %s', access)
            abort(401)

        # For now, we only expect a single access credential.
        if len(access) != 1:
            logger.error('We should not be minting invalid credentials: %s', access)
            abort(401)

        # Ensure the signed access matches the requested URL's pieces.
        granted_access = access[0]
        for field, found in (('scheme', scheme), ('host', host), ('uri', uri)):
            expected = granted_access[field]
            if expected != found:
                logger.error('Mismatch in %s. %s expected, %s found', field, expected, found)
                abort(401)

        return 'OK'

View file

@ -18,7 +18,7 @@ class FakeStorage(BaseStorageV2):
def get_direct_download_url(self, path, expires_in=60, requires_cors=False, head=False):
try:
if self.get_content('supports_direct_download') == 'true':
return 'http://somefakeurl'
return 'http://somefakeurl?goes=here'
except:
pass