Add feature flag to force all direct download URLs to be proxied
Fixes #1667
parent 2b00c644b5
commit dd2e086a20
12 changed files with 350 additions and 34 deletions
storage/__init__.py
@@ -3,7 +3,9 @@ from storage.cloud import S3Storage, GoogleCloudStorage, RadosGWStorage
 from storage.fakestorage import FakeStorage
 from storage.distributedstorage import DistributedStorage
 from storage.swift import SwiftStorage
+from storage.downloadproxy import DownloadProxy
 
+from urlparse import urlparse, parse_qs
 
 STORAGE_DRIVER_CLASSES = {
   'LocalStorage': LocalStorage,
@@ -23,14 +25,14 @@ def get_storage_driver(metric_queue, storage_params):
 
 
 class Storage(object):
-  def __init__(self, app=None, metric_queue=None):
+  def __init__(self, app=None, metric_queue=None, instance_keys=None):
     self.app = app
-    if app is not None and metric_queue is not None:
-      self.state = self.init_app(app, metric_queue)
+    if app is not None:
+      self.state = self.init_app(app, metric_queue, instance_keys)
     else:
       self.state = None
 
-  def init_app(self, app, metric_queue):
+  def init_app(self, app, metric_queue, instance_keys):
     storages = {}
     for location, storage_params in app.config.get('DISTRIBUTED_STORAGE_CONFIG').items():
       storages[location] = get_storage_driver(metric_queue, storage_params)
@@ -40,7 +42,12 @@ class Storage(object):
     preference = storages.keys()
 
     default_locations = app.config.get('DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS') or []
-    d_storage = DistributedStorage(storages, preference, default_locations)
+
+    download_proxy = None
+    if app.config.get('FEATURE_PROXY_STORAGE', False) and instance_keys is not None:
+      download_proxy = DownloadProxy(app, instance_keys)
+
+    d_storage = DistributedStorage(storages, preference, default_locations, download_proxy)
 
     # register extension with app
     app.extensions = getattr(app, 'extensions', {})
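
The wiring above is gated on a single flag read from app.config. Below is a minimal sketch of that gating rule, not part of the commit: should_proxy_downloads is a hypothetical helper name, and the bare object() values merely stand in for the registry's instance-key service so the snippet is self-contained.

def should_proxy_downloads(config, instance_keys):
  # Mirrors the check added to init_app above:
  #   app.config.get('FEATURE_PROXY_STORAGE', False) and instance_keys is not None
  return bool(config.get('FEATURE_PROXY_STORAGE', False)) and instance_keys is not None

assert not should_proxy_downloads({}, object())                           # flag defaults to off
assert not should_proxy_downloads({'FEATURE_PROXY_STORAGE': True}, None)  # no instance keys, no proxy
assert should_proxy_downloads({'FEATURE_PROXY_STORAGE': True}, object())  # proxying forced on

With the flag unset (the default) nothing changes: DistributedStorage receives no proxy and direct download URLs are returned to clients exactly as before.
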
storage/distributedstorage.py
@@ -5,10 +5,8 @@ from functools import wraps
 
 from storage.basestorage import StoragePaths, BaseStorage, BaseStorageV2
 
 
 logger = logging.getLogger(__name__)
 
 
 def _location_aware(unbound_func):
   @wraps(unbound_func)
   def wrapper(self, locations, *args, **kwargs):
@@ -27,17 +25,19 @@ def _location_aware(unbound_func):
 
 
 class DistributedStorage(StoragePaths):
-  def __init__(self, storages, preferred_locations=None, default_locations=None):
+  def __init__(self, storages, preferred_locations=None, default_locations=None, proxy=None):
     self._storages = dict(storages)
     self.preferred_locations = list(preferred_locations or [])
     self.default_locations = list(default_locations or [])
+    self.proxy = proxy
 
   @property
   def locations(self):
     """ Returns the names of the locations supported. """
     return list(self._storages.keys())
 
-  get_direct_download_url = _location_aware(BaseStorage.get_direct_download_url)
+  _get_direct_download_url = _location_aware(BaseStorage.get_direct_download_url)
+
   get_direct_upload_url = _location_aware(BaseStorage.get_direct_upload_url)
   get_content = _location_aware(BaseStorage.get_content)
   put_content = _location_aware(BaseStorage.put_content)
@@ -55,6 +55,19 @@ class DistributedStorage(StoragePaths):
   complete_chunked_upload = _location_aware(BaseStorageV2.complete_chunked_upload)
   cancel_chunked_upload = _location_aware(BaseStorageV2.cancel_chunked_upload)
 
+
+  def get_direct_download_url(self, locations, path, expires_in=600, requires_cors=False,
+                              head=False):
+    download_url = self._get_direct_download_url(locations, path, expires_in, requires_cors, head)
+    if download_url is None:
+      return None
+
+    if self.proxy is None:
+      return download_url
+
+    return self.proxy.proxy_download_url(download_url)
+
+
   def copy_between(self, path, source_location, destination_location):
     """ Copies a file between the source location and the destination location. """
     source_storage = self._storages[source_location]
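
The existing location-aware helper is kept under the private name _get_direct_download_url, and the new public method decides whether the caller gets the raw storage URL or a proxied one. A toy illustration of that branching, not from the commit: _StubProxy and resolve_download_url are invented names, and the URLs are placeholders.

class _StubProxy(object):
  """ Stand-in for DownloadProxy; the real class returns a signed /_storage_proxy/... URL. """
  def proxy_download_url(self, download_url):
    return 'https://registry.example.com/_storage_proxy/<token>/' + download_url


def resolve_download_url(direct_url, proxy=None):
  # Mirrors the branching in the new DistributedStorage.get_direct_download_url.
  if direct_url is None:
    return None
  if proxy is None:
    return direct_url                           # flag off: storage URL goes straight to the client
  return proxy.proxy_download_url(direct_url)   # flag on: the client is sent through the registry


print(resolve_download_url('https://bucket.s3.example.com/blob?sig=abc'))
print(resolve_download_url('https://bucket.s3.example.com/blob?sig=abc', proxy=_StubProxy()))
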
storage/downloadproxy.py (new file)
@@ -0,0 +1,167 @@
import urllib
from urlparse import urlparse, parse_qs
from util.security.registry_jwt import (generate_bearer_token, decode_bearer_token,
                                         InvalidBearerTokenException)

from flask import abort, request
from jsonschema import validate, ValidationError

import logging

logger = logging.getLogger(__name__)


PROXY_STORAGE_MAX_LIFETIME_S = 30 # Seconds
STORAGE_PROXY_SUBJECT = 'storageproxy'
STORAGE_PROXY_ACCESS_TYPE = 'storageproxy'

ACCESS_SCHEMA = {
  'type': 'array',
  'description': 'List of access granted to the subject',
  'items': {
    'type': 'object',
    'required': [
      'type',
      'scheme',
      'host',
      'uri',
    ],
    'properties': {
      'type': {
        'type': 'string',
        'description': 'We only allow storage proxy permissions',
        'enum': [
          'storageproxy',
        ],
      },
      'scheme': {
        'type': 'string',
        'description': 'The scheme for the storage URL being proxied'
      },
      'host': {
        'type': 'string',
        'description': 'The hostname for the storage URL being proxied'
      },
      'uri': {
        'type': 'string',
        'description': 'The URI path for the storage URL being proxied'
      },
    },
  },
}


class DownloadProxy(object):
  """ Helper class to enable proxying of direct download URLs for storage via the registry's
      local NGINX.
  """
  def __init__(self, app, instance_keys):
    self.app = app
    self.instance_keys = instance_keys

    app.add_url_rule('/_storage_proxy_auth', '_storage_proxy_auth', self._validate_proxy_url)

  def proxy_download_url(self, download_url):
    """ Returns a URL to proxy the specified blob download URL.
    """
    # Parse the URL to be downloaded into its components (host, path, scheme).
    parsed = urlparse(download_url)

    path = parsed.path
    if parsed.query:
      path = path + '?' + parsed.query

    if path.startswith('/'):
      path = path[1:]

    access = {
      'type': STORAGE_PROXY_ACCESS_TYPE,
      'uri': path,
      'host': parsed.netloc,
      'scheme': parsed.scheme,
    }

    # Generate a JWT that signs access to this URL. This JWT will be passed back to the registry
    # code when the download commences. Note that we don't add any context here, as it isn't
    # needed.
    server_hostname = self.app.config['SERVER_HOSTNAME']
    token = generate_bearer_token(server_hostname, STORAGE_PROXY_SUBJECT, {}, [access],
                                  PROXY_STORAGE_MAX_LIFETIME_S, self.instance_keys)

    url_scheme = self.app.config['PREFERRED_URL_SCHEME']
    server_hostname = self.app.config['SERVER_HOSTNAME']

    # The proxy path is of the form:
    # http(s)://registry_server/_storage_proxy/{token}/{scheme}/{hostname}/rest/of/path/here
    encoded_token = urllib.quote(token)
    proxy_url = '%s://%s/_storage_proxy/%s/%s/%s/%s' % (url_scheme, server_hostname, encoded_token,
                                                        parsed.scheme, parsed.netloc, path)
    logger.debug('Proxying via URL %s', proxy_url)
    return proxy_url


  def _validate_proxy_url(self):
    original_uri = request.headers.get('X-Original-URI', None)
    if not original_uri:
      logger.error('Missing original URI: %s', request.headers)
      abort(401)

    if not original_uri.startswith('/_storage_proxy/'):
      logger.error('Unknown storage proxy path: %s', original_uri)
      abort(401)

    # The proxy path is of the form:
    # /_storage_proxy/{token}/{scheme}/{hostname}/rest/of/path/here
    without_prefix = original_uri[len('/_storage_proxy/'):]
    parts = without_prefix.split('/', 3)
    if len(parts) != 4:
      logger.error('Invalid storage proxy path (found %s parts): %s', len(parts), without_prefix)
      abort(401)

    encoded_token, scheme, host, uri = parts
    token = urllib.unquote(encoded_token)

    logger.debug('Got token %s for storage proxy auth request %s with parts %s', token,
                 original_uri, parts)

    # Decode the bearer token.
    try:
      decoded = decode_bearer_token(token, self.instance_keys)
    except InvalidBearerTokenException:
      logger.exception('Invalid token for storage proxy')
      abort(401)

    # Ensure it is for the proxy.
    if decoded['sub'] != STORAGE_PROXY_SUBJECT:
      logger.exception('Invalid subject %s for storage proxy auth', decoded['subject'])
      abort(401)

    # Validate that the access matches the token format.
    access = decoded.get('access', {})
    try:
      validate(access, ACCESS_SCHEMA)
    except ValidationError:
      logger.exception('We should not be minting invalid credentials: %s', access)
      abort(401)

    # For now, we only expect a single access credential.
    if len(access) != 1:
      logger.exception('We should not be minting invalid credentials: %s', access)
      abort(401)

    # Ensure the signed access matches the requested URL's pieces.
    granted_access = access[0]
    if granted_access['scheme'] != scheme:
      logger.exception('Mismatch in scheme. %s expected, %s found', granted_access['scheme'],
                       scheme)
      abort(401)

    if granted_access['host'] != host:
      logger.exception('Mismatch in host. %s expected, %s found', granted_access['host'], host)
      abort(401)

    if granted_access['uri'] != uri:
      logger.exception('Mismatch in uri. %s expected, %s found', granted_access['uri'], uri)
      abort(401)

    return 'OK'
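
To make the rewriting concrete, here is a worked example, not part of the commit, of the transformation proxy_download_url performs and of how _validate_proxy_url later splits the path back apart. The S3-style URL, the registry hostname, and the placeholder token are invented; in the real code the token is the JWT produced by generate_bearer_token.

# Python 2, matching the urlparse/urllib modules used above.
import urllib
from urlparse import urlparse

download_url = 'https://my-bucket.s3.amazonaws.com/blobs/sha256/ab/abcd?X-Amz-Expires=600'
parsed = urlparse(download_url)

path = parsed.path
if parsed.query:
  path = path + '?' + parsed.query
if path.startswith('/'):
  path = path[1:]

encoded_token = urllib.quote('<signed-jwt-here>')   # placeholder for the real bearer token
proxy_url = '%s://%s/_storage_proxy/%s/%s/%s/%s' % ('https', 'registry.example.com',
                                                    encoded_token, parsed.scheme,
                                                    parsed.netloc, path)
print(proxy_url)
# https://registry.example.com/_storage_proxy/%3Csigned-jwt-here%3E/https/my-bucket.s3.amazonaws.com/blobs/sha256/ab/abcd?X-Amz-Expires=600

# The auth endpoint sees this same path via X-Original-URI and recovers the signed pieces:
original_uri = '/_storage_proxy/%s/%s/%s/%s' % (encoded_token, parsed.scheme, parsed.netloc, path)
encoded, scheme, host, uri = original_uri[len('/_storage_proxy/'):].split('/', 3)
assert urllib.unquote(encoded) == '<signed-jwt-here>'
assert (scheme, host, uri) == (parsed.scheme, parsed.netloc, path)
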
storage/fakestorage.py
@@ -18,7 +18,7 @@ class FakeStorage(BaseStorageV2):
   def get_direct_download_url(self, path, expires_in=60, requires_cors=False, head=False):
     try:
       if self.get_content('supports_direct_download') == 'true':
-        return 'http://somefakeurl'
+        return 'http://somefakeurl?goes=here'
     except:
       pass