Add CloudFrontedS3Storage, which redirects to CloudFront for non-S3 ips

This commit is contained in:
Joseph Schorr 2017-09-26 16:08:50 -04:00
parent 2d522764f7
commit 010dda2c52
14 changed files with 175 additions and 69 deletions

View file

@ -1,5 +1,5 @@
from storage.local import LocalStorage
from storage.cloud import S3Storage, GoogleCloudStorage, RadosGWStorage
from storage.cloud import S3Storage, GoogleCloudStorage, RadosGWStorage, CloudFrontedS3Storage
from storage.fakestorage import FakeStorage
from storage.distributedstorage import DistributedStorage
from storage.swift import SwiftStorage
@ -11,39 +11,44 @@ STORAGE_DRIVER_CLASSES = {
'GoogleCloudStorage': GoogleCloudStorage,
'RadosGWStorage': RadosGWStorage,
'SwiftStorage': SwiftStorage,
'CloudFrontedS3Storage': CloudFrontedS3Storage,
}
def get_storage_driver(location, metric_queue, chunk_cleanup_queue, storage_params):
def get_storage_driver(location, metric_queue, chunk_cleanup_queue, config_provider, ip_resolver, storage_params):
""" Returns a storage driver class for the given storage configuration
(a pair of string name and a dict of parameters). """
driver = storage_params[0]
parameters = storage_params[1]
driver_class = STORAGE_DRIVER_CLASSES.get(driver, FakeStorage)
context = StorageContext(location, metric_queue, chunk_cleanup_queue)
context = StorageContext(location, metric_queue, chunk_cleanup_queue, config_provider, ip_resolver)
return driver_class(context, **parameters)
class StorageContext(object):
def __init__(self, location, metric_queue, chunk_cleanup_queue):
def __init__(self, location, metric_queue, chunk_cleanup_queue, config_provider, ip_resolver):
self.location = location
self.metric_queue = metric_queue
self.chunk_cleanup_queue = chunk_cleanup_queue
self.config_provider = config_provider
self.ip_resolver = ip_resolver
class Storage(object):
def __init__(self, app=None, metric_queue=None, chunk_cleanup_queue=None, instance_keys=None):
def __init__(self, app=None, metric_queue=None, chunk_cleanup_queue=None, instance_keys=None,
config_provider=None, ip_resolver=None):
self.app = app
if app is not None:
self.state = self.init_app(app, metric_queue, chunk_cleanup_queue, instance_keys)
self.state = self.init_app(app, metric_queue, chunk_cleanup_queue, instance_keys,
config_provider, ip_resolver)
else:
self.state = None
def init_app(self, app, metric_queue, chunk_cleanup_queue, instance_keys):
def init_app(self, app, metric_queue, chunk_cleanup_queue, instance_keys, config_provider, ip_resolver):
storages = {}
for location, storage_params in app.config.get('DISTRIBUTED_STORAGE_CONFIG').items():
storages[location] = get_storage_driver(location, metric_queue, chunk_cleanup_queue,
storage_params)
config_provider, ip_resolver, storage_params)
preference = app.config.get('DISTRIBUTED_STORAGE_PREFERENCE', None)
if not preference:

View file

@ -49,7 +49,7 @@ class BaseStorage(StoragePaths):
if not self.exists('_verify'):
raise Exception('Could not find verification file')
def get_direct_download_url(self, path, expires_in=60, requires_cors=False, head=False):
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False, head=False):
return None
def get_direct_upload_url(self, path, mime_type, requires_cors=True):

View file

@ -3,8 +3,17 @@ import os
import logging
import copy
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cachetools import lru_cache
from itertools import chain
from datetime import datetime, timedelta
from botocore.signers import CloudFrontSigner
from boto.exception import S3ResponseError
import boto.s3.connection
import boto.s3.multipart
@ -590,3 +599,58 @@ class RadosGWStorage(_CloudStorage):
# See https://github.com/ceph/ceph/pull/5139
chunk_list = self._chunk_list_from_metadata(storage_metadata)
self._client_side_chunk_join(final_path, chunk_list)
class CloudFrontedS3Storage(S3Storage):
""" An S3Storage engine that redirects to CloudFront for all requests outside of AWS. """
def __init__(self, context, cloudfront_distribution_domain, cloudfront_key_id,
cloudfront_privatekey_filename, storage_path, s3_bucket, *args, **kwargs):
super(CloudFrontedS3Storage, self).__init__(context, storage_path, s3_bucket, *args, **kwargs)
self.cloudfront_distribution_domain = cloudfront_distribution_domain
self.cloudfront_key_id = cloudfront_key_id
self.cloudfront_privatekey = self._load_private_key(cloudfront_privatekey_filename)
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False, head=False):
logger.debug('Got direct download request for path "%s" with IP "%s"', path, request_ip)
if request_ip is not None:
# Lookup the IP address in our resolution table and determine whether it is under AWS. If it is *not*,
# then return a CloudFront signed URL.
resolved_ip_info = self._context.ip_resolver.resolve_ip(request_ip)
logger.debug('Resolved IP information for IP %s: %s', request_ip, resolved_ip_info)
if resolved_ip_info and resolved_ip_info.provider != 'aws':
url = 'https://%s/%s' % (self.cloudfront_distribution_domain, path)
expire_date = datetime.now() + timedelta(seconds=expires_in)
signer = self._get_cloudfront_signer()
signed_url = signer.generate_presigned_url(url, date_less_than=expire_date)
logger.debug('Returning CloudFront URL for path "%s" with IP "%s": %s', path, resolved_ip_info, signed_url)
return signed_url
return super(CloudFrontedS3Storage, self).get_direct_download_url(path, request_ip, expires_in, requires_cors,
head)
@lru_cache(maxsize=1)
def _get_cloudfront_signer(self):
return CloudFrontSigner(self.cloudfront_key_id, self._get_rsa_signer())
@lru_cache(maxsize=1)
def _get_rsa_signer(self):
private_key = self.cloudfront_privatekey
def handler(message):
signer = private_key.signer(padding.PKCS1v15(), hashes.SHA1())
signer.update(message)
return signer.finalize()
return handler
@lru_cache(maxsize=1)
def _load_private_key(self, cloudfront_privatekey_filename):
""" Returns the private key, loaded from the config provider, used to sign direct
download URLs to CloudFront.
"""
with self._context.config_provider.get_volume_file(cloudfront_privatekey_filename) as key_file:
return serialization.load_pem_private_key(
key_file.read(),
password=None,
backend=default_backend()
)

View file

@ -0,0 +1,56 @@
import pytest
from httmock import urlmatch, HTTMock
from moto import mock_s3
import boto
from app import config_provider
from storage import CloudFrontedS3Storage, StorageContext
from util.ipresolver import IPResolver
from util.ipresolver.test.test_ipresolver import http_client, test_aws_ip, aws_ip_range_handler
from test.fixtures import *
_TEST_CONTENT = os.urandom(1024)
_TEST_BUCKET = 'some_bucket'
_TEST_USER = 'someuser'
_TEST_PASSWORD = 'somepassword'
_TEST_PATH = 'some/cool/path'
@pytest.fixture(params=[True, False])
def ipranges_populated(request):
return request.param
@pytest.fixture()
def ipresolver(http_client, aws_ip_range_handler, ipranges_populated, app):
with HTTMock(aws_ip_range_handler):
ipresolver = IPResolver(app, client=http_client)
if ipranges_populated:
assert ipresolver._update_aws_ip_range()
return ipresolver
@pytest.fixture()
def storage_context(ipresolver, app):
return StorageContext('nyc', None, None, config_provider, ipresolver)
@mock_s3
def test_direct_download(storage_context, test_aws_ip, ipranges_populated, app):
# Create a test bucket and put some test content.
boto.connect_s3().create_bucket(_TEST_BUCKET)
engine = CloudFrontedS3Storage(storage_context, 'cloudfrontdomain', 'keyid', 'test/data/test.pem', 'some/path',
_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
engine.put_content(_TEST_PATH, _TEST_CONTENT)
assert engine.exists(_TEST_PATH)
# Request a direct download URL for a request from a known AWS IP, and ensure we are returned an S3 URL.
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, test_aws_ip)
if ipranges_populated:
# Request a direct download URL for a request from a non-AWS IP, and ensure we are returned a CloudFront URL.
assert 'cloudfrontdomain' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
else:
# Request a direct download URL for a request from a non-AWS IP, but since IP Ranges isn't populated, we still
# get back an S3 URL.
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')

View file

@ -9,7 +9,7 @@ from storage import StorageContext
from storage.swift import SwiftStorage
base_args = {
'context': StorageContext('nyc', None, None),
'context': StorageContext('nyc', None, None, None, None),
'swift_container': 'container-name',
'storage_path': '/basepath',
'auth_url': 'https://auth.com',
@ -191,7 +191,7 @@ def test_cancel_chunked_upload():
def test_empty_chunks_queued_for_deletion():
chunk_cleanup_queue = FakeQueue()
args = dict(base_args)
args['context'] = StorageContext('nyc', None, chunk_cleanup_queue)
args['context'] = StorageContext('nyc', None, chunk_cleanup_queue, None, None)
swift = FakeSwiftStorage(**args)
uuid, metadata = swift.initiate_chunked_upload()