initial import for Open Source 🎉
This commit is contained in:
parent
1898c361f3
commit
9c0dd3b722
2048 changed files with 218743 additions and 0 deletions
82
storage/__init__.py
Normal file
@@ -0,0 +1,82 @@
from storage.local import LocalStorage
from storage.cloud import (S3Storage, GoogleCloudStorage, RadosGWStorage, CloudFrontedS3Storage,
                           RHOCSStorage)
from storage.fakestorage import FakeStorage
from storage.distributedstorage import DistributedStorage
from storage.swift import SwiftStorage
from storage.azurestorage import AzureStorage
from storage.downloadproxy import DownloadProxy
from util.ipresolver import NoopIPResolver


TYPE_LOCAL_STORAGE = 'LocalStorage'


STORAGE_DRIVER_CLASSES = {
  'LocalStorage': LocalStorage,
  'S3Storage': S3Storage,
  'GoogleCloudStorage': GoogleCloudStorage,
  'RadosGWStorage': RadosGWStorage,
  'SwiftStorage': SwiftStorage,
  'CloudFrontedS3Storage': CloudFrontedS3Storage,
  'AzureStorage': AzureStorage,
  'RHOCSStorage': RHOCSStorage,
}


def get_storage_driver(location, metric_queue, chunk_cleanup_queue, config_provider, ip_resolver,
                       storage_params):
  """ Returns a storage driver instance for the given storage configuration
      (a pair of string name and a dict of parameters). """
  driver = storage_params[0]
  parameters = storage_params[1]
  driver_class = STORAGE_DRIVER_CLASSES.get(driver, FakeStorage)
  context = StorageContext(location, metric_queue, chunk_cleanup_queue, config_provider,
                           ip_resolver)
  return driver_class(context, **parameters)


class StorageContext(object):
  def __init__(self, location, metric_queue, chunk_cleanup_queue, config_provider, ip_resolver):
    self.location = location
    self.metric_queue = metric_queue
    self.chunk_cleanup_queue = chunk_cleanup_queue
    self.config_provider = config_provider
    self.ip_resolver = ip_resolver or NoopIPResolver()


class Storage(object):
  def __init__(self, app=None, metric_queue=None, chunk_cleanup_queue=None, instance_keys=None,
               config_provider=None, ip_resolver=None):
    self.app = app
    if app is not None:
      self.state = self.init_app(app, metric_queue, chunk_cleanup_queue, instance_keys,
                                 config_provider, ip_resolver)
    else:
      self.state = None

  def init_app(self, app, metric_queue, chunk_cleanup_queue, instance_keys, config_provider,
               ip_resolver):
    storages = {}
    for location, storage_params in app.config.get('DISTRIBUTED_STORAGE_CONFIG').items():
      storages[location] = get_storage_driver(location, metric_queue, chunk_cleanup_queue,
                                              config_provider, ip_resolver, storage_params)

    preference = app.config.get('DISTRIBUTED_STORAGE_PREFERENCE', None)
    if not preference:
      preference = storages.keys()

    default_locations = app.config.get('DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS') or []

    download_proxy = None
    if app.config.get('FEATURE_PROXY_STORAGE', False) and instance_keys is not None:
      download_proxy = DownloadProxy(app, instance_keys)

    d_storage = DistributedStorage(storages, preference, default_locations, download_proxy,
                                   app.config.get('REGISTRY_STATE') == 'readonly')

    # register extension with app
    app.extensions = getattr(app, 'extensions', {})
    app.extensions['storage'] = d_storage
    return d_storage

  def __getattr__(self, name):
    return getattr(self.state, name, None)
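# Illustrative sketch (hypothetical values, not part of the original module): the shape of
# the DISTRIBUTED_STORAGE_CONFIG consumed by Storage.init_app above. Each location maps to
# a (driver name, parameter dict) pair; the driver name must be a key of
# STORAGE_DRIVER_CLASSES, and the parameter names below are assumptions for illustration.
#
#   DISTRIBUTED_STORAGE_CONFIG = {
#     'local_us': ('LocalStorage', {'storage_path': '/datastorage/registry'}),
#     'azure_eu': ('AzureStorage', {'azure_container': 'containers',
#                                   'azure_account_name': 'example',
#                                   'storage_path': '/registry'}),
#   }
#   DISTRIBUTED_STORAGE_PREFERENCE = ['local_us']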
326
storage/azurestorage.py
Normal file
@@ -0,0 +1,326 @@
""" Azure storage driver.
|
||||
|
||||
Based on: https://docs.microsoft.com/en-us/azure/storage/blobs/storage-python-how-to-use-blob-storage
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import io
|
||||
import uuid
|
||||
import copy
|
||||
import time
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from azure.common import AzureException
|
||||
from azure.storage.blob import BlockBlobService, ContentSettings, BlobBlock, ContainerPermissions
|
||||
from azure.storage.common.models import CorsRule
|
||||
|
||||
from storage.basestorage import BaseStorage
|
||||
from util.registry.filelike import LimitingStream, READ_UNTIL_END
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_COPY_POLL_SLEEP = 0.25 # seconds
|
||||
_MAX_COPY_POLL_COUNT = 120 # _COPY_POLL_SLEEPs => 30s in total
|
||||
_MAX_BLOCK_SIZE = 1024 * 1024 * 100 # 100MB
|
||||
_BLOCKS_KEY = 'blocks'
|
||||
_CONTENT_TYPE_KEY = 'content-type'
|
||||
|
||||
|
||||
class AzureStorage(BaseStorage):
|
||||
def __init__(self, context, azure_container, storage_path, azure_account_name,
|
||||
azure_account_key=None, sas_token=None, connection_string=None,
|
||||
is_emulated=False, socket_timeout=20, request_timeout=20):
|
||||
super(AzureStorage, self).__init__()
|
||||
self._context = context
|
||||
self._storage_path = storage_path.lstrip('/')
|
||||
|
||||
self._azure_account_name = azure_account_name
|
||||
self._azure_account_key = azure_account_key
|
||||
self._azure_sas_token = sas_token
|
||||
self._azure_container = azure_container
|
||||
self._azure_connection_string = connection_string
|
||||
self._request_timeout = request_timeout
|
||||
|
||||
self._blob_service = BlockBlobService(account_name=azure_account_name,
|
||||
account_key=azure_account_key,
|
||||
sas_token=sas_token,
|
||||
is_emulated=is_emulated,
|
||||
connection_string=connection_string,
|
||||
socket_timeout=socket_timeout)
|
||||
|
||||
def _blob_name_from_path(self, object_path):
|
||||
if '..' in object_path:
|
||||
raise Exception('Relative paths are not allowed; found %s' % object_path)
|
||||
|
||||
return os.path.join(self._storage_path, object_path).rstrip('/')
|
||||
|
||||
def _upload_blob_path_from_uuid(self, uuid):
|
||||
return self._blob_name_from_path(self._upload_blob_name_from_uuid(uuid))
|
||||
|
||||
def _upload_blob_name_from_uuid(self, uuid):
|
||||
return 'uploads/{0}'.format(uuid)
|
||||
|
||||
def get_direct_download_url(self, object_path, request_ip=None, expires_in=60,
|
||||
requires_cors=False, head=False):
|
||||
blob_name = self._blob_name_from_path(object_path)
|
||||
|
||||
try:
|
||||
sas_token = self._blob_service.generate_blob_shared_access_signature(
|
||||
self._azure_container,
|
||||
blob_name,
|
||||
ContainerPermissions.READ,
|
||||
datetime.utcnow() + timedelta(seconds=expires_in))
|
||||
|
||||
blob_url = self._blob_service.make_blob_url(self._azure_container, blob_name,
|
||||
sas_token=sas_token)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to get direct download for path %s', object_path)
|
||||
raise IOError('Exception when trying to get direct download')
|
||||
|
||||
return blob_url
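# The URL built above embeds a short-lived SAS token that grants read-only access to this
# single blob until the expiry derived from `expires_in`, so callers can hand it to
# clients without exposing any Azure account credentials.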
|
||||
|
||||
def validate(self, client):
|
||||
super(AzureStorage, self).validate(client)
|
||||
self._blob_service.get_container_properties(self._azure_container,
|
||||
timeout=self._request_timeout)
|
||||
|
||||
def get_content(self, path):
|
||||
blob_name = self._blob_name_from_path(path)
|
||||
try:
|
||||
blob = self._blob_service.get_blob_to_bytes(self._azure_container, blob_name)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to get path %s', path)
|
||||
raise IOError('Exception when trying to get path')
|
||||
|
||||
return blob.content
|
||||
|
||||
def put_content(self, path, content):
|
||||
blob_name = self._blob_name_from_path(path)
|
||||
try:
|
||||
self._blob_service.create_blob_from_bytes(self._azure_container, blob_name, content)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to put path %s', path)
|
||||
raise IOError('Exception when trying to put path')
|
||||
|
||||
def stream_read(self, path):
|
||||
with self.stream_read_file(path) as f:
|
||||
while True:
|
||||
buf = f.read(self.buffer_size)
|
||||
if not buf:
|
||||
break
|
||||
yield buf
|
||||
|
||||
def stream_read_file(self, path):
|
||||
blob_name = self._blob_name_from_path(path)
|
||||
|
||||
try:
|
||||
output_stream = io.BytesIO()
|
||||
self._blob_service.get_blob_to_stream(self._azure_container, blob_name, output_stream)
|
||||
output_stream.seek(0)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to stream_read_file path %s', path)
raise IOError('Exception when trying to stream_read_file path')
|
||||
|
||||
return output_stream
|
||||
|
||||
def stream_write(self, path, fp, content_type=None, content_encoding=None):
|
||||
blob_name = self._blob_name_from_path(path)
|
||||
content_settings = ContentSettings(
|
||||
content_type=content_type,
|
||||
content_encoding=content_encoding,
|
||||
)
|
||||
|
||||
try:
|
||||
self._blob_service.create_blob_from_stream(self._azure_container, blob_name, fp,
|
||||
content_settings=content_settings)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to stream_write path %s', path)
|
||||
raise IOError('Exception when trying to stream_write path')
|
||||
|
||||
def exists(self, path):
|
||||
blob_name = self._blob_name_from_path(path)
|
||||
try:
|
||||
return self._blob_service.exists(self._azure_container, blob_name,
|
||||
timeout=self._request_timeout)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to check exists path %s', path)
|
||||
raise IOError('Exception when trying to check exists path')
|
||||
|
||||
def remove(self, path):
|
||||
blob_name = self._blob_name_from_path(path)
|
||||
try:
|
||||
self._blob_service.delete_blob(self._azure_container, blob_name)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to remove path %s', path)
|
||||
raise IOError('Exception when trying to remove path')
|
||||
|
||||
def get_checksum(self, path):
|
||||
blob_name = self._blob_name_from_path(path)
|
||||
try:
|
||||
blob = self._blob_service.get_blob_properties(self._azure_container, blob_name)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to get_checksum for path %s', path)
|
||||
raise IOError('Exception when trying to get_checksum path')
|
||||
return blob.properties.etag
|
||||
|
||||
def initiate_chunked_upload(self):
|
||||
random_uuid = str(uuid.uuid4())
|
||||
metadata = {
|
||||
_BLOCKS_KEY: [],
|
||||
_CONTENT_TYPE_KEY: None,
|
||||
}
|
||||
return random_uuid, metadata
|
||||
|
||||
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None):
|
||||
if length == 0:
|
||||
return 0, storage_metadata, None
|
||||
|
||||
upload_blob_path = self._upload_blob_path_from_uuid(uuid)
|
||||
new_metadata = copy.deepcopy(storage_metadata)
|
||||
|
||||
total_bytes_written = 0
|
||||
|
||||
while True:
|
||||
current_length = length - total_bytes_written
|
||||
max_length = (min(current_length, _MAX_BLOCK_SIZE) if length != READ_UNTIL_END
|
||||
else _MAX_BLOCK_SIZE)
|
||||
if max_length <= 0:
|
||||
break
|
||||
|
||||
limited = LimitingStream(in_fp, max_length, seekable=False)
|
||||
|
||||
# Note: Azure fails if a zero-length block is uploaded, so we read all the data here,
|
||||
# and, if there is none, terminate early.
|
||||
block_data = b''
|
||||
for chunk in iter(lambda: limited.read(4096), b""):
|
||||
block_data += chunk
|
||||
|
||||
if len(block_data) == 0:
|
||||
break
|
||||
|
||||
block_index = len(new_metadata[_BLOCKS_KEY])
|
||||
block_id = format(block_index, '05')
|
||||
new_metadata[_BLOCKS_KEY].append(block_id)
|
||||
|
||||
try:
|
||||
self._blob_service.put_block(self._azure_container, upload_blob_path, block_data, block_id,
|
||||
validate_content=True)
|
||||
except AzureException as ae:
|
||||
logger.exception('Exception when trying to stream_upload_chunk block %s for %s', block_id,
|
||||
uuid)
|
||||
return total_bytes_written, new_metadata, ae
|
||||
|
||||
bytes_written = len(block_data)
|
||||
total_bytes_written += bytes_written
|
||||
if bytes_written == 0 or bytes_written < max_length:
|
||||
break
|
||||
|
||||
if content_type is not None:
|
||||
new_metadata[_CONTENT_TYPE_KEY] = content_type
|
||||
|
||||
return total_bytes_written, new_metadata, None
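# How the chunked upload pieces fit together in this driver: each stream_upload_chunk call
# stages one or more uncommitted blocks (at most _MAX_BLOCK_SIZE bytes each) on the
# temporary upload blob and records their zero-padded block IDs in
# storage_metadata[_BLOCKS_KEY]; complete_chunked_upload below commits that block list and
# then copies the assembled blob to its final path. The fixed five-digit format of the
# block IDs also satisfies Azure's requirement that all block IDs of a blob share the same
# length (a service-side constraint noted here, not enforced by this code).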
|
||||
|
||||
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
|
||||
""" Complete the chunked upload and store the final results in the path indicated.
|
||||
Returns nothing.
|
||||
"""
|
||||
# Commit the blob's blocks.
|
||||
upload_blob_path = self._upload_blob_path_from_uuid(uuid)
|
||||
block_list = [BlobBlock(block_id) for block_id in storage_metadata[_BLOCKS_KEY]]
|
||||
|
||||
try:
|
||||
self._blob_service.put_block_list(self._azure_container, upload_blob_path, block_list)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to put block list for path %s from upload %s',
|
||||
final_path, uuid)
|
||||
raise IOError('Exception when trying to put block list')
|
||||
|
||||
# Set the content type on the blob if applicable.
|
||||
if storage_metadata[_CONTENT_TYPE_KEY] is not None:
|
||||
content_settings = ContentSettings(content_type=storage_metadata[_CONTENT_TYPE_KEY])
|
||||
try:
|
||||
self._blob_service.set_blob_properties(self._azure_container, upload_blob_path,
|
||||
content_settings=content_settings)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to set blob properties for path %s', final_path)
|
||||
raise IOError('Exception when trying to set blob properties')
|
||||
|
||||
# Copy the blob to its final location.
|
||||
upload_blob_name = self._upload_blob_name_from_uuid(uuid)
|
||||
copy_source_url = self.get_direct_download_url(upload_blob_name, expires_in=300)
|
||||
|
||||
try:
|
||||
blob_name = self._blob_name_from_path(final_path)
|
||||
copy_prop = self._blob_service.copy_blob(self._azure_container, blob_name,
|
||||
copy_source_url)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to copy uploaded blob %s to path %s', uuid,
|
||||
final_path)
|
||||
raise IOError('Exception when trying to copy uploaded blob')
|
||||
|
||||
self._await_copy(self._azure_container, blob_name, copy_prop)
|
||||
|
||||
# Delete the original blob.
|
||||
logger.debug('Deleting chunked upload %s at path %s', uuid, upload_blob_path)
|
||||
try:
|
||||
self._blob_service.delete_blob(self._azure_container, upload_blob_path)
|
||||
except AzureException:
|
||||
logger.exception('Exception when trying to delete uploaded blob %s', uuid)
|
||||
raise IOError('Exception when trying to delete uploaded blob')
|
||||
|
||||
def cancel_chunked_upload(self, uuid, storage_metadata):
|
||||
""" Cancel the chunked upload and clean up any outstanding partially uploaded data.
|
||||
Returns nothing.
|
||||
"""
|
||||
upload_blob_path = self._upload_blob_path_from_uuid(uuid)
|
||||
logger.debug('Canceling chunked upload %s at path %s', uuid, upload_blob_path)
|
||||
self._blob_service.delete_blob(self._azure_container, upload_blob_path)
|
||||
|
||||
def _await_copy(self, container, blob_name, copy_prop):
|
||||
# Poll for copy completion.
|
||||
count = 0
|
||||
while copy_prop.status == 'pending':
|
||||
props = self._blob_service.get_blob_properties(container, blob_name)
|
||||
copy_prop = props.properties.copy
|
||||
|
||||
if copy_prop.status == 'success':
|
||||
return
|
||||
|
||||
if copy_prop.status == 'failed' or copy_prop.status == 'aborted':
|
||||
raise IOError('Copy of blob %s failed with status %s' % (blob_name, copy_prop.status))
|
||||
|
||||
count = count + 1
|
||||
if count > _MAX_COPY_POLL_COUNT:
|
||||
raise IOError('Timed out waiting for copy to complete')
|
||||
|
||||
time.sleep(_COPY_POLL_SLEEP)
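# Polling budget for _await_copy above: at most _MAX_COPY_POLL_COUNT (120) polls spaced
# _COPY_POLL_SLEEP (0.25s) apart, i.e. roughly 30 seconds before the copy is treated as
# timed out.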
|
||||
|
||||
def copy_to(self, destination, path):
|
||||
if (self.__class__ == destination.__class__):
|
||||
logger.debug('Starting copying file from Azure %s to Azure %s via an Azure copy',
|
||||
self._azure_container, destination)
|
||||
blob_name = self._blob_name_from_path(path)
|
||||
copy_source_url = self.get_direct_download_url(path)
|
||||
copy_prop = self._blob_service.copy_blob(destination._azure_container, blob_name,
|
||||
copy_source_url)
|
||||
self._await_copy(destination._azure_container, blob_name, copy_prop)
|
||||
logger.debug('Finished copying file from Azure %s to Azure %s via an Azure copy',
|
||||
self._azure_container, destination)
|
||||
return
|
||||
|
||||
# Fallback to a slower, default copy.
|
||||
logger.debug('Copying file from Azure container %s to %s via a streamed copy',
|
||||
self._azure_container, destination)
|
||||
with self.stream_read_file(path) as fp:
|
||||
destination.stream_write(path, fp)
|
||||
|
||||
def setup(self):
|
||||
# From: https://docs.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services
|
||||
cors = [CorsRule(allowed_origins='*', allowed_methods=['GET', 'PUT'], max_age_in_seconds=3000,
|
||||
exposed_headers=['x-ms-meta-*'],
|
||||
allowed_headers=['x-ms-meta-data*', 'x-ms-meta-target*', 'x-ms-meta-abc',
|
||||
'Content-Type'])]
|
||||
|
||||
self._blob_service.set_blob_service_properties(cors=cors)
131
storage/basestorage.py
Normal file
@@ -0,0 +1,131 @@
import logging
|
||||
import tempfile
|
||||
|
||||
from digest.digest_tools import content_path
|
||||
from util.registry.filelike import READ_UNTIL_END
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class StoragePaths(object):
|
||||
shared_images = 'sharedimages'
|
||||
|
||||
@staticmethod
|
||||
def temp_store_handler():
|
||||
tmpf = tempfile.TemporaryFile()
|
||||
|
||||
def fn(buf):
|
||||
try:
|
||||
tmpf.write(buf)
|
||||
except IOError:
|
||||
pass
|
||||
|
||||
return tmpf, fn
|
||||
|
||||
def _image_path(self, storage_uuid):
|
||||
return '{0}/{1}/'.format(self.shared_images, storage_uuid)
|
||||
|
||||
def v1_image_layer_path(self, storage_uuid):
|
||||
base_path = self._image_path(storage_uuid)
|
||||
return '{0}layer'.format(base_path)
|
||||
|
||||
def blob_path(self, digest_str):
|
||||
return content_path(digest_str)
|
||||
|
||||
|
||||
class BaseStorage(StoragePaths):
|
||||
def __init__(self):
|
||||
# Set the IO buffer to 64kB
|
||||
self.buffer_size = 64 * 1024
|
||||
|
||||
def setup(self):
|
||||
""" Called to perform any storage system setup. """
|
||||
pass
|
||||
|
||||
def validate(self, client):
|
||||
""" Called to perform storage system validation. The client is an HTTP
|
||||
client to use for any external calls. """
|
||||
# Put a temporary file to make sure the normal storage paths work.
|
||||
self.put_content('_verify', 'testing 123')
|
||||
if not self.exists('_verify'):
|
||||
raise Exception('Could not find verification file')
|
||||
|
||||
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False, head=False):
|
||||
return None
|
||||
|
||||
def get_direct_upload_url(self, path, mime_type, requires_cors=True):
|
||||
return None
|
||||
|
||||
def get_supports_resumable_downloads(self):
|
||||
return False
|
||||
|
||||
def get_content(self, path):
|
||||
raise NotImplementedError
|
||||
|
||||
def put_content(self, path, content):
|
||||
raise NotImplementedError
|
||||
|
||||
def stream_read(self, path):
|
||||
raise NotImplementedError
|
||||
|
||||
def stream_read_file(self, path):
|
||||
raise NotImplementedError
|
||||
|
||||
def stream_write(self, path, fp, content_type=None, content_encoding=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def exists(self, path):
|
||||
raise NotImplementedError
|
||||
|
||||
def remove(self, path):
|
||||
raise NotImplementedError
|
||||
|
||||
def get_checksum(self, path):
|
||||
raise NotImplementedError
|
||||
|
||||
def stream_write_to_fp(self, in_fp, out_fp, num_bytes=READ_UNTIL_END):
|
||||
""" Copy the specified number of bytes from the input file stream to the output stream. If
|
||||
num_bytes < 0 copy until the stream ends. Returns the number of bytes copied.
|
||||
"""
|
||||
bytes_copied = 0
|
||||
while bytes_copied < num_bytes or num_bytes == READ_UNTIL_END:
|
||||
size_to_read = min(num_bytes - bytes_copied, self.buffer_size)
|
||||
if size_to_read < 0:
|
||||
size_to_read = self.buffer_size
|
||||
|
||||
buf = in_fp.read(size_to_read)
|
||||
if not buf:
|
||||
break
|
||||
out_fp.write(buf)
|
||||
bytes_copied += len(buf)
|
||||
|
||||
return bytes_copied
|
||||
|
||||
def copy_to(self, destination, path):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class BaseStorageV2(BaseStorage):
|
||||
def initiate_chunked_upload(self):
|
||||
""" Start a new chunked upload, returning the uuid and any associated storage metadata
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None):
|
||||
""" Upload the specified amount of data from the given file pointer to the chunked destination
|
||||
specified, starting at the given offset. Returns the number of bytes uploaded, a new
|
||||
version of the storage_metadata and an error object (if one occurred or None if none).
|
||||
Pass length as -1 to upload as much data from the in_fp as possible.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
|
||||
""" Complete the chunked upload and store the final results in the path indicated.
|
||||
Returns nothing.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def cancel_chunked_upload(self, uuid, storage_metadata):
|
||||
""" Cancel the chunked upload and clean up any outstanding partially uploaded data.
|
||||
Returns nothing.
|
||||
"""
|
||||
raise NotImplementedError
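# Illustrative sketch of the chunked-upload contract defined above; `engine`, `source_fp`
# and the final path are hypothetical names used only to show the call order.
#
#   upload_id, metadata = engine.initiate_chunked_upload()
#   written, metadata, err = engine.stream_upload_chunk(upload_id, 0, READ_UNTIL_END,
#                                                       source_fp, metadata)
#   if err is None:
#     engine.complete_chunked_upload(upload_id, 'final/blob/path', metadata)
#   else:
#     engine.cancel_chunked_upload(upload_id, metadata)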
707
storage/cloud.py
Normal file
@@ -0,0 +1,707 @@
import cStringIO as StringIO
|
||||
import os
|
||||
import logging
|
||||
import copy
|
||||
|
||||
from cryptography.hazmat.backends import default_backend
|
||||
from cryptography.hazmat.primitives import hashes
|
||||
from cryptography.hazmat.primitives import serialization
|
||||
from cryptography.hazmat.primitives.asymmetric import padding
|
||||
|
||||
from cachetools.func import lru_cache
|
||||
from itertools import chain
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from botocore.signers import CloudFrontSigner
|
||||
from boto.exception import S3ResponseError
|
||||
import boto.s3.connection
|
||||
import boto.s3.multipart
|
||||
import boto.gs.connection
|
||||
import boto.s3.key
|
||||
import boto.gs.key
|
||||
|
||||
from io import BufferedIOBase
|
||||
from uuid import uuid4
|
||||
from collections import namedtuple
|
||||
|
||||
from util.registry import filelike
|
||||
from storage.basestorage import BaseStorageV2
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length'])
|
||||
_CHUNKS_KEY = 'chunks'
|
||||
|
||||
|
||||
class StreamReadKeyAsFile(BufferedIOBase):
|
||||
def __init__(self, key):
|
||||
self._key = key
|
||||
|
||||
def read(self, amt=None):
|
||||
if self.closed:
|
||||
return None
|
||||
|
||||
resp = self._key.read(amt)
|
||||
return resp
|
||||
|
||||
def readable(self):
|
||||
return True
|
||||
|
||||
@property
|
||||
def closed(self):
|
||||
return self._key.closed
|
||||
|
||||
def close(self):
|
||||
self._key.close(fast=True)
|
||||
|
||||
|
||||
class _CloudStorage(BaseStorageV2):
|
||||
def __init__(self, context, connection_class, key_class, connect_kwargs, upload_params,
|
||||
storage_path, bucket_name, access_key=None, secret_key=None):
|
||||
super(_CloudStorage, self).__init__()
|
||||
|
||||
self.minimum_chunk_size = 5 * 1024 * 1024
|
||||
self.maximum_chunk_size = None
|
||||
|
||||
self._initialized = False
|
||||
self._bucket_name = bucket_name
|
||||
self._access_key = access_key
|
||||
self._secret_key = secret_key
|
||||
self._root_path = storage_path
|
||||
self._connection_class = connection_class
|
||||
self._key_class = key_class
|
||||
self._upload_params = upload_params
|
||||
self._connect_kwargs = connect_kwargs
|
||||
self._cloud_conn = None
|
||||
self._cloud_bucket = None
|
||||
self._context = context
|
||||
|
||||
def _initialize_cloud_conn(self):
|
||||
if not self._initialized:
|
||||
self._cloud_conn = self._connection_class(self._access_key, self._secret_key,
|
||||
**self._connect_kwargs)
|
||||
self._cloud_bucket = self._cloud_conn.get_bucket(self._bucket_name, validate=False)
|
||||
self._initialized = True
|
||||
|
||||
def _debug_key(self, key):
|
||||
"""Used for debugging only."""
|
||||
orig_meth = key.bucket.connection.make_request
|
||||
|
||||
def new_meth(*args, **kwargs):
|
||||
print '#' * 16
|
||||
print args
|
||||
print kwargs
|
||||
print '#' * 16
|
||||
return orig_meth(*args, **kwargs)
|
||||
key.bucket.connection.make_request = new_meth
|
||||
|
||||
def _init_path(self, path=None):
|
||||
path = os.path.join(self._root_path, path) if path else self._root_path
|
||||
if path and path[0] == '/':
|
||||
return path[1:]
|
||||
return path
|
||||
|
||||
def get_cloud_conn(self):
|
||||
self._initialize_cloud_conn()
|
||||
return self._cloud_conn
|
||||
|
||||
def get_cloud_bucket(self):
|
||||
return self._cloud_bucket
|
||||
|
||||
def get_content(self, path):
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
key = self._key_class(self._cloud_bucket, path)
|
||||
try:
|
||||
return key.get_contents_as_string()
|
||||
except S3ResponseError as s3r:
|
||||
# Raise an IOError in case the key was not found, to maintain the current
|
||||
# interface.
|
||||
if s3r.error_code == 'NoSuchKey':
|
||||
raise IOError('No such key: \'{0}\''.format(path))
|
||||
|
||||
raise
|
||||
|
||||
def put_content(self, path, content):
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
key = self._key_class(self._cloud_bucket, path)
|
||||
key.set_contents_from_string(content, **self._upload_params)
|
||||
return path
|
||||
|
||||
def get_supports_resumable_downloads(self):
|
||||
return True
|
||||
|
||||
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False, head=False):
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
k = self._key_class(self._cloud_bucket, path)
|
||||
if head:
|
||||
return k.generate_url(expires_in, 'HEAD')
|
||||
return k.generate_url(expires_in)
|
||||
|
||||
def get_direct_upload_url(self, path, mime_type, requires_cors=True):
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
key = self._key_class(self._cloud_bucket, path)
|
||||
url = key.generate_url(300, 'PUT', headers={'Content-Type': mime_type}, encrypt_key=True)
|
||||
return url
|
||||
|
||||
def stream_read(self, path):
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
key = self._key_class(self._cloud_bucket, path)
|
||||
if not key.exists():
|
||||
raise IOError('No such key: \'{0}\''.format(path))
|
||||
while True:
|
||||
buf = key.read(self.buffer_size)
|
||||
if not buf:
|
||||
break
|
||||
yield buf
|
||||
|
||||
def stream_read_file(self, path):
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
key = self._key_class(self._cloud_bucket, path)
|
||||
if not key.exists():
|
||||
raise IOError('No such key: \'{0}\''.format(path))
|
||||
return StreamReadKeyAsFile(key)
|
||||
|
||||
def __initiate_multipart_upload(self, path, content_type, content_encoding):
|
||||
# Minimum size of upload part size on S3 is 5MB
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
|
||||
metadata = {}
|
||||
if content_type is not None:
|
||||
metadata['Content-Type'] = content_type
|
||||
|
||||
if content_encoding is not None:
|
||||
metadata['Content-Encoding'] = content_encoding
|
||||
|
||||
if self._context.metric_queue is not None:
|
||||
self._context.metric_queue.multipart_upload_start.Inc()
|
||||
|
||||
return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
|
||||
**self._upload_params)
|
||||
|
||||
def stream_write(self, path, fp, content_type=None, content_encoding=None):
|
||||
""" Writes the data found in the file-like stream to the given path. Raises an IOError
|
||||
if the write fails.
|
||||
"""
|
||||
_, write_error = self._stream_write_internal(path, fp, content_type, content_encoding)
|
||||
if write_error is not None:
|
||||
logger.error('Error when trying to stream_write path `%s`: %s', path, write_error)
|
||||
raise IOError('Exception when trying to stream_write path')
|
||||
|
||||
def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None,
|
||||
cancel_on_error=True, size=filelike.READ_UNTIL_END):
|
||||
""" Writes the data found in the file-like stream to the given path, with optional limit
|
||||
on size. Note that this method returns a *tuple* of (bytes_written, write_error) and should
|
||||
*not* raise an exception (such as IOError) if a problem uploading occurred. ALWAYS check
|
||||
the returned tuple on calls to this method.
|
||||
"""
|
||||
write_error = None
|
||||
|
||||
try:
|
||||
mp = self.__initiate_multipart_upload(path, content_type, content_encoding)
|
||||
except S3ResponseError as e:
|
||||
logger.exception('Exception when initiating multipart upload')
|
||||
return 0, e
|
||||
|
||||
# We are going to reuse this but be VERY careful to only read the number of bytes written to it
|
||||
buf = StringIO.StringIO()
|
||||
|
||||
num_part = 1
|
||||
total_bytes_written = 0
|
||||
while size == filelike.READ_UNTIL_END or total_bytes_written < size:
|
||||
bytes_to_copy = self.minimum_chunk_size
|
||||
if size != filelike.READ_UNTIL_END:
|
||||
# We never want to ask for more bytes than our caller has indicated to copy
|
||||
bytes_to_copy = min(bytes_to_copy, size - total_bytes_written)
|
||||
|
||||
buf.seek(0)
|
||||
try:
|
||||
# Stage the bytes into the buffer for use with the multipart upload file API
|
||||
bytes_staged = self.stream_write_to_fp(fp, buf, bytes_to_copy)
|
||||
if bytes_staged == 0:
|
||||
break
|
||||
|
||||
buf.seek(0)
|
||||
mp.upload_part_from_file(buf, num_part, size=bytes_staged)
|
||||
total_bytes_written += bytes_staged
|
||||
num_part += 1
|
||||
except (S3ResponseError, IOError) as e:
|
||||
logger.warn('Error when writing to stream in stream_write_internal at path %s: %s', path, e)
|
||||
write_error = e
|
||||
|
||||
if self._context.metric_queue is not None:
|
||||
self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['failure'])
|
||||
|
||||
if cancel_on_error:
|
||||
try:
|
||||
mp.cancel_upload()
|
||||
except (S3ResponseError, IOError):
|
||||
logger.exception('Could not cancel upload')
|
||||
|
||||
return 0, write_error
|
||||
else:
|
||||
break
|
||||
|
||||
if total_bytes_written > 0:
|
||||
if self._context.metric_queue is not None:
|
||||
self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['success'])
|
||||
|
||||
self._perform_action_with_retry(mp.complete_upload)
|
||||
|
||||
return total_bytes_written, write_error
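# Summary of the loop above: data is staged through the reused StringIO buffer in
# minimum_chunk_size (5MB) pieces, since S3 rejects multipart parts smaller than 5MB
# except for the last one; each staged piece becomes one upload_part_from_file call, and
# the multipart upload is only completed (with retries) when at least one byte was
# actually written.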
|
||||
|
||||
def exists(self, path):
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
key = self._key_class(self._cloud_bucket, path)
|
||||
return key.exists()
|
||||
|
||||
def remove(self, path):
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
key = self._key_class(self._cloud_bucket, path)
|
||||
if key.exists():
|
||||
# It's a file
|
||||
key.delete()
|
||||
return
|
||||
# We assume it's a directory
|
||||
if not path.endswith('/'):
|
||||
path += '/'
|
||||
for key in self._cloud_bucket.list(prefix=path):
|
||||
key.delete()
|
||||
|
||||
def get_checksum(self, path):
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
key = self._key_class(self._cloud_bucket, path)
|
||||
k = self._cloud_bucket.lookup(key)
|
||||
if k is None:
|
||||
raise IOError('No such key: \'{0}\''.format(path))
|
||||
|
||||
return k.etag[1:-1][:7]
|
||||
|
||||
def copy_to(self, destination, path):
|
||||
""" Copies the given path from this storage to the destination storage. """
|
||||
self._initialize_cloud_conn()
|
||||
|
||||
# First try to copy directly via boto, but only if the storages are the
|
||||
# same type, with the same access information.
|
||||
if (self.__class__ == destination.__class__ and
|
||||
self._access_key and self._secret_key and
|
||||
self._access_key == destination._access_key and
|
||||
self._secret_key == destination._secret_key and
|
||||
self._connect_kwargs == destination._connect_kwargs):
|
||||
|
||||
# Initialize the cloud connection on the destination as well.
|
||||
destination._initialize_cloud_conn()
|
||||
|
||||
# Check the buckets for both the source and destination locations.
|
||||
if self._cloud_bucket is None:
|
||||
logger.error('Cloud bucket not found for location %s; Configuration is probably invalid!',
|
||||
self._bucket_name)
|
||||
return
|
||||
|
||||
if destination._cloud_bucket is None:
|
||||
logger.error('Cloud bucket not found for location %s; Configuration is probably invalid!',
|
||||
destination._bucket_name)
|
||||
return
|
||||
|
||||
# Perform the copy.
|
||||
logger.debug('Copying file from %s to %s via a direct boto copy', self._cloud_bucket,
|
||||
destination._cloud_bucket)
|
||||
|
||||
source_path = self._init_path(path)
|
||||
source_key = self._key_class(self._cloud_bucket, source_path)
|
||||
|
||||
dest_path = destination._init_path(path)
|
||||
source_key.copy(destination._cloud_bucket, dest_path)
|
||||
return
|
||||
|
||||
# Fallback to a slower, default copy.
|
||||
logger.debug('Copying file from %s to %s via a streamed copy', self._cloud_bucket,
|
||||
destination)
|
||||
with self.stream_read_file(path) as fp:
|
||||
destination.stream_write(path, fp)
|
||||
|
||||
def _rel_upload_path(self, uuid):
|
||||
return 'uploads/{0}'.format(uuid)
|
||||
|
||||
def initiate_chunked_upload(self):
|
||||
self._initialize_cloud_conn()
|
||||
random_uuid = str(uuid4())
|
||||
|
||||
metadata = {
|
||||
_CHUNKS_KEY: [],
|
||||
}
|
||||
|
||||
return random_uuid, metadata
|
||||
|
||||
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None):
|
||||
self._initialize_cloud_conn()
|
||||
|
||||
# We are going to upload each chunk to a separate key
|
||||
chunk_path = self._rel_upload_path(str(uuid4()))
|
||||
bytes_written, write_error = self._stream_write_internal(chunk_path, in_fp,
|
||||
cancel_on_error=False, size=length,
|
||||
content_type=content_type)
|
||||
|
||||
new_metadata = copy.deepcopy(storage_metadata)
|
||||
|
||||
# We are only going to track keys to which data was confirmed written
|
||||
if bytes_written > 0:
|
||||
new_metadata[_CHUNKS_KEY].append(_PartUploadMetadata(chunk_path, offset, bytes_written))
|
||||
|
||||
return bytes_written, new_metadata, write_error
|
||||
|
||||
def _chunk_generator(self, chunk_list):
|
||||
for chunk in chunk_list:
|
||||
yield filelike.StreamSlice(self.stream_read_file(chunk.path), 0, chunk.length)
|
||||
|
||||
@staticmethod
|
||||
def _chunk_list_from_metadata(storage_metadata):
|
||||
return [_PartUploadMetadata(*chunk_args) for chunk_args in storage_metadata[_CHUNKS_KEY]]
|
||||
|
||||
def _client_side_chunk_join(self, final_path, chunk_list):
|
||||
# If there's only one chunk, just "move" (copy and delete) the key and call it a day.
|
||||
if len(chunk_list) == 1:
|
||||
chunk_path = self._init_path(chunk_list[0].path)
|
||||
abs_final_path = self._init_path(final_path)
|
||||
|
||||
# Let the copy raise an exception if it fails.
|
||||
self._cloud_bucket.copy_key(abs_final_path, self._bucket_name, chunk_path)
|
||||
|
||||
# Attempt to clean up the old chunk.
|
||||
try:
|
||||
self._cloud_bucket.delete_key(chunk_path)
|
||||
except IOError:
|
||||
# We failed to delete a chunk. This sucks, but we shouldn't fail the push.
|
||||
msg = 'Failed to clean up chunk %s for move of %s'
|
||||
logger.exception(msg, chunk_path, abs_final_path)
|
||||
else:
|
||||
# Concatenate and write all the chunks as one key.
|
||||
concatenated = filelike.FilelikeStreamConcat(self._chunk_generator(chunk_list))
|
||||
self.stream_write(final_path, concatenated)
|
||||
|
||||
# Attempt to clean up all the chunks.
|
||||
for chunk in chunk_list:
|
||||
try:
|
||||
self._cloud_bucket.delete_key(self._init_path(chunk.path))
|
||||
except IOError:
|
||||
# We failed to delete a chunk. This sucks, but we shouldn't fail the push.
|
||||
msg = 'Failed to clean up chunk %s for reupload of %s'
|
||||
logger.exception(msg, chunk.path, final_path)
|
||||
|
||||
@staticmethod
|
||||
def _perform_action_with_retry(action, *args, **kwargs):
|
||||
# Note: Sometimes Amazon S3 simply raises an internal error when trying to complete
# an action. The recommendation is to simply try calling the action again.
|
||||
for remaining_retries in range(2, -1, -1):
|
||||
try:
|
||||
action(*args, **kwargs)
|
||||
break
|
||||
except S3ResponseError as s3re:
|
||||
if remaining_retries and s3re.status == 200 and s3re.error_code == 'InternalError':
|
||||
# Weird internal error case. Retry.
|
||||
continue
|
||||
|
||||
# Otherwise, raise it.
|
||||
logger.exception('Exception trying to perform action %s', action)
|
||||
raise s3re
|
||||
|
||||
@staticmethod
|
||||
def _rechunk(chunk, max_chunk_size):
|
||||
""" Rechunks the chunk list to meet maximum chunk size restrictions for the storage engine. """
|
||||
if max_chunk_size is None or chunk.length <= max_chunk_size:
|
||||
yield chunk
|
||||
else:
|
||||
newchunk_length = chunk.length / 2
|
||||
first_subchunk = _PartUploadMetadata(chunk.path, chunk.offset, newchunk_length)
|
||||
second_subchunk = _PartUploadMetadata(chunk.path,
|
||||
chunk.offset + newchunk_length,
|
||||
chunk.length - newchunk_length)
|
||||
for subchunk in chain(_CloudStorage._rechunk(first_subchunk, max_chunk_size),
|
||||
_CloudStorage._rechunk(second_subchunk, max_chunk_size)):
|
||||
yield subchunk
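# Worked example for _rechunk above: with a maximum_chunk_size of 5GB, a 12GB chunk is
# halved into two 6GB pieces and then again into four 3GB sub-chunks referencing the same
# source path at successive offsets, so every resulting piece fits the engine's maximum
# part size.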
|
||||
|
||||
|
||||
def complete_chunked_upload(self, uuid, final_path, storage_metadata, force_client_side=False):
|
||||
self._initialize_cloud_conn()
|
||||
chunk_list = self._chunk_list_from_metadata(storage_metadata)
|
||||
|
||||
# Here is where things get interesting: we are going to try to assemble this server side
|
||||
# In order to be a candidate all parts (after offsets have been computed) must be at least 5MB
|
||||
server_side_assembly = False
|
||||
if not force_client_side:
|
||||
server_side_assembly = True
|
||||
for chunk_offset, chunk in enumerate(chunk_list):
|
||||
# If the chunk is both too small, and not the last chunk, we rule out server side assembly
|
||||
if chunk.length < self.minimum_chunk_size and (chunk_offset + 1) < len(chunk_list):
|
||||
server_side_assembly = False
|
||||
break
|
||||
|
||||
if server_side_assembly:
|
||||
logger.debug('Performing server side assembly of multi-part upload for: %s', final_path)
|
||||
try:
|
||||
# Awesome, we can do this completely server side, now we have to start a new multipart
|
||||
# upload and use copy_part_from_key to set all of the chunks.
|
||||
mpu = self.__initiate_multipart_upload(final_path, content_type=None, content_encoding=None)
|
||||
updated_chunks = chain.from_iterable([_CloudStorage._rechunk(c, self.maximum_chunk_size)
|
||||
for c in chunk_list])
|
||||
|
||||
for index, chunk in enumerate(updated_chunks):
|
||||
abs_chunk_path = self._init_path(chunk.path)
|
||||
self._perform_action_with_retry(mpu.copy_part_from_key, self.get_cloud_bucket().name,
|
||||
abs_chunk_path, index + 1, start=chunk.offset,
|
||||
end=chunk.length + chunk.offset - 1)
|
||||
|
||||
self._perform_action_with_retry(mpu.complete_upload)
|
||||
except IOError as ioe:
|
||||
# Something bad happened, log it and then give up
|
||||
msg = 'Exception when attempting server-side assembly for: %s'
|
||||
logger.exception(msg, final_path)
|
||||
mpu.cancel_upload()
|
||||
raise ioe
|
||||
|
||||
else:
|
||||
# We are going to turn all of the server side objects into a single file-like stream, and
|
||||
# pass that to stream_write to chunk and upload the final object.
|
||||
self._client_side_chunk_join(final_path, chunk_list)
|
||||
|
||||
|
||||
def cancel_chunked_upload(self, uuid, storage_metadata):
|
||||
self._initialize_cloud_conn()
|
||||
|
||||
# We have to go through and delete all of the uploaded chunks
|
||||
for chunk in self._chunk_list_from_metadata(storage_metadata):
|
||||
self.remove(chunk.path)
|
||||
|
||||
|
||||
class S3Storage(_CloudStorage):
|
||||
def __init__(self, context, storage_path, s3_bucket, s3_access_key=None,
|
||||
s3_secret_key=None, host=None, port=None):
|
||||
upload_params = {
|
||||
'encrypt_key': True,
|
||||
}
|
||||
connect_kwargs = {}
|
||||
if host:
|
||||
if host.startswith('http:') or host.startswith('https:'):
|
||||
raise ValueError('host name must not start with http:// or https://')
|
||||
|
||||
connect_kwargs['host'] = host
|
||||
|
||||
if port:
|
||||
connect_kwargs['port'] = int(port)
|
||||
|
||||
super(S3Storage, self).__init__(context, boto.s3.connection.S3Connection, boto.s3.key.Key,
|
||||
connect_kwargs, upload_params, storage_path, s3_bucket,
|
||||
access_key=s3_access_key or None,
|
||||
secret_key=s3_secret_key or None)
|
||||
|
||||
self.maximum_chunk_size = 5 * 1024 * 1024 * 1024 # 5GB.
|
||||
|
||||
def setup(self):
|
||||
self.get_cloud_bucket().set_cors_xml("""<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CORSConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
|
||||
<CORSRule>
|
||||
<AllowedOrigin>*</AllowedOrigin>
|
||||
<AllowedMethod>GET</AllowedMethod>
|
||||
<MaxAgeSeconds>3000</MaxAgeSeconds>
|
||||
<AllowedHeader>Authorization</AllowedHeader>
|
||||
</CORSRule>
|
||||
<CORSRule>
|
||||
<AllowedOrigin>*</AllowedOrigin>
|
||||
<AllowedMethod>PUT</AllowedMethod>
|
||||
<MaxAgeSeconds>3000</MaxAgeSeconds>
|
||||
<AllowedHeader>Content-Type</AllowedHeader>
|
||||
<AllowedHeader>x-amz-acl</AllowedHeader>
|
||||
<AllowedHeader>origin</AllowedHeader>
|
||||
</CORSRule>
|
||||
</CORSConfiguration>""")
|
||||
|
||||
class GoogleCloudStorage(_CloudStorage):
|
||||
def __init__(self, context, storage_path, access_key, secret_key, bucket_name):
|
||||
upload_params = {}
|
||||
connect_kwargs = {}
|
||||
super(GoogleCloudStorage, self).__init__(context, boto.gs.connection.GSConnection,
|
||||
boto.gs.key.Key, connect_kwargs, upload_params,
|
||||
storage_path, bucket_name, access_key, secret_key)
|
||||
|
||||
def setup(self):
|
||||
self.get_cloud_bucket().set_cors_xml("""<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CorsConfig>
|
||||
<Cors>
|
||||
<Origins>
|
||||
<Origin>*</Origin>
|
||||
</Origins>
|
||||
<Methods>
|
||||
<Method>GET</Method>
|
||||
<Method>PUT</Method>
|
||||
</Methods>
|
||||
<ResponseHeaders>
|
||||
<ResponseHeader>Content-Type</ResponseHeader>
|
||||
</ResponseHeaders>
|
||||
<MaxAgeSec>3000</MaxAgeSec>
|
||||
</Cors>
|
||||
</CorsConfig>""")
|
||||
|
||||
def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None,
|
||||
cancel_on_error=True, size=filelike.READ_UNTIL_END):
|
||||
""" Writes the data found in the file-like stream to the given path, with optional limit
|
||||
on size. Note that this method returns a *tuple* of (bytes_written, write_error) and should
|
||||
*not* raise an exception (such as IOError) if a problem uploading occurred. ALWAYS check
|
||||
the returned tuple on calls to this method.
|
||||
"""
|
||||
# Minimum size of upload part size on S3 is 5MB
|
||||
self._initialize_cloud_conn()
|
||||
path = self._init_path(path)
|
||||
key = self._key_class(self._cloud_bucket, path)
|
||||
|
||||
if content_type is not None:
|
||||
key.set_metadata('Content-Type', content_type)
|
||||
|
||||
if content_encoding is not None:
|
||||
key.set_metadata('Content-Encoding', content_encoding)
|
||||
|
||||
if size != filelike.READ_UNTIL_END:
|
||||
fp = filelike.StreamSlice(fp, 0, size)
|
||||
|
||||
# TODO figure out how to handle cancel_on_error=False
|
||||
try:
|
||||
key.set_contents_from_stream(fp)
|
||||
except IOError as ex:
|
||||
return 0, ex
|
||||
|
||||
return key.size, None
|
||||
|
||||
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
|
||||
self._initialize_cloud_conn()
|
||||
|
||||
# Boto does not support GCS's multipart upload API because it differs from S3, so
|
||||
# we are forced to join it all locally and then reupload.
|
||||
# See https://github.com/boto/boto/issues/3355
|
||||
chunk_list = self._chunk_list_from_metadata(storage_metadata)
|
||||
self._client_side_chunk_join(final_path, chunk_list)
|
||||
|
||||
|
||||
class RadosGWStorage(_CloudStorage):
|
||||
def __init__(self, context, hostname, is_secure, storage_path, access_key, secret_key,
|
||||
bucket_name, port=None):
|
||||
upload_params = {}
|
||||
connect_kwargs = {
|
||||
'host': hostname,
|
||||
'is_secure': is_secure,
|
||||
'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
|
||||
}
|
||||
|
||||
if port:
|
||||
connect_kwargs['port'] = int(port)
|
||||
|
||||
super(RadosGWStorage, self).__init__(context, boto.s3.connection.S3Connection,
|
||||
boto.s3.key.Key, connect_kwargs, upload_params,
|
||||
storage_path, bucket_name, access_key, secret_key)
|
||||
|
||||
# TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
|
||||
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False,
|
||||
head=False):
|
||||
if requires_cors:
|
||||
return None
|
||||
|
||||
return super(RadosGWStorage, self).get_direct_download_url(path, request_ip, expires_in,
|
||||
requires_cors, head)
|
||||
|
||||
# TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
|
||||
def get_direct_upload_url(self, path, mime_type, requires_cors=True):
|
||||
if requires_cors:
|
||||
return None
|
||||
|
||||
return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors)
|
||||
|
||||
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
|
||||
self._initialize_cloud_conn()
|
||||
|
||||
# RadosGW does not support multipart copying from keys, so we are forced to join
|
||||
# it all locally and then reupload.
|
||||
# See https://github.com/ceph/ceph/pull/5139
|
||||
chunk_list = self._chunk_list_from_metadata(storage_metadata)
|
||||
self._client_side_chunk_join(final_path, chunk_list)
|
||||
|
||||
|
||||
class RHOCSStorage(RadosGWStorage):
|
||||
""" RHOCSStorage implements storage explicitly via RHOCS. For now, this uses the same protocol
|
||||
as RadosGW, but we create a distinct driver for future additional capabilities.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class CloudFrontedS3Storage(S3Storage):
|
||||
""" An S3Storage engine that redirects to CloudFront for all requests outside of AWS. """
|
||||
def __init__(self, context, cloudfront_distribution_domain, cloudfront_key_id,
|
||||
cloudfront_privatekey_filename, storage_path, s3_bucket, *args, **kwargs):
|
||||
super(CloudFrontedS3Storage, self).__init__(context, storage_path, s3_bucket, *args, **kwargs)
|
||||
|
||||
self.cloudfront_distribution_domain = cloudfront_distribution_domain
|
||||
self.cloudfront_key_id = cloudfront_key_id
|
||||
self.cloudfront_privatekey = self._load_private_key(cloudfront_privatekey_filename)
|
||||
|
||||
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False,
|
||||
head=False):
|
||||
# If CloudFront could not be loaded, fall back to normal S3.
|
||||
if self.cloudfront_privatekey is None or request_ip is None:
|
||||
return super(CloudFrontedS3Storage, self).get_direct_download_url(path, request_ip,
|
||||
expires_in, requires_cors,
|
||||
head)
|
||||
|
||||
resolved_ip_info = None
|
||||
logger.debug('Got direct download request for path "%s" with IP "%s"', path, request_ip)
|
||||
|
||||
# Lookup the IP address in our resolution table and determine whether it is under AWS.
|
||||
# If it is, then return an S3 signed URL, since we are in-network.
|
||||
resolved_ip_info = self._context.ip_resolver.resolve_ip(request_ip)
|
||||
logger.debug('Resolved IP information for IP %s: %s', request_ip, resolved_ip_info)
|
||||
if resolved_ip_info and resolved_ip_info.provider == 'aws':
|
||||
return super(CloudFrontedS3Storage, self).get_direct_download_url(path, request_ip,
|
||||
expires_in, requires_cors,
|
||||
head)
|
||||
|
||||
url = 'https://%s/%s' % (self.cloudfront_distribution_domain, path)
|
||||
expire_date = datetime.now() + timedelta(seconds=expires_in)
|
||||
signer = self._get_cloudfront_signer()
|
||||
signed_url = signer.generate_presigned_url(url, date_less_than=expire_date)
|
||||
logger.debug('Returning CloudFront URL for path "%s" with IP "%s": %s', path, resolved_ip_info,
|
||||
signed_url)
|
||||
return signed_url
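# Routing summary for the method above: requests whose client IP resolves to AWS (or for
# which no CloudFront key or request IP is available) fall back to an ordinary signed S3
# URL, while all other clients receive a CloudFront URL signed with the configured private
# key and the same expiry window.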
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _get_cloudfront_signer(self):
|
||||
return CloudFrontSigner(self.cloudfront_key_id, self._get_rsa_signer())
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _get_rsa_signer(self):
|
||||
private_key = self.cloudfront_privatekey
|
||||
def handler(message):
|
||||
return private_key.sign(message, padding.PKCS1v15(), hashes.SHA1())
|
||||
|
||||
return handler
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _load_private_key(self, cloudfront_privatekey_filename):
|
||||
""" Returns the private key, loaded from the config provider, used to sign direct
|
||||
download URLs to CloudFront.
|
||||
"""
|
||||
if self._context.config_provider is None:
|
||||
return None
|
||||
|
||||
with self._context.config_provider.get_volume_file(cloudfront_privatekey_filename) as key_file:
|
||||
return serialization.load_pem_private_key(
|
||||
key_file.read(),
|
||||
password=None,
|
||||
backend=default_backend()
|
||||
)
83
storage/distributedstorage.py
Normal file
@@ -0,0 +1,83 @@
import random
|
||||
import logging
|
||||
|
||||
from functools import wraps
|
||||
|
||||
from storage.basestorage import StoragePaths, BaseStorage, BaseStorageV2
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def _location_aware(unbound_func, requires_write=False):
|
||||
@wraps(unbound_func)
|
||||
def wrapper(self, locations, *args, **kwargs):
|
||||
if requires_write:
|
||||
assert not self.readonly_mode
|
||||
|
||||
storage = None
|
||||
for preferred in self.preferred_locations:
|
||||
if preferred in locations:
|
||||
storage = self._storages[preferred]
|
||||
break
|
||||
|
||||
if not storage:
|
||||
storage = self._storages[random.sample(locations, 1)[0]]
|
||||
|
||||
storage_func = getattr(storage, unbound_func.__name__)
|
||||
return storage_func(*args, **kwargs)
|
||||
return wrapper
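# Dispatch note for _location_aware above: each wrapped method gains a leading `locations`
# argument naming the locations that hold the object; the first preferred location found
# in that set wins, otherwise a random member is chosen, and write operations are refused
# outright while readonly_mode is set.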
|
||||
|
||||
|
||||
class DistributedStorage(StoragePaths):
|
||||
def __init__(self, storages, preferred_locations=None, default_locations=None, proxy=None,
|
||||
readonly_mode=False):
|
||||
self._storages = dict(storages)
|
||||
self.preferred_locations = list(preferred_locations or [])
|
||||
self.default_locations = list(default_locations or [])
|
||||
self.proxy = proxy
|
||||
self.readonly_mode = readonly_mode
|
||||
|
||||
@property
|
||||
def locations(self):
|
||||
""" Returns the names of the locations supported. """
|
||||
return list(self._storages.keys())
|
||||
|
||||
_get_direct_download_url = _location_aware(BaseStorage.get_direct_download_url)
|
||||
|
||||
get_direct_upload_url = _location_aware(BaseStorage.get_direct_upload_url)
|
||||
get_content = _location_aware(BaseStorage.get_content)
|
||||
put_content = _location_aware(BaseStorage.put_content, requires_write=True)
|
||||
stream_read = _location_aware(BaseStorage.stream_read)
|
||||
stream_read_file = _location_aware(BaseStorage.stream_read_file)
|
||||
stream_write = _location_aware(BaseStorage.stream_write, requires_write=True)
|
||||
exists = _location_aware(BaseStorage.exists)
|
||||
remove = _location_aware(BaseStorage.remove, requires_write=True)
|
||||
validate = _location_aware(BaseStorage.validate, requires_write=True)
|
||||
get_checksum = _location_aware(BaseStorage.get_checksum)
|
||||
get_supports_resumable_downloads = _location_aware(BaseStorage.get_supports_resumable_downloads)
|
||||
|
||||
initiate_chunked_upload = _location_aware(BaseStorageV2.initiate_chunked_upload,
|
||||
requires_write=True)
|
||||
stream_upload_chunk = _location_aware(BaseStorageV2.stream_upload_chunk,
|
||||
requires_write=True)
|
||||
complete_chunked_upload = _location_aware(BaseStorageV2.complete_chunked_upload,
|
||||
requires_write=True)
|
||||
cancel_chunked_upload = _location_aware(BaseStorageV2.cancel_chunked_upload,
|
||||
requires_write=True)
|
||||
|
||||
def get_direct_download_url(self, locations, path, request_ip=None, expires_in=600,
|
||||
requires_cors=False, head=False):
|
||||
download_url = self._get_direct_download_url(locations, path, request_ip, expires_in,
|
||||
requires_cors, head)
|
||||
if download_url is None:
|
||||
return None
|
||||
|
||||
if self.proxy is None:
|
||||
return download_url
|
||||
|
||||
return self.proxy.proxy_download_url(download_url)
|
||||
|
||||
def copy_between(self, path, source_location, destination_location):
|
||||
""" Copies a file between the source location and the destination location. """
|
||||
source_storage = self._storages[source_location]
|
||||
destination_storage = self._storages[destination_location]
|
||||
source_storage.copy_to(destination_storage, path)
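# Illustrative sketch (hypothetical driver names): wiring two locations into
# DistributedStorage and reading a blob from whichever location holds it, preferring
# 'local_us'.
#
#   storage = DistributedStorage({'local_us': local_driver, 's3_eu': s3_driver},
#                                preferred_locations=['local_us'])
#   data = storage.get_content(['local_us', 's3_eu'], 'some/blob/path')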
176
storage/downloadproxy.py
Normal file
@@ -0,0 +1,176 @@
import logging
|
||||
import base64
|
||||
import urllib
|
||||
|
||||
from urlparse import urlparse
|
||||
from flask import abort, request
|
||||
from jsonschema import validate, ValidationError
|
||||
|
||||
from util.security.registry_jwt import (generate_bearer_token, decode_bearer_token,
|
||||
InvalidBearerTokenException)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
PROXY_STORAGE_MAX_LIFETIME_S = 30 # Seconds
|
||||
STORAGE_PROXY_SUBJECT = 'storageproxy'
|
||||
STORAGE_PROXY_ACCESS_TYPE = 'storageproxy'
|
||||
|
||||
ACCESS_SCHEMA = {
|
||||
'type': 'array',
|
||||
'description': 'List of access granted to the subject',
|
||||
'items': {
|
||||
'type': 'object',
|
||||
'required': [
|
||||
'type',
|
||||
'scheme',
|
||||
'host',
|
||||
'uri',
|
||||
],
|
||||
'properties': {
|
||||
'type': {
|
||||
'type': 'string',
|
||||
'description': 'We only allow storage proxy permissions',
|
||||
'enum': [
|
||||
'storageproxy',
|
||||
],
|
||||
},
|
||||
'scheme': {
|
||||
'type': 'string',
|
||||
'description': 'The scheme for the storage URL being proxied'
|
||||
},
|
||||
'host': {
|
||||
'type': 'string',
|
||||
'description': 'The hostname for the storage URL being proxied'
|
||||
},
|
||||
'uri': {
|
||||
'type': 'string',
|
||||
'description': 'The URI path for the storage URL being proxied'
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class DownloadProxy(object):
|
||||
""" Helper class to enable proxying of direct download URLs for storage via the registry's
|
||||
local NGINX.
|
||||
"""
|
||||
def __init__(self, app, instance_keys):
|
||||
self.app = app
|
||||
self.instance_keys = instance_keys
|
||||
|
||||
app.add_url_rule('/_storage_proxy_auth', '_storage_proxy_auth', self._validate_proxy_url)
|
||||
|
||||
def proxy_download_url(self, download_url):
|
||||
""" Returns a URL to proxy the specified blob download URL.
|
||||
"""
|
||||
# Parse the URL to be downloaded into its components (host, path, scheme).
|
||||
parsed = urlparse(download_url)
|
||||
|
||||
path = parsed.path
|
||||
if parsed.query:
|
||||
path = path + '?' + parsed.query
|
||||
|
||||
if path.startswith('/'):
|
||||
path = path[1:]
|
||||
|
||||
access = {
|
||||
'type': STORAGE_PROXY_ACCESS_TYPE,
|
||||
'uri': path,
|
||||
'host': parsed.netloc,
|
||||
'scheme': parsed.scheme,
|
||||
}
|
||||
|
||||
# Generate a JWT that signs access to this URL. This JWT will be passed back to the registry
|
||||
# code when the download commences. Note that we don't add any context here, as it isn't
|
||||
# needed.
|
||||
server_hostname = self.app.config['SERVER_HOSTNAME']
|
||||
token = generate_bearer_token(server_hostname, STORAGE_PROXY_SUBJECT, {}, [access],
|
||||
PROXY_STORAGE_MAX_LIFETIME_S, self.instance_keys)
|
||||
|
||||
url_scheme = self.app.config['PREFERRED_URL_SCHEME']
|
||||
server_hostname = self.app.config['SERVER_HOSTNAME']
|
||||
|
||||
# The proxy path is of the form:
|
||||
# http(s)://registry_server/_storage_proxy/{token}/{scheme}/{hostname}/rest/of/path/here
|
||||
encoded_token = base64.urlsafe_b64encode(token)
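# The token becomes a path component of the proxy URL, so the URL-safe base64 alphabet is used
# to keep it free of '/' and '+' characters.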
|
||||
proxy_url = '%s://%s/_storage_proxy/%s/%s/%s/%s' % (url_scheme, server_hostname, encoded_token,
|
||||
parsed.scheme, parsed.netloc, path)
|
||||
logger.debug('Proxying via URL %s', proxy_url)
|
||||
return proxy_url
|
||||
|
||||
|
||||
def _validate_proxy_url(self):
|
||||
original_uri = request.headers.get('X-Original-URI', None)
|
||||
if not original_uri:
|
||||
logger.error('Missing original URI: %s', request.headers)
|
||||
abort(401)
|
||||
|
||||
if not original_uri.startswith('/_storage_proxy/'):
|
||||
logger.error('Unknown storage proxy path: %s', original_uri)
|
||||
abort(401)
|
||||
|
||||
# The proxy path is of the form:
|
||||
# /_storage_proxy/{token}/{scheme}/{hostname}/rest/of/path/here
|
||||
without_prefix = original_uri[len('/_storage_proxy/'):]
|
||||
parts = without_prefix.split('/', 3)
|
||||
if len(parts) != 4:
|
||||
logger.error('Invalid storage proxy path (found %s parts): %s', len(parts), without_prefix)
|
||||
abort(401)
|
||||
|
||||
encoded_token, scheme, host, uri = parts
|
||||
|
||||
try:
|
||||
token = base64.urlsafe_b64decode(str(encoded_token))
|
||||
except (ValueError, TypeError):
logger.exception('Could not decode proxy token')
abort(401)
|
||||
|
||||
logger.debug('Got token %s for storage proxy auth request %s with parts %s', token,
|
||||
original_uri, parts)
|
||||
|
||||
# Decode the bearer token.
|
||||
try:
|
||||
decoded = decode_bearer_token(token, self.instance_keys, self.app.config)
|
||||
except InvalidBearerTokenException:
|
||||
logger.exception('Invalid token for storage proxy')
|
||||
abort(401)
|
||||
|
||||
# Ensure it is for the proxy.
|
||||
if decoded['sub'] != STORAGE_PROXY_SUBJECT:
|
||||
logger.error('Invalid subject %s for storage proxy auth', decoded['sub'])
|
||||
abort(401)
|
||||
|
||||
# Validate that the access matches the token format.
|
||||
access = decoded.get('access', {})
|
||||
try:
|
||||
validate(access, ACCESS_SCHEMA)
|
||||
except ValidationError:
|
||||
logger.exception('We should not be minting invalid credentials: %s', access)
|
||||
abort(401)
|
||||
|
||||
# For now, we only expect a single access credential.
|
||||
if len(access) != 1:
|
||||
logger.error('We should not be minting invalid credentials: %s', access)
|
||||
abort(401)
|
||||
|
||||
# Ensure the signed access matches the requested URL's pieces.
|
||||
granted_access = access[0]
|
||||
if granted_access['scheme'] != scheme:
|
||||
logger.error('Mismatch in scheme. %s expected, %s found', granted_access['scheme'], scheme)
|
||||
abort(401)
|
||||
|
||||
if granted_access['host'] != host:
|
||||
logger.error('Mismatch in host. %s expected, %s found', granted_access['host'], host)
|
||||
abort(401)
|
||||
|
||||
if granted_access['uri'] != uri:
|
||||
logger.error('Mismatch in uri. %s expected, %s found', granted_access['uri'], uri)
|
||||
abort(401)
|
||||
|
||||
return 'OK'
|
101
storage/fakestorage.py
Normal file
@@ -0,0 +1,101 @@
import cStringIO as StringIO
|
||||
import hashlib
|
||||
|
||||
from collections import defaultdict
|
||||
from uuid import uuid4
|
||||
|
||||
from storage.basestorage import BaseStorageV2
|
||||
|
||||
_GLOBAL_FAKE_STORAGE_MAP = defaultdict(StringIO.StringIO)
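# Shared across FakeStorage instances (except those created with a 'local' context) so that
# multiple fake storage locations in tests observe the same data.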
|
||||
|
||||
class FakeStorage(BaseStorageV2):
|
||||
def __init__(self, context):
|
||||
super(FakeStorage, self).__init__()
|
||||
self._fake_storage_map = (defaultdict(StringIO.StringIO)
|
||||
if context == 'local' else _GLOBAL_FAKE_STORAGE_MAP)
|
||||
|
||||
def _init_path(self, path=None, create=False):
|
||||
return path
|
||||
|
||||
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False, head=False):
|
||||
try:
|
||||
if self.get_content('supports_direct_download') == 'true':
|
||||
return 'http://somefakeurl?goes=here'
|
||||
except IOError:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
def get_content(self, path):
|
||||
if path not in self._fake_storage_map:
|
||||
raise IOError('Fake file %s not found. Exist: %s' % (path, self._fake_storage_map.keys()))
|
||||
|
||||
self._fake_storage_map.get(path).seek(0)
|
||||
return self._fake_storage_map.get(path).read()
|
||||
|
||||
def put_content(self, path, content):
|
||||
self._fake_storage_map.pop(path, None)
|
||||
self._fake_storage_map[path].write(content)
|
||||
|
||||
def stream_read(self, path):
|
||||
io_obj = self._fake_storage_map[path]
|
||||
io_obj.seek(0)
|
||||
while True:
|
||||
buf = io_obj.read(self.buffer_size)
|
||||
if not buf:
|
||||
break
|
||||
yield buf
|
||||
|
||||
def stream_read_file(self, path):
|
||||
return StringIO.StringIO(self.get_content(path))
|
||||
|
||||
def stream_write(self, path, fp, content_type=None, content_encoding=None):
|
||||
out_fp = self._fake_storage_map[path]
|
||||
out_fp.seek(0)
|
||||
self.stream_write_to_fp(fp, out_fp)
|
||||
|
||||
def remove(self, path):
|
||||
self._fake_storage_map.pop(path, None)
|
||||
|
||||
def exists(self, path):
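# Tests can force every path to appear present by storing a truthy value under the magic
# 'all_files_exist' key.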
|
||||
if self._fake_storage_map.get('all_files_exist', None):
|
||||
return True
|
||||
return path in self._fake_storage_map
|
||||
|
||||
def get_checksum(self, path):
|
||||
return hashlib.sha256(self._fake_storage_map[path].read()).hexdigest()[:7]
|
||||
|
||||
def initiate_chunked_upload(self):
|
||||
new_uuid = str(uuid4())
|
||||
self._fake_storage_map[new_uuid].seek(0)
|
||||
return new_uuid, {}
|
||||
|
||||
def stream_upload_chunk(self, uuid, offset, length, in_fp, _, content_type=None):
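# Tests can force an upload failure by creating an entry under the magic 'except_upload' key.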
|
||||
if self.exists('except_upload'):
|
||||
return 0, {}, IOError("I'm an exception!")
|
||||
|
||||
upload_storage = self._fake_storage_map[uuid]
|
||||
try:
|
||||
return self.stream_write_to_fp(in_fp, upload_storage, length), {}, None
|
||||
except IOError as ex:
|
||||
return 0, {}, ex
|
||||
|
||||
def complete_chunked_upload(self, uuid, final_path, _):
|
||||
self._fake_storage_map[final_path] = self._fake_storage_map[uuid]
|
||||
self._fake_storage_map.pop(uuid, None)
|
||||
|
||||
def cancel_chunked_upload(self, uuid, _):
|
||||
self._fake_storage_map.pop(uuid, None)
|
||||
|
||||
def copy_to(self, destination, path):
|
||||
if self.exists('break_copying'):
|
||||
raise IOError('Broken!')
|
||||
|
||||
if self.exists('fake_copying'):
|
||||
return
|
||||
|
||||
if self.exists('except_copying'):
|
||||
raise Exception("I'm an exception!")
|
||||
|
||||
content = self.get_content(path)
|
||||
destination.put_content(path, content)
|
138
storage/local.py
Normal file
@@ -0,0 +1,138 @@
import os
|
||||
import shutil
|
||||
import hashlib
|
||||
import io
|
||||
import logging
|
||||
import psutil
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from storage.basestorage import BaseStorageV2
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LocalStorage(BaseStorageV2):
|
||||
def __init__(self, context, storage_path):
|
||||
super(LocalStorage, self).__init__()
|
||||
self._root_path = storage_path
|
||||
|
||||
def _init_path(self, path=None, create=False):
|
||||
path = os.path.join(self._root_path, path) if path else self._root_path
|
||||
if create is True:
|
||||
dirname = os.path.dirname(path)
|
||||
if not os.path.exists(dirname):
|
||||
os.makedirs(dirname)
|
||||
return path
|
||||
|
||||
def get_content(self, path):
|
||||
path = self._init_path(path)
|
||||
with open(path, mode='r') as f:
|
||||
return f.read()
|
||||
|
||||
def put_content(self, path, content):
|
||||
path = self._init_path(path, create=True)
|
||||
with open(path, mode='w') as f:
|
||||
f.write(content)
|
||||
return path
|
||||
|
||||
def stream_read(self, path):
|
||||
path = self._init_path(path)
|
||||
with open(path, mode='rb') as f:
|
||||
while True:
|
||||
buf = f.read(self.buffer_size)
|
||||
if not buf:
|
||||
break
|
||||
yield buf
|
||||
|
||||
def stream_read_file(self, path):
|
||||
path = self._init_path(path)
|
||||
return io.open(path, mode='rb')
|
||||
|
||||
def stream_write(self, path, fp, content_type=None, content_encoding=None):
|
||||
# Size is mandatory
|
||||
path = self._init_path(path, create=True)
|
||||
with open(path, mode='wb') as out_fp:
|
||||
self.stream_write_to_fp(fp, out_fp)
|
||||
|
||||
def exists(self, path):
|
||||
path = self._init_path(path)
|
||||
return os.path.exists(path)
|
||||
|
||||
def remove(self, path):
|
||||
path = self._init_path(path)
|
||||
if os.path.isdir(path):
|
||||
shutil.rmtree(path)
|
||||
return
|
||||
try:
|
||||
os.remove(path)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def get_checksum(self, path):
|
||||
path = self._init_path(path)
|
||||
sha_hash = hashlib.sha256()
|
||||
with open(path, 'r') as to_hash:
|
||||
while True:
|
||||
buf = to_hash.read(self.buffer_size)
|
||||
if not buf:
|
||||
break
|
||||
sha_hash.update(buf)
|
||||
return sha_hash.hexdigest()[:7]
|
||||
|
||||
def _rel_upload_path(self, uuid):
|
||||
return 'uploads/{0}'.format(uuid)
|
||||
|
||||
def initiate_chunked_upload(self):
|
||||
new_uuid = str(uuid4())
|
||||
|
||||
# Just create an empty file at the path
|
||||
with open(self._init_path(self._rel_upload_path(new_uuid), create=True), 'w'):
|
||||
pass
|
||||
|
||||
return new_uuid, {}
|
||||
|
||||
def stream_upload_chunk(self, uuid, offset, length, in_fp, _, content_type=None):
|
||||
try:
|
||||
with open(self._init_path(self._rel_upload_path(uuid)), 'r+b') as upload_storage:
|
||||
upload_storage.seek(offset)
|
||||
return self.stream_write_to_fp(in_fp, upload_storage, length), {}, None
|
||||
except IOError as ex:
|
||||
return 0, {}, ex
|
||||
|
||||
def complete_chunked_upload(self, uuid, final_path, _):
|
||||
content_path = self._rel_upload_path(uuid)
|
||||
final_path_abs = self._init_path(final_path, create=True)
|
||||
if not self.exists(final_path_abs):
|
||||
logger.debug('Moving content into place at path: %s', final_path_abs)
|
||||
shutil.move(self._init_path(content_path), final_path_abs)
|
||||
else:
|
||||
logger.debug('Content already exists at path: %s', final_path_abs)
|
||||
|
||||
def cancel_chunked_upload(self, uuid, _):
|
||||
content_path = self._init_path(self._rel_upload_path(uuid))
|
||||
os.remove(content_path)
|
||||
|
||||
def validate(self, client):
|
||||
super(LocalStorage, self).validate(client)
|
||||
|
||||
# Load the set of disk mounts.
|
||||
try:
|
||||
mounts = psutil.disk_partitions(all=True)
|
||||
except Exception:
|
||||
logger.exception('Could not load disk partitions')
|
||||
return
|
||||
|
||||
# Verify that the storage's root path is under a mounted Docker volume.
|
||||
for mount in mounts:
|
||||
if mount.mountpoint != '/' and self._root_path.startswith(mount.mountpoint):
|
||||
return
|
||||
|
||||
raise Exception('Storage path %s is not under a mounted volume.\n\n'
|
||||
'Registry data must be stored under a mounted volume '
|
||||
'to prevent data loss' % self._root_path)
|
||||
|
||||
def copy_to(self, destination, path):
|
||||
with self.stream_read_file(path) as fp:
|
||||
destination.stream_write(path, fp)
|
450
storage/swift.py
Normal file
@@ -0,0 +1,450 @@
""" Swift storage driver.
|
||||
|
||||
Uses: http://docs.openstack.org/developer/swift/overview_large_objects.html
|
||||
"""
|
||||
import os.path
|
||||
import copy
|
||||
import hmac
|
||||
import string
|
||||
import logging
|
||||
import json
|
||||
|
||||
from _pyio import BufferedReader
|
||||
|
||||
from collections import namedtuple
|
||||
from hashlib import sha1
|
||||
from random import SystemRandom
|
||||
from time import time
|
||||
from urlparse import urlparse
|
||||
from uuid import uuid4
|
||||
|
||||
from cachetools.func import lru_cache
|
||||
from swiftclient.client import Connection, ClientException, ReadableToIterable
|
||||
|
||||
from storage.basestorage import BaseStorage
|
||||
from util.registry import filelike
|
||||
from util.registry.generatorfile import GeneratorFile
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length'])
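# Each uploaded segment is tracked as (object path, byte offset within the overall upload,
# length in bytes).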
|
||||
_SEGMENTS_KEY = 'segments'
|
||||
_EMPTY_SEGMENTS_KEY = 'emptysegments'
|
||||
_SEGMENT_DIRECTORY = 'segments'
|
||||
_MAXIMUM_SEGMENT_SIZE = 200000000 # ~200 MB
|
||||
_DEFAULT_SWIFT_CONNECT_TIMEOUT = 5 # seconds
|
||||
_CHUNK_CLEANUP_DELAY = 30 # seconds
|
||||
|
||||
class SwiftStorage(BaseStorage):
|
||||
def __init__(self, context, swift_container, storage_path, auth_url, swift_user, swift_password,
|
||||
auth_version=None, os_options=None, ca_cert_path=None, temp_url_key=None,
|
||||
simple_path_concat=False, connect_timeout=None, retry_count=None,
|
||||
retry_on_ratelimit=True):
|
||||
super(SwiftStorage, self).__init__()
|
||||
self._swift_container = swift_container
|
||||
self._context = context
|
||||
|
||||
self._storage_path = storage_path.lstrip('/')
|
||||
self._simple_path_concat = simple_path_concat
|
||||
|
||||
self._auth_url = auth_url
|
||||
self._ca_cert_path = ca_cert_path
|
||||
|
||||
self._swift_user = swift_user
|
||||
self._swift_password = swift_password
|
||||
|
||||
self._temp_url_key = temp_url_key
|
||||
self._connect_timeout = connect_timeout
|
||||
self._retry_count = retry_count
|
||||
self._retry_on_ratelimit = retry_on_ratelimit
|
||||
|
||||
try:
|
||||
self._auth_version = int(auth_version or '2')
|
||||
except ValueError:
|
||||
self._auth_version = 2
|
||||
|
||||
self._os_options = os_options or {}
|
||||
|
||||
self._initialized = False
|
||||
|
||||
def _get_connection(self):
|
||||
return Connection(
|
||||
authurl=self._auth_url,
|
||||
cacert=self._ca_cert_path,
|
||||
|
||||
user=self._swift_user,
|
||||
key=self._swift_password,
|
||||
|
||||
auth_version=self._auth_version,
|
||||
os_options=self._os_options,
|
||||
|
||||
retry_on_ratelimit=self._retry_on_ratelimit,
|
||||
timeout=self._connect_timeout or _DEFAULT_SWIFT_CONNECT_TIMEOUT,
|
||||
retries=self._retry_count or 5,
|
||||
)
|
||||
|
||||
def _normalize_path(self, object_path):
|
||||
""" No matter what inputs we get, we are going to return a path without a leading or trailing
|
||||
'/'
|
||||
"""
|
||||
if self._simple_path_concat:
|
||||
return (self._storage_path + object_path).rstrip('/')
|
||||
else:
|
||||
return os.path.join(self._storage_path, object_path).rstrip('/')
|
||||
|
||||
def _get_object(self, path, chunk_size=None):
|
||||
path = self._normalize_path(path)
|
||||
try:
|
||||
_, obj = self._get_connection().get_object(self._swift_container, path,
|
||||
resp_chunk_size=chunk_size)
|
||||
return obj
|
||||
except ClientException as ex:
|
||||
logger.exception('Could not get object at path %s: %s', path, ex)
|
||||
raise IOError('Path %s not found' % path)
|
||||
|
||||
def _put_object(self, path, content, chunk=None, content_type=None, content_encoding=None,
|
||||
headers=None):
|
||||
path = self._normalize_path(path)
|
||||
headers = headers or {}
|
||||
|
||||
if content_encoding is not None:
|
||||
headers['Content-Encoding'] = content_encoding
|
||||
|
||||
is_filelike = hasattr(content, 'read')
|
||||
if is_filelike:
|
||||
content = ReadableToIterable(content, md5=True)
|
||||
|
||||
try:
|
||||
etag = self._get_connection().put_object(self._swift_container, path, content,
|
||||
chunk_size=chunk, content_type=content_type,
|
||||
headers=headers)
|
||||
except ClientException:
|
||||
# We re-raise client exception here so that validation of config during setup can see
|
||||
# the client exception messages.
|
||||
raise
|
||||
|
||||
# If we wrapped the content in a ReadableToIterable, compare its MD5 to the etag returned. If
|
||||
# they don't match, raise an IOError indicating a write failure.
|
||||
if is_filelike:
|
||||
if etag != content.get_md5sum():
|
||||
logger.error('Got mismatch in md5 etag for path %s: Expected %s, but server has %s', path,
|
||||
content.get_md5sum(), etag)
|
||||
raise IOError('upload verification failed for path {0}:'
|
||||
'md5 mismatch, local {1} != remote {2}'
|
||||
.format(path, content.get_md5sum(), etag))
|
||||
|
||||
def _head_object(self, path):
|
||||
path = self._normalize_path(path)
|
||||
try:
|
||||
return self._get_connection().head_object(self._swift_container, path)
|
||||
except ClientException as ce:
|
||||
if ce.http_status != 404:
|
||||
logger.exception('Could not head object at path %s: %s', path, ce)
|
||||
|
||||
return None
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _get_root_storage_url(self):
|
||||
""" Returns the root storage URL for this Swift storage. Note that since this requires a call
|
||||
to Swift, we cache the result of this function call.
|
||||
"""
|
||||
storage_url, _ = self._get_connection().get_auth()
|
||||
return storage_url
|
||||
|
||||
def get_direct_download_url(self, object_path, request_ip=None, expires_in=60,
|
||||
requires_cors=False, head=False):
|
||||
if requires_cors:
|
||||
return None
|
||||
|
||||
# Reference: http://docs.openstack.org/juno/config-reference/content/object-storage-tempurl.html
|
||||
if not self._temp_url_key:
|
||||
return None
|
||||
|
||||
# Retrieve the root storage URL for the connection.
|
||||
try:
|
||||
root_storage_url = self._get_root_storage_url()
|
||||
except ClientException:
|
||||
logger.exception('Got client exception when trying to load Swift auth')
|
||||
return None
|
||||
|
||||
parsed_storage_url = urlparse(root_storage_url)
|
||||
scheme = parsed_storage_url.scheme
|
||||
path = parsed_storage_url.path.rstrip('/')
|
||||
hostname = parsed_storage_url.netloc
|
||||
|
||||
object_path = self._normalize_path(object_path)
|
||||
|
||||
# Generate the signed HMAC body.
|
||||
method = 'HEAD' if head else 'GET'
|
||||
expires = int(time() + expires_in)
|
||||
full_path = '%s/%s/%s' % (path, self._swift_container, object_path)
|
||||
|
||||
hmac_body = '%s\n%s\n%s' % (method, expires, full_path)
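# Per the Swift tempurl middleware (linked above), the signature covers
# "<METHOD>\n<expires>\n<object path>" keyed with the shared temp URL key.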
|
||||
sig = hmac.new(self._temp_url_key.encode('utf-8'), hmac_body.encode('utf-8'), sha1).hexdigest()
|
||||
|
||||
surl = '{scheme}://{host}{full_path}?temp_url_sig={sig}&temp_url_expires={expires}'
|
||||
return surl.format(scheme=scheme, host=hostname, full_path=full_path, sig=sig, expires=expires)
|
||||
|
||||
def validate(self, client):
|
||||
super(SwiftStorage, self).validate(client)
|
||||
|
||||
if self._temp_url_key:
|
||||
# Generate a direct download URL.
|
||||
dd_url = self.get_direct_download_url('_verify')
|
||||
|
||||
if not dd_url:
|
||||
raise Exception('Could not validate direct download URL; the token may be invalid.')
|
||||
|
||||
# Try to retrieve the direct download URL.
|
||||
response = client.get(dd_url, timeout=2)
|
||||
if response.status_code != 200:
|
||||
logger.debug('Direct download failure: %s => %s with body %s', dd_url,
|
||||
response.status_code, response.text)
|
||||
|
||||
msg = 'Direct download URL failed with status code %s. Please check your temp-url-key.'
|
||||
raise Exception(msg % response.status_code)
|
||||
|
||||
def get_content(self, path):
|
||||
return self._get_object(path)
|
||||
|
||||
def put_content(self, path, content):
|
||||
self._put_object(path, content)
|
||||
|
||||
def stream_read(self, path):
|
||||
for data in self._get_object(path, self.buffer_size):
|
||||
yield data
|
||||
|
||||
def stream_read_file(self, path):
|
||||
return GeneratorFile(self.stream_read(path))
|
||||
|
||||
def stream_write(self, path, fp, content_type=None, content_encoding=None):
|
||||
self._put_object(path, fp, self.buffer_size, content_type=content_type,
|
||||
content_encoding=content_encoding)
|
||||
|
||||
def exists(self, path):
|
||||
return bool(self._head_object(path))
|
||||
|
||||
def remove(self, path):
|
||||
# Retrieve the object so we can see if it is segmented. If so, we'll delete its segments after
|
||||
# removing the object.
|
||||
try:
|
||||
headers = self._head_object(path)
|
||||
except ClientException as ex:
|
||||
logger.exception('Could not head for delete of path %s: %s', path, str(ex))
|
||||
raise IOError('Cannot delete path: %s' % path)
|
||||
|
||||
logger.debug('Found headers for path %s to delete: %s', path, headers)
|
||||
|
||||
# Delete the path itself.
|
||||
path = self._normalize_path(path)
|
||||
try:
|
||||
self._get_connection().delete_object(self._swift_container, path)
|
||||
except ClientException as ex:
|
||||
logger.exception('Could not delete path %s: %s', path, str(ex))
|
||||
raise IOError('Cannot delete path: %s' % path)
|
||||
|
||||
# Delete the segments.
|
||||
object_manifest = headers.get('x-object-manifest', headers.get('X-Object-Manifest'))
|
||||
if object_manifest is not None:
|
||||
logger.debug('Found DLO for path %s: %s', path, object_manifest)
|
||||
|
||||
# Remove the container name from the beginning.
|
||||
container_name, prefix_path = object_manifest.split('/', 1)
|
||||
if container_name != self._swift_container:
|
||||
logger.error('Expected container name %s, found path %s', self._swift_container,
|
||||
prefix_path)
|
||||
raise Exception("How did we end up with an invalid container name?")
|
||||
|
||||
logger.debug('Loading Dynamic Large Object segments for path prefix %s', prefix_path)
|
||||
try:
|
||||
_, container_objects = self._get_connection().get_container(self._swift_container,
|
||||
full_listing=True,
|
||||
prefix=prefix_path)
|
||||
except ClientException as ex:
|
||||
logger.exception('Could not load objects with prefix path %s: %s', prefix_path, str(ex))
|
||||
raise IOError('Cannot load path: %s' % prefix_path)
|
||||
|
||||
logger.debug('Found Dynamic Large Object segments for path prefix %s: %s', prefix_path,
|
||||
len(container_objects))
|
||||
for obj in container_objects:
|
||||
try:
|
||||
logger.debug('Deleting Dynamic Large Object segment %s for path prefix %s', obj['name'],
|
||||
prefix_path)
|
||||
self._get_connection().delete_object(self._swift_container, obj['name'])
|
||||
except ClientException as ex:
|
||||
logger.exception('Could not delete object with path %s: %s', obj['name'], str(ex))
|
||||
raise IOError('Cannot delete path: %s' % obj['name'])
|
||||
|
||||
def _random_checksum(self, count):
|
||||
chars = string.ascii_uppercase + string.digits
|
||||
return ''.join(SystemRandom().choice(chars) for _ in range(count))
|
||||
|
||||
def get_checksum(self, path):
|
||||
headers = self._head_object(path)
|
||||
if not headers:
|
||||
raise IOError('Cannot lookup path: %s' % path)
|
||||
|
||||
return headers.get('etag', '')[1:-1][:7] or self._random_checksum(7)
|
||||
|
||||
@staticmethod
|
||||
def _segment_list_from_metadata(storage_metadata, key=_SEGMENTS_KEY):
|
||||
return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[key]]
|
||||
|
||||
def initiate_chunked_upload(self):
|
||||
random_uuid = str(uuid4())
|
||||
|
||||
metadata = {
|
||||
_SEGMENTS_KEY: [],
|
||||
_EMPTY_SEGMENTS_KEY: [],
|
||||
}
|
||||
|
||||
return random_uuid, metadata
|
||||
|
||||
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None):
|
||||
if length == 0:
|
||||
return 0, storage_metadata, None
|
||||
|
||||
# Note: Swift limits segments in size, so we need to sub-divide chunks into segments
|
||||
# based on the configured maximum.
|
||||
total_bytes_written = 0
|
||||
upload_error = None
|
||||
read_until_end = length == filelike.READ_UNTIL_END
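# A length of READ_UNTIL_END means the caller did not specify a size, so segments are uploaded
# until the input stream is exhausted.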
|
||||
|
||||
while True:
|
||||
try:
|
||||
bytes_written, storage_metadata = self._stream_upload_segment(uuid, offset, length, in_fp,
|
||||
storage_metadata,
|
||||
content_type)
|
||||
except IOError as ex:
|
||||
message = ('Error writing to stream in stream_upload_chunk for uuid %s (offset %s' +
|
||||
', length %s, metadata: %s): %s')
|
||||
logger.exception(message, uuid, offset, length, storage_metadata, ex)
|
||||
upload_error = ex
|
||||
break
|
||||
|
||||
if not read_until_end:
|
||||
length = length - bytes_written
|
||||
|
||||
offset = offset + bytes_written
|
||||
total_bytes_written = total_bytes_written + bytes_written
|
||||
|
||||
if bytes_written == 0 or (not read_until_end and length <= 0):
|
||||
return total_bytes_written, storage_metadata, upload_error
|
||||
|
||||
return total_bytes_written, storage_metadata, upload_error
|
||||
|
||||
def _stream_upload_segment(self, uuid, offset, length, in_fp, storage_metadata, content_type):
|
||||
updated_metadata = copy.deepcopy(storage_metadata)
|
||||
segment_count = len(updated_metadata[_SEGMENTS_KEY])
|
||||
segment_path = '%s/%s/%s' % (_SEGMENT_DIRECTORY, uuid, '%09d' % segment_count)
|
||||
|
||||
# Track the number of bytes read and if an explicit length is specified, limit the
|
||||
# file stream to that length.
|
||||
if length == filelike.READ_UNTIL_END:
|
||||
length = _MAXIMUM_SEGMENT_SIZE
|
||||
else:
|
||||
length = min(_MAXIMUM_SEGMENT_SIZE, length)
|
||||
|
||||
limiting_fp = filelike.LimitingStream(in_fp, length)
|
||||
|
||||
# If retries are requested, then we need to use a buffered reader to allow for calls to
|
||||
# seek() on retries from within the Swift client.
|
||||
if self._retry_count > 0:
|
||||
limiting_fp = BufferedReader(limiting_fp, buffer_size=length)
|
||||
|
||||
# Write the segment to Swift.
|
||||
self.stream_write(segment_path, limiting_fp, content_type)
|
||||
|
||||
# We are only going to track keys to which data was confirmed written.
|
||||
bytes_written = limiting_fp.tell()
|
||||
if bytes_written > 0:
|
||||
updated_metadata[_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
|
||||
bytes_written))
|
||||
else:
|
||||
updated_metadata[_EMPTY_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
|
||||
bytes_written))
|
||||
|
||||
return bytes_written, updated_metadata
|
||||
|
||||
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
|
||||
""" Complete the chunked upload and store the final results in the path indicated.
|
||||
Returns nothing.
|
||||
"""
|
||||
# Check all potentially empty segments against the segments that were uploaded; if the path
|
||||
# is still empty, then we queue the segment to be deleted.
|
||||
if self._context.chunk_cleanup_queue is not None:
|
||||
nonempty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
|
||||
key=_SEGMENTS_KEY)
|
||||
potentially_empty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
|
||||
key=_EMPTY_SEGMENTS_KEY)
|
||||
|
||||
nonempty_paths = set([segment.path for segment in nonempty_segments])
|
||||
for segment in potentially_empty_segments:
|
||||
if segment.path in nonempty_paths:
|
||||
continue
|
||||
|
||||
# Queue the chunk to be deleted, as it is empty and therefore unused.
|
||||
self._context.chunk_cleanup_queue.put(
|
||||
['segment/%s/%s' % (self._context.location, uuid)],
|
||||
json.dumps({
|
||||
'location': self._context.location,
|
||||
'uuid': uuid,
|
||||
'path': segment.path,
|
||||
}), available_after=_CHUNK_CLEANUP_DELAY)
|
||||
|
||||
# Finally, we write an empty file at the proper location with a X-Object-Manifest
|
||||
# header pointing to the prefix for the segments.
|
||||
segments_prefix_path = self._normalize_path('%s/%s' % (_SEGMENT_DIRECTORY, uuid))
|
||||
contained_segments_prefix_path = '%s/%s' % (self._swift_container, segments_prefix_path)
|
||||
|
||||
self._put_object(final_path, '', headers={'X-Object-Manifest': contained_segments_prefix_path})
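# Reading the manifest object causes Swift to concatenate every object under the segments prefix
# in name order; segment names are zero-padded indices, so this matches upload order.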
|
||||
|
||||
def cancel_chunked_upload(self, uuid, storage_metadata):
|
||||
""" Cancel the chunked upload and clean up any outstanding partially uploaded data.
|
||||
Returns nothing.
|
||||
"""
|
||||
if not self._context.chunk_cleanup_queue:
|
||||
return
|
||||
|
||||
segments = list(SwiftStorage._segment_list_from_metadata(storage_metadata,
|
||||
key=_SEGMENTS_KEY))
|
||||
segments.extend(SwiftStorage._segment_list_from_metadata(storage_metadata,
|
||||
key=_EMPTY_SEGMENTS_KEY))
|
||||
|
||||
# Queue all the uploaded segments to be deleted.
|
||||
for segment in segments:
|
||||
# Queue the chunk to be deleted.
|
||||
self._context.chunk_cleanup_queue.put(
|
||||
['segment/%s/%s' % (self._context.location, uuid)],
|
||||
json.dumps({
|
||||
'location': self._context.location,
|
||||
'uuid': uuid,
|
||||
'path': segment.path,
|
||||
}), available_after=_CHUNK_CLEANUP_DELAY)
|
||||
|
||||
def copy_to(self, destination, path):
|
||||
if (self.__class__ == destination.__class__ and
|
||||
self._swift_user == destination._swift_user and
|
||||
self._swift_password == destination._swift_password and
|
||||
self._auth_url == destination._auth_url and
|
||||
self._auth_version == destination._auth_version):
|
||||
logger.debug('Copying file from swift %s to swift %s via a Swift copy',
|
||||
self._swift_container, destination)
|
||||
|
||||
normalized_path = self._normalize_path(path)
|
||||
target = '/%s/%s' % (destination._swift_container, normalized_path)
|
||||
|
||||
try:
|
||||
self._get_connection().copy_object(self._swift_container, normalized_path, target)
|
||||
except ClientException as ex:
|
||||
logger.exception('Could not swift copy path %s: %s', path, ex)
|
||||
raise IOError('Failed to swift copy path %s' % path)
|
||||
|
||||
return
|
||||
|
||||
# Fallback to a slower, default copy.
|
||||
logger.debug('Copying file from swift %s to %s via a streamed copy', self._swift_container,
|
||||
destination)
|
||||
with self.stream_read_file(path) as fp:
|
||||
destination.stream_write(path, fp)
|
217
storage/test/test_azure.py
Normal file
@@ -0,0 +1,217 @@
import base64
|
||||
import md5
|
||||
import pytest
|
||||
import io
|
||||
|
||||
from contextlib import contextmanager
|
||||
from urlparse import parse_qs, urlparse
|
||||
from httmock import urlmatch, HTTMock
|
||||
from xml.dom import minidom
|
||||
|
||||
from azure.storage.blob import BlockBlobService
|
||||
|
||||
from storage.azurestorage import AzureStorage
|
||||
|
||||
@contextmanager
|
||||
def fake_azure_storage(files=None):
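# Context manager that fakes the Azure Blob REST API via httmock (block upload, block list
# commit, copy and delete) and yields an AzureStorage wired against the emulator endpoint.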
|
||||
service = BlockBlobService(is_emulated=True)
|
||||
endpoint = service.primary_endpoint.split('/')
|
||||
container_name = 'somecontainer'
|
||||
files = files if files is not None else {}
|
||||
|
||||
container_prefix = '/' + endpoint[1] + '/' + container_name
|
||||
|
||||
@urlmatch(netloc=endpoint[0], path=container_prefix + '$')
|
||||
def get_container(url, request):
|
||||
return {'status_code': 200, 'content': '{}'}
|
||||
|
||||
@urlmatch(netloc=endpoint[0], path=container_prefix + '/.+')
|
||||
def container_file(url, request):
|
||||
filename = url.path[len(container_prefix)+1:]
|
||||
|
||||
if request.method == 'GET' or request.method == 'HEAD':
|
||||
return {
|
||||
'status_code': 200 if filename in files else 404,
|
||||
'content': files.get(filename),
|
||||
'headers': {
|
||||
'ETag': 'foobar',
|
||||
},
|
||||
}
|
||||
|
||||
if request.method == 'DELETE':
|
||||
files.pop(filename)
|
||||
return {
|
||||
'status_code': 201,
|
||||
'content': '',
|
||||
'headers': {
|
||||
'ETag': 'foobar',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
if request.method == 'PUT':
|
||||
query_params = parse_qs(url.query)
|
||||
if query_params.get('comp') == ['properties']:
|
||||
return {
|
||||
'status_code': 201,
|
||||
'content': '{}',
|
||||
'headers': {
|
||||
'x-ms-request-server-encrypted': "false",
|
||||
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
|
||||
}
|
||||
}
|
||||
|
||||
if query_params.get('comp') == ['block']:
|
||||
block_id = query_params['blockid'][0]
|
||||
files[filename] = files.get(filename) or {}
|
||||
files[filename][block_id] = request.body
|
||||
return {
|
||||
'status_code': 201,
|
||||
'content': '{}',
|
||||
'headers': {
|
||||
'Content-MD5': base64.b64encode(md5.new(request.body).digest()),
|
||||
'ETag': 'foo',
|
||||
'x-ms-request-server-encrypted': "false",
|
||||
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
|
||||
}
|
||||
}
|
||||
|
||||
if query_params.get('comp') == ['blocklist']:
|
||||
parsed = minidom.parseString(request.body)
|
||||
latest = parsed.getElementsByTagName('Latest')
|
||||
combined = []
|
||||
for latest_block in latest:
|
||||
combined.append(files[filename][latest_block.childNodes[0].data])
|
||||
|
||||
files[filename] = ''.join(combined)
|
||||
return {
|
||||
'status_code': 201,
|
||||
'content': '{}',
|
||||
'headers': {
|
||||
'Content-MD5': base64.b64encode(md5.new(files[filename]).digest()),
|
||||
'ETag': 'foo',
|
||||
'x-ms-request-server-encrypted': "false",
|
||||
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
|
||||
}
|
||||
}
|
||||
|
||||
if request.headers.get('x-ms-copy-source'):
|
||||
copy_source = request.headers['x-ms-copy-source']
|
||||
copy_path = urlparse(copy_source).path[len(container_prefix) + 1:]
|
||||
files[filename] = files[copy_path]
|
||||
return {
|
||||
'status_code': 201,
|
||||
'content': '{}',
|
||||
'headers': {
|
||||
'x-ms-request-server-encrypted': "false",
|
||||
'x-ms-copy-status': 'success',
|
||||
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
|
||||
}
|
||||
}
|
||||
|
||||
files[filename] = request.body
|
||||
|
||||
return {
|
||||
'status_code': 201,
|
||||
'content': '{}',
|
||||
'headers': {
|
||||
'Content-MD5': base64.b64encode(md5.new(request.body).digest()),
|
||||
'ETag': 'foo',
|
||||
'x-ms-request-server-encrypted': "false",
|
||||
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
|
||||
}
|
||||
}
|
||||
|
||||
return {'status_code': 405, 'content': ''}
|
||||
|
||||
@urlmatch(netloc=endpoint[0], path='.+')
|
||||
def catchall(url, request):
|
||||
return {'status_code': 405, 'content': ''}
|
||||
|
||||
with HTTMock(get_container, container_file, catchall):
|
||||
yield AzureStorage(None, 'somecontainer', '', 'someaccount', is_emulated=True)
|
||||
|
||||
def test_validate():
|
||||
with fake_azure_storage() as s:
|
||||
s.validate(None)
|
||||
|
||||
def test_basics():
|
||||
with fake_azure_storage() as s:
|
||||
s.put_content('hello', 'hello world')
|
||||
assert s.exists('hello')
|
||||
assert s.get_content('hello') == 'hello world'
|
||||
assert s.get_checksum('hello')
|
||||
assert ''.join(list(s.stream_read('hello'))) == 'hello world'
|
||||
assert s.stream_read_file('hello').read() == 'hello world'
|
||||
|
||||
s.remove('hello')
|
||||
assert not s.exists('hello')
|
||||
|
||||
def test_does_not_exist():
|
||||
with fake_azure_storage() as s:
|
||||
assert not s.exists('hello')
|
||||
|
||||
with pytest.raises(IOError):
|
||||
s.get_content('hello')
|
||||
|
||||
with pytest.raises(IOError):
|
||||
s.get_checksum('hello')
|
||||
|
||||
with pytest.raises(IOError):
|
||||
list(s.stream_read('hello'))
|
||||
|
||||
with pytest.raises(IOError):
|
||||
s.stream_read_file('hello')
|
||||
|
||||
def test_stream_write():
|
||||
fp = io.BytesIO()
|
||||
fp.write('hello world!')
|
||||
fp.seek(0)
|
||||
|
||||
with fake_azure_storage() as s:
|
||||
s.stream_write('hello', fp)
|
||||
|
||||
assert s.get_content('hello') == 'hello world!'
|
||||
|
||||
@pytest.mark.parametrize('chunk_size', [
|
||||
(1),
|
||||
(5),
|
||||
(10),
|
||||
])
|
||||
def test_chunked_uploading(chunk_size):
|
||||
with fake_azure_storage() as s:
|
||||
string_data = 'hello world!'
|
||||
chunks = [string_data[index:index+chunk_size] for index in range(0, len(string_data), chunk_size)]
|
||||
|
||||
uuid, metadata = s.initiate_chunked_upload()
|
||||
start_index = 0
|
||||
|
||||
for chunk in chunks:
|
||||
fp = io.BytesIO()
|
||||
fp.write(chunk)
|
||||
fp.seek(0)
|
||||
|
||||
total_bytes_written, metadata, error = s.stream_upload_chunk(uuid, start_index, -1, fp,
|
||||
metadata)
|
||||
assert total_bytes_written == len(chunk)
|
||||
assert metadata
|
||||
assert not error
|
||||
|
||||
start_index += total_bytes_written
|
||||
|
||||
s.complete_chunked_upload(uuid, 'chunked', metadata)
|
||||
assert s.get_content('chunked') == string_data
|
||||
|
||||
def test_get_direct_download_url():
|
||||
with fake_azure_storage() as s:
|
||||
s.put_content('hello', 'world')
|
||||
assert 'sig' in s.get_direct_download_url('hello')
|
||||
|
||||
def test_copy_to():
|
||||
files = {}
|
||||
|
||||
with fake_azure_storage(files=files) as s:
|
||||
s.put_content('hello', 'hello world')
|
||||
with fake_azure_storage(files=files) as s2:
|
||||
s.copy_to(s2, 'hello')
|
||||
assert s2.exists('hello')
|
258
storage/test/test_cloud_storage.py
Normal file
@@ -0,0 +1,258 @@
import os
|
||||
|
||||
from StringIO import StringIO
|
||||
|
||||
import pytest
|
||||
|
||||
import moto
|
||||
import boto
|
||||
|
||||
from moto import mock_s3_deprecated as mock_s3
|
||||
|
||||
from storage import S3Storage, StorageContext
|
||||
from storage.cloud import _CloudStorage, _PartUploadMetadata
|
||||
from storage.cloud import _CHUNKS_KEY
|
||||
|
||||
_TEST_CONTENT = os.urandom(1024)
|
||||
_TEST_BUCKET = 'some_bucket'
|
||||
_TEST_USER = 'someuser'
|
||||
_TEST_PASSWORD = 'somepassword'
|
||||
_TEST_PATH = 'some/cool/path'
|
||||
_TEST_CONTEXT = StorageContext('nyc', None, None, None, None)
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
def storage_engine():
|
||||
with mock_s3():
|
||||
# Create a test bucket and put some test content.
|
||||
boto.connect_s3().create_bucket(_TEST_BUCKET)
|
||||
engine = S3Storage(_TEST_CONTEXT, 'some/path', _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
|
||||
engine.put_content(_TEST_PATH, _TEST_CONTENT)
|
||||
|
||||
yield engine
|
||||
|
||||
|
||||
def test_basicop(storage_engine):
|
||||
# Ensure the content exists.
|
||||
assert storage_engine.exists(_TEST_PATH)
|
||||
|
||||
# Verify it can be retrieved.
|
||||
assert storage_engine.get_content(_TEST_PATH) == _TEST_CONTENT
|
||||
|
||||
# Retrieve a checksum for the content.
|
||||
storage_engine.get_checksum(_TEST_PATH)
|
||||
|
||||
# Remove the file.
|
||||
storage_engine.remove(_TEST_PATH)
|
||||
|
||||
# Ensure it no longer exists.
|
||||
with pytest.raises(IOError):
|
||||
storage_engine.get_content(_TEST_PATH)
|
||||
|
||||
with pytest.raises(IOError):
|
||||
storage_engine.get_checksum(_TEST_PATH)
|
||||
|
||||
assert not storage_engine.exists(_TEST_PATH)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('bucket, username, password', [
|
||||
pytest.param(_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD, id='same credentials'),
|
||||
pytest.param('another_bucket', 'blech', 'password', id='different credentials'),
|
||||
])
|
||||
def test_copy(bucket, username, password, storage_engine):
|
||||
# Copy the content to another engine.
|
||||
another_engine = S3Storage(_TEST_CONTEXT, 'another/path', bucket, username, password)
|
||||
boto.connect_s3().create_bucket('another_bucket')
|
||||
storage_engine.copy_to(another_engine, _TEST_PATH)
|
||||
|
||||
# Verify it can be retrieved.
|
||||
assert another_engine.get_content(_TEST_PATH) == _TEST_CONTENT
|
||||
|
||||
|
||||
def test_copy_with_error(storage_engine):
|
||||
another_engine = S3Storage(_TEST_CONTEXT, 'another/path', 'anotherbucket', 'foo',
|
||||
'bar')
|
||||
|
||||
with pytest.raises(IOError):
|
||||
storage_engine.copy_to(another_engine, _TEST_PATH)
|
||||
|
||||
|
||||
def test_stream_read(storage_engine):
|
||||
# Read the streaming content.
|
||||
data = ''.join(storage_engine.stream_read(_TEST_PATH))
|
||||
assert data == _TEST_CONTENT
|
||||
|
||||
|
||||
def test_stream_read_file(storage_engine):
|
||||
with storage_engine.stream_read_file(_TEST_PATH) as f:
|
||||
assert f.read() == _TEST_CONTENT
|
||||
|
||||
|
||||
def test_stream_write(storage_engine):
|
||||
new_data = os.urandom(4096)
|
||||
storage_engine.stream_write(_TEST_PATH, StringIO(new_data), content_type='Cool/Type')
|
||||
assert storage_engine.get_content(_TEST_PATH) == new_data
|
||||
|
||||
|
||||
def test_stream_write_error():
|
||||
with mock_s3():
|
||||
# Create an engine but not the bucket.
|
||||
engine = S3Storage(_TEST_CONTEXT, 'some/path', _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
|
||||
|
||||
# Attempt to write to the uncreated bucket, which should raise an error.
|
||||
with pytest.raises(IOError):
|
||||
engine.stream_write(_TEST_PATH, StringIO('hello world'), content_type='Cool/Type')
|
||||
|
||||
assert not engine.exists(_TEST_PATH)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('chunk_count', [
|
||||
0,
|
||||
1,
|
||||
50,
|
||||
])
|
||||
@pytest.mark.parametrize('force_client_side', [
|
||||
False,
|
||||
True
|
||||
])
|
||||
def test_chunk_upload(storage_engine, chunk_count, force_client_side):
|
||||
if chunk_count == 0 and force_client_side:
|
||||
return
|
||||
|
||||
upload_id, metadata = storage_engine.initiate_chunked_upload()
|
||||
final_data = ''
|
||||
|
||||
for index in range(0, chunk_count):
|
||||
chunk_data = os.urandom(1024)
|
||||
final_data = final_data + chunk_data
|
||||
bytes_written, new_metadata, error = storage_engine.stream_upload_chunk(upload_id, 0,
|
||||
len(chunk_data),
|
||||
StringIO(chunk_data),
|
||||
metadata)
|
||||
metadata = new_metadata
|
||||
|
||||
assert bytes_written == len(chunk_data)
|
||||
assert error is None
|
||||
assert len(metadata[_CHUNKS_KEY]) == index + 1
|
||||
|
||||
# Complete the chunked upload.
|
||||
storage_engine.complete_chunked_upload(upload_id, 'some/chunked/path', metadata,
|
||||
force_client_side=force_client_side)
|
||||
|
||||
# Ensure the file contents are valid.
|
||||
assert storage_engine.get_content('some/chunked/path') == final_data
|
||||
|
||||
|
||||
@pytest.mark.parametrize('chunk_count', [
|
||||
0,
|
||||
1,
|
||||
50,
|
||||
])
|
||||
def test_cancel_chunked_upload(storage_engine, chunk_count):
|
||||
upload_id, metadata = storage_engine.initiate_chunked_upload()
|
||||
|
||||
for _ in range(0, chunk_count):
|
||||
chunk_data = os.urandom(1024)
|
||||
_, new_metadata, _ = storage_engine.stream_upload_chunk(upload_id, 0,
|
||||
len(chunk_data),
|
||||
StringIO(chunk_data),
|
||||
metadata)
|
||||
metadata = new_metadata
|
||||
|
||||
# Cancel the upload.
|
||||
storage_engine.cancel_chunked_upload(upload_id, metadata)
|
||||
|
||||
# Ensure all chunks were deleted.
|
||||
for chunk in metadata[_CHUNKS_KEY]:
|
||||
assert not storage_engine.exists(chunk.path)
|
||||
|
||||
|
||||
def test_large_chunks_upload(storage_engine):
|
||||
# Make the max chunk size much smaller for testing.
|
||||
storage_engine.maximum_chunk_size = storage_engine.minimum_chunk_size * 2
|
||||
|
||||
upload_id, metadata = storage_engine.initiate_chunked_upload()
|
||||
|
||||
# Write a "super large" chunk, to ensure that it is broken into smaller chunks.
|
||||
chunk_data = os.urandom(int(storage_engine.maximum_chunk_size * 2.5))
|
||||
bytes_written, new_metadata, _ = storage_engine.stream_upload_chunk(upload_id, 0,
|
||||
-1,
|
||||
StringIO(chunk_data),
|
||||
metadata)
|
||||
assert len(chunk_data) == bytes_written
|
||||
|
||||
# Complete the chunked upload.
|
||||
storage_engine.complete_chunked_upload(upload_id, 'some/chunked/path', new_metadata)
|
||||
|
||||
# Ensure the file contents are valid.
|
||||
assert len(chunk_data) == len(storage_engine.get_content('some/chunked/path'))
|
||||
assert storage_engine.get_content('some/chunked/path') == chunk_data
|
||||
|
||||
|
||||
def test_large_chunks_with_ragged_edge(storage_engine):
|
||||
# Make the max chunk size much smaller for testing and force it to have a ragged edge.
|
||||
storage_engine.maximum_chunk_size = storage_engine.minimum_chunk_size * 2 + 10
|
||||
|
||||
upload_id, metadata = storage_engine.initiate_chunked_upload()
|
||||
|
||||
# Write a few "super large" chunks, to ensure that it is broken into smaller chunks.
|
||||
all_data = ''
|
||||
for _ in range(0, 2):
|
||||
chunk_data = os.urandom(int(storage_engine.maximum_chunk_size) + 20)
|
||||
bytes_written, new_metadata, _ = storage_engine.stream_upload_chunk(upload_id, 0,
|
||||
-1,
|
||||
StringIO(chunk_data),
|
||||
metadata)
|
||||
assert len(chunk_data) == bytes_written
|
||||
all_data = all_data + chunk_data
|
||||
metadata = new_metadata
|
||||
|
||||
# Complete the chunked upload.
|
||||
storage_engine.complete_chunked_upload(upload_id, 'some/chunked/path', new_metadata)
|
||||
|
||||
# Ensure the file contents are valid.
|
||||
assert len(all_data) == len(storage_engine.get_content('some/chunked/path'))
|
||||
assert storage_engine.get_content('some/chunked/path') == all_data
|
||||
|
||||
|
||||
@pytest.mark.parametrize('max_size, parts', [
|
||||
(50, [
|
||||
_PartUploadMetadata('foo', 0, 50),
|
||||
_PartUploadMetadata('foo', 50, 50),
|
||||
]),
|
||||
|
||||
(40, [
|
||||
_PartUploadMetadata('foo', 0, 25),
|
||||
_PartUploadMetadata('foo', 25, 25),
|
||||
_PartUploadMetadata('foo', 50, 25),
|
||||
_PartUploadMetadata('foo', 75, 25)
|
||||
]),
|
||||
|
||||
(51, [
|
||||
_PartUploadMetadata('foo', 0, 50),
|
||||
_PartUploadMetadata('foo', 50, 50),
|
||||
]),
|
||||
|
||||
(49, [
|
||||
_PartUploadMetadata('foo', 0, 25),
|
||||
_PartUploadMetadata('foo', 25, 25),
|
||||
_PartUploadMetadata('foo', 50, 25),
|
||||
_PartUploadMetadata('foo', 75, 25),
|
||||
]),
|
||||
|
||||
(99, [
|
||||
_PartUploadMetadata('foo', 0, 50),
|
||||
_PartUploadMetadata('foo', 50, 50),
|
||||
]),
|
||||
|
||||
(100, [
|
||||
_PartUploadMetadata('foo', 0, 100),
|
||||
]),
|
||||
])
|
||||
def test_rechunked(max_size, parts):
|
||||
chunk = _PartUploadMetadata('foo', 0, 100)
|
||||
rechunked = list(_CloudStorage._rechunk(chunk, max_size))
|
||||
assert len(rechunked) == len(parts)
|
||||
for index, chunk in enumerate(rechunked):
|
||||
assert chunk == parts[index]
|
80
storage/test/test_cloudfront.py
Normal file
@@ -0,0 +1,80 @@
import os
import pytest
|
||||
|
||||
from contextlib import contextmanager
|
||||
from mock import patch
|
||||
from moto import mock_s3_deprecated as mock_s3
|
||||
import boto
|
||||
|
||||
from app import config_provider
|
||||
from storage import CloudFrontedS3Storage, StorageContext
|
||||
from util.ipresolver import IPResolver
|
||||
from util.ipresolver.test.test_ipresolver import test_aws_ip, aws_ip_range_data, test_ip_range_cache
|
||||
from test.fixtures import *
|
||||
|
||||
_TEST_CONTENT = os.urandom(1024)
|
||||
_TEST_BUCKET = 'some_bucket'
|
||||
_TEST_USER = 'someuser'
|
||||
_TEST_PASSWORD = 'somepassword'
|
||||
_TEST_PATH = 'some/cool/path'
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def ipranges_populated(request):
|
||||
return request.param
|
||||
|
||||
@pytest.fixture()
|
||||
def test_empty_ip_range_cache(empty_range_data):
|
||||
sync_token = empty_range_data['syncToken']
|
||||
all_amazon = IPResolver._parse_amazon_ranges(empty_range_data)
|
||||
fake_cache = {
|
||||
'sync_token': sync_token,
|
||||
}
|
||||
return fake_cache
|
||||
|
||||
@pytest.fixture()
|
||||
def empty_range_data():
|
||||
empty_range_data = {
|
||||
'syncToken': 123456789,
|
||||
'prefixes': [],
|
||||
}
|
||||
return empty_range_data
|
||||
|
||||
@mock_s3
|
||||
def test_direct_download(test_aws_ip, test_empty_ip_range_cache, test_ip_range_cache, aws_ip_range_data, ipranges_populated, app):
|
||||
ipresolver = IPResolver(app)
|
||||
if ipranges_populated:
|
||||
ipresolver.sync_token = test_ip_range_cache['sync_token']
ipresolver.amazon_ranges = test_ip_range_cache['all_amazon']
|
||||
context = StorageContext('nyc', None, None, config_provider, ipresolver)
|
||||
|
||||
# Create a test bucket and put some test content.
|
||||
boto.connect_s3().create_bucket(_TEST_BUCKET)
|
||||
|
||||
engine = CloudFrontedS3Storage(context, 'cloudfrontdomain', 'keyid', 'test/data/test.pem', 'some/path',
|
||||
_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
|
||||
engine.put_content(_TEST_PATH, _TEST_CONTENT)
|
||||
assert engine.exists(_TEST_PATH)
|
||||
|
||||
# Request a direct download URL for a request from a known AWS IP, and ensure we are returned an S3 URL.
|
||||
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, test_aws_ip)
|
||||
|
||||
if ipranges_populated:
|
||||
# Request a direct download URL for a request from a non-AWS IP, and ensure we are returned a CloudFront URL.
|
||||
assert 'cloudfrontdomain' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
|
||||
else:
|
||||
# Request a direct download URL for a request from a non-AWS IP, but since IP Ranges isn't populated, we still
|
||||
# get back an S3 URL.
|
||||
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
|
||||
|
||||
@mock_s3
|
||||
def test_direct_download_no_ip(test_aws_ip, aws_ip_range_data, ipranges_populated, app):
|
||||
ipresolver = IPResolver(app)
|
||||
context = StorageContext('nyc', None, None, config_provider, ipresolver)
|
||||
|
||||
# Create a test bucket and put some test content.
|
||||
boto.connect_s3().create_bucket(_TEST_BUCKET)
|
||||
|
||||
engine = CloudFrontedS3Storage(context, 'cloudfrontdomain', 'keyid', 'test/data/test.pem', 'some/path',
|
||||
_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
|
||||
engine.put_content(_TEST_PATH, _TEST_CONTENT)
|
||||
assert engine.exists(_TEST_PATH)
|
||||
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH)
|
95
storage/test/test_storageproxy.py
Normal file
@@ -0,0 +1,95 @@
import os
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from flask import Flask
|
||||
from flask_testing import LiveServerTestCase
|
||||
|
||||
from storage import Storage
|
||||
from util.security.instancekeys import InstanceKeys
|
||||
|
||||
from test.registry.liveserverfixture import *
|
||||
from test.fixtures import *
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def is_proxying_enabled(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def server_executor(app):
|
||||
def reload_app(server_hostname):
|
||||
# Close any existing connection.
|
||||
close_db_filter(None)
|
||||
|
||||
# Reload the database config.
|
||||
app.config['SERVER_HOSTNAME'] = server_hostname[len('http://'):]
|
||||
configure(app.config)
|
||||
return 'OK'
|
||||
|
||||
executor = LiveServerExecutor()
|
||||
executor.register('reload_app', reload_app)
|
||||
return executor
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def liveserver_app(app, server_executor, init_db_path, is_proxying_enabled):
|
||||
server_executor.apply_blueprint_to_app(app)
|
||||
|
||||
if os.environ.get('DEBUG') == 'true':
|
||||
app.config['DEBUG'] = True
|
||||
|
||||
app.config['TESTING'] = True
|
||||
app.config['INSTANCE_SERVICE_KEY_KID_LOCATION'] = 'test/data/test.kid'
|
||||
app.config['INSTANCE_SERVICE_KEY_LOCATION'] = 'test/data/test.pem'
|
||||
app.config['INSTANCE_SERVICE_KEY_SERVICE'] = 'quay'
|
||||
|
||||
app.config['FEATURE_PROXY_STORAGE'] = is_proxying_enabled
|
||||
|
||||
app.config['DISTRIBUTED_STORAGE_CONFIG'] = {
|
||||
'test': ['FakeStorage', {}],
|
||||
}
|
||||
app.config['DISTRIBUTED_STORAGE_PREFERENCE'] = ['test']
|
||||
return app
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def instance_keys(liveserver_app):
|
||||
return InstanceKeys(liveserver_app)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def storage(liveserver_app, instance_keys):
|
||||
return Storage(liveserver_app, instance_keys=instance_keys)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def app_reloader(liveserver, server_executor):
|
||||
server_executor.on(liveserver).reload_app(liveserver.url)
|
||||
yield
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI') is not None,
|
||||
reason="not supported for non SQLite testing")
|
||||
def test_storage_proxy_auth(storage, liveserver_app, liveserver_session, is_proxying_enabled,
|
||||
app_reloader):
|
||||
# Activate direct download on the fake storage.
|
||||
storage.put_content(['test'], 'supports_direct_download', 'true')
|
||||
|
||||
# Get the unwrapped URL.
|
||||
direct_download_url = storage.get_direct_download_url(['test'], 'somepath')
|
||||
proxy_index = direct_download_url.find('/_storage_proxy/')
|
||||
if is_proxying_enabled:
|
||||
assert proxy_index >= 0
|
||||
else:
|
||||
assert proxy_index == -1
|
||||
|
||||
# Ensure that auth returns the expected value.
|
||||
headers = {
|
||||
'X-Original-URI': direct_download_url[proxy_index:] if proxy_index >= 0 else 'someurihere'
|
||||
}
|
||||
|
||||
resp = liveserver_session.get('_storage_proxy_auth', headers=headers)
|
||||
assert resp.status_code == (500 if not is_proxying_enabled else 200)
|
327
storage/test/test_swift.py
Normal file
@@ -0,0 +1,327 @@
import io
|
||||
import pytest
|
||||
import hashlib
|
||||
import copy
|
||||
|
||||
from collections import defaultdict
|
||||
from mock import MagicMock, patch
|
||||
|
||||
from storage import StorageContext
|
||||
from storage.swift import SwiftStorage, _EMPTY_SEGMENTS_KEY
|
||||
from swiftclient.client import ClientException
|
||||
|
||||
base_args = {
|
||||
'context': StorageContext('nyc', None, None, None, None),
|
||||
'swift_container': 'container-name',
|
||||
'storage_path': '/basepath',
|
||||
'auth_url': 'https://auth.com',
|
||||
'swift_user': 'root',
|
||||
'swift_password': 'password',
|
||||
}
|
||||
|
||||
class MockSwiftStorage(SwiftStorage):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(MockSwiftStorage, self).__init__(*args, **kwargs)
|
||||
self._connection = MagicMock()
|
||||
|
||||
def _get_connection(self):
|
||||
return self._connection
|
||||
|
||||
class FakeSwiftStorage(SwiftStorage):
|
||||
def __init__(self, fail_checksum=False, connection=None, *args, **kwargs):
|
||||
super(FakeSwiftStorage, self).__init__(*args, **kwargs)
|
||||
self._connection = connection or FakeSwift(fail_checksum=fail_checksum,
|
||||
temp_url_key=kwargs.get('temp_url_key'))
|
||||
|
||||
def _get_connection(self):
|
||||
return self._connection
|
||||
|
||||
|
||||
class FakeSwift(object):
|
||||
def __init__(self, fail_checksum=False, temp_url_key=None):
|
||||
self.containers = defaultdict(dict)
|
||||
self.fail_checksum = fail_checksum
|
||||
self.temp_url_key = temp_url_key
|
||||
|
||||
def get_auth(self):
|
||||
if self.temp_url_key == 'exception':
|
||||
raise ClientException('I failed!')
|
||||
|
||||
return 'http://fake/swift', None
|
||||
|
||||
def head_object(self, container, path):
|
||||
return self.containers.get(container, {}).get(path, {}).get('headers', None)
|
||||
|
||||
def copy_object(self, container, path, target):
|
||||
pieces = target.split('/', 2)
|
||||
_, content = self.get_object(container, path)
|
||||
self.put_object(pieces[1], pieces[2], content)
|
||||
|
||||
def get_container(self, container, prefix=None, full_listing=None):
|
||||
container_entries = self.containers[container]
|
||||
objs = []
|
||||
for path, data in list(container_entries.iteritems()):
|
||||
if not prefix or path.startswith(prefix):
|
||||
objs.append({
|
||||
'name': path,
|
||||
'bytes': len(data['content']),
|
||||
})
|
||||
return {}, objs
|
||||
|
||||
def put_object(self, container, path, content, chunk_size=None, content_type=None, headers=None):
|
||||
if not isinstance(content, str):
|
||||
if hasattr(content, 'read'):
|
||||
content = content.read()
|
||||
else:
|
||||
content = ''.join(content)
|
||||
|
||||
self.containers[container][path] = {
|
||||
'content': content,
|
||||
'chunk_size': chunk_size,
|
||||
'content_type': content_type,
|
||||
'headers': headers or {'is': True},
|
||||
}
|
||||
|
||||
digest = hashlib.md5()
|
||||
digest.update(content)
|
||||
return digest.hexdigest() if not self.fail_checksum else 'invalid'
|
||||
|
||||
def get_object(self, container, path, resp_chunk_size=None):
|
||||
data = self.containers[container].get(path, {})
|
||||
if 'X-Object-Manifest' in data['headers']:
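# Emulate a Dynamic Large Object read: stitch together every stored object whose
# container-qualified name starts with the manifest prefix, sorted by name.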
|
||||
new_contents = []
|
||||
prefix = data['headers']['X-Object-Manifest']
|
||||
for key, value in self.containers[container].iteritems():
|
||||
if ('container-name/' + key).startswith(prefix):
|
||||
new_contents.append((key, value['content']))
|
||||
|
||||
new_contents.sort(key=lambda value: value[0])
|
||||
|
||||
data = dict(data)
|
||||
data['content'] = ''.join([nc[1] for nc in new_contents])
|
||||
return bool(data), data.get('content')
|
||||
|
||||
return bool(data), data.get('content')
|
||||
|
||||
def delete_object(self, container, path):
|
||||
self.containers[container].pop(path, None)
|
||||
|
||||
|
||||
class FakeQueue(object):
  def __init__(self):
    self.items = []

  def get(self):
    if not self.items:
      return None

    return self.items.pop()

  def put(self, names, item, available_after=0):
    self.items.append({
      'names': names,
      'item': item,
      'available_after': available_after,
    })

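# Illustrative sketch of the contract the fakes above provide, using only names
# defined in this module; underscore-prefixed so pytest does not collect it.
def _fake_swift_roundtrip_sketch():
  conn = FakeSwift()
  etag = conn.put_object('container-name', 'basepath/foo', 'hello')
  assert etag == hashlib.md5('hello').hexdigest()

  # head_object returns the stored headers (a truthy placeholder by default).
  assert conn.head_object('container-name', 'basepath/foo') is not None

  # get_object returns a (found, content) pair.
  _, body = conn.get_object('container-name', 'basepath/foo')
  assert body == 'hello'

  conn.delete_object('container-name', 'basepath/foo')
  assert conn.head_object('container-name', 'basepath/foo') is None
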
def test_fixed_path_concat():
  swift = MockSwiftStorage(**base_args)
  swift.exists('object/path')
  swift._get_connection().head_object.assert_called_with('container-name', 'basepath/object/path')

def test_simple_path_concat():
  simple_concat_args = dict(base_args)
  simple_concat_args['simple_path_concat'] = True
  swift = MockSwiftStorage(**simple_concat_args)
  swift.exists('object/path')
  swift._get_connection().head_object.assert_called_with('container-name', 'basepathobject/path')

def test_delete_unknown_path():
  swift = SwiftStorage(**base_args)
  with pytest.raises(IOError):
    swift.remove('someunknownpath')

def test_simple_put_get():
  swift = FakeSwiftStorage(**base_args)
  assert not swift.exists('somepath')

  swift.put_content('somepath', 'hello world!')
  assert swift.exists('somepath')
  assert swift.get_content('somepath') == 'hello world!'

def test_stream_read_write():
  swift = FakeSwiftStorage(**base_args)
  assert not swift.exists('somepath')

  swift.stream_write('somepath', io.BytesIO('some content here'))
  assert swift.exists('somepath')
  assert swift.get_content('somepath') == 'some content here'
  assert ''.join(list(swift.stream_read('somepath'))) == 'some content here'

def test_stream_read_write_invalid_checksum():
  swift = FakeSwiftStorage(fail_checksum=True, **base_args)
  assert not swift.exists('somepath')

  with pytest.raises(IOError):
    swift.stream_write('somepath', io.BytesIO('some content here'))

def test_remove():
  swift = FakeSwiftStorage(**base_args)
  assert not swift.exists('somepath')

  swift.put_content('somepath', 'hello world!')
  assert swift.exists('somepath')

  swift.remove('somepath')
  assert not swift.exists('somepath')

def test_copy_to():
  swift = FakeSwiftStorage(**base_args)

  modified_args = copy.deepcopy(base_args)
  modified_args['swift_container'] = 'another_container'

  another_swift = FakeSwiftStorage(connection=swift._connection, **modified_args)

  swift.put_content('somepath', 'some content here')
  swift.copy_to(another_swift, 'somepath')

  assert swift.exists('somepath')
  assert another_swift.exists('somepath')

  assert swift.get_content('somepath') == 'some content here'
  assert another_swift.get_content('somepath') == 'some content here'

def test_copy_to_different():
  swift = FakeSwiftStorage(**base_args)

  modified_args = copy.deepcopy(base_args)
  modified_args['swift_user'] = 'foobarbaz'
  modified_args['swift_container'] = 'another_container'

  another_swift = FakeSwiftStorage(**modified_args)

  swift.put_content('somepath', 'some content here')
  swift.copy_to(another_swift, 'somepath')

  assert swift.exists('somepath')
  assert another_swift.exists('somepath')

  assert swift.get_content('somepath') == 'some content here'
  assert another_swift.get_content('somepath') == 'some content here'

def test_checksum():
  swift = FakeSwiftStorage(**base_args)
  swift.put_content('somepath', 'hello world!')
  assert swift.get_checksum('somepath') is not None

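# The chunked-upload test below patches storage.swift._MAXIMUM_SEGMENT_SIZE down
# to a handful of bytes so that even tiny uploads are split across multiple Swift
# segments (tracked in metadata['segments']), exercising the large-object path
# that FakeSwift.get_object reassembles through X-Object-Manifest.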
@pytest.mark.parametrize('read_until_end', [
  (True,),
  (False,),
])
@pytest.mark.parametrize('max_chunk_size', [
  (10000000),
  (10),
  (5),
  (2),
  (1),
])
@pytest.mark.parametrize('chunks', [
  (['this', 'is', 'some', 'chunked', 'data', '']),
  (['this is a very large chunk of data', '']),
  (['h', 'e', 'l', 'l', 'o', '']),
])
def test_chunked_upload(chunks, max_chunk_size, read_until_end):
  swift = FakeSwiftStorage(**base_args)
  uuid, metadata = swift.initiate_chunked_upload()

  offset = 0

  with patch('storage.swift._MAXIMUM_SEGMENT_SIZE', max_chunk_size):
    for chunk in chunks:
      chunk_length = len(chunk) if not read_until_end else -1
      bytes_written, metadata, error = swift.stream_upload_chunk(uuid, offset, chunk_length,
                                                                 io.BytesIO(chunk), metadata)
      assert error is None
      assert len(chunk) == bytes_written
      offset += len(chunk)

    swift.complete_chunked_upload(uuid, 'somepath', metadata)
    assert swift.get_content('somepath') == ''.join(chunks)

    # Ensure each of the segments exist.
    for segment in metadata['segments']:
      assert swift.exists(segment.path)

    # Delete the file and ensure all of its segments were removed.
    swift.remove('somepath')
    assert not swift.exists('somepath')

    for segment in metadata['segments']:
      assert not swift.exists(segment.path)


def test_cancel_chunked_upload():
  chunk_cleanup_queue = FakeQueue()

  args = dict(base_args)
  args['context'] = StorageContext('nyc', None, chunk_cleanup_queue, None, None)

  swift = FakeSwiftStorage(**args)
  uuid, metadata = swift.initiate_chunked_upload()

  chunks = ['this', 'is', 'some', 'chunked', 'data', '']
  offset = 0
  for chunk in chunks:
    bytes_written, metadata, error = swift.stream_upload_chunk(uuid, offset, len(chunk),
                                                               io.BytesIO(chunk), metadata)
    assert error is None
    assert len(chunk) == bytes_written
    offset += len(chunk)

  swift.cancel_chunked_upload(uuid, metadata)

  found = chunk_cleanup_queue.get()
  assert found is not None


def test_empty_chunks_queued_for_deletion():
  chunk_cleanup_queue = FakeQueue()
  args = dict(base_args)
  args['context'] = StorageContext('nyc', None, chunk_cleanup_queue, None, None)

  swift = FakeSwiftStorage(**args)
  uuid, metadata = swift.initiate_chunked_upload()

  chunks = ['this', '', 'is', 'some', '', 'chunked', 'data', '']
  offset = 0
  for chunk in chunks:
    length = len(chunk)
    if length == 0:
      length = 1

    bytes_written, metadata, error = swift.stream_upload_chunk(uuid, offset, length,
                                                               io.BytesIO(chunk), metadata)
    assert error is None
    assert len(chunk) == bytes_written
    offset += len(chunk)

  swift.complete_chunked_upload(uuid, 'somepath', metadata)
  assert ''.join(chunks) == swift.get_content('somepath')

  # Check the chunk deletion queue and ensure we have the last chunk queued.
  found = chunk_cleanup_queue.get()
  assert found is not None

  found2 = chunk_cleanup_queue.get()
  assert found2 is None

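# A direct download URL can only be produced when a Swift temp URL key is
# configured; FakeSwift.get_auth raises ClientException for the 'exception'
# key, so both that case and the missing-key case are expected to yield no URL.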
@pytest.mark.parametrize('temp_url_key, expects_url', [
  (None, False),
  ('foobarbaz', True),
  ('exception', False),
])
def test_get_direct_download_url(temp_url_key, expects_url):
  swift = FakeSwiftStorage(temp_url_key=temp_url_key, **base_args)
  swift.put_content('somepath', 'hello world!')
  assert (swift.get_direct_download_url('somepath') is not None) == expects_url