initial import for Open Source 🎉

This commit is contained in:
Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions

82
storage/__init__.py Normal file

@@ -0,0 +1,82 @@
from storage.local import LocalStorage
from storage.cloud import (S3Storage, GoogleCloudStorage, RadosGWStorage, CloudFrontedS3Storage,
RHOCSStorage)
from storage.fakestorage import FakeStorage
from storage.distributedstorage import DistributedStorage
from storage.swift import SwiftStorage
from storage.azurestorage import AzureStorage
from storage.downloadproxy import DownloadProxy
from util.ipresolver import NoopIPResolver
TYPE_LOCAL_STORAGE = 'LocalStorage'
STORAGE_DRIVER_CLASSES = {
'LocalStorage': LocalStorage,
'S3Storage': S3Storage,
'GoogleCloudStorage': GoogleCloudStorage,
'RadosGWStorage': RadosGWStorage,
'SwiftStorage': SwiftStorage,
'CloudFrontedS3Storage': CloudFrontedS3Storage,
'AzureStorage': AzureStorage,
'RHOCSStorage': RHOCSStorage,
}
def get_storage_driver(location, metric_queue, chunk_cleanup_queue, config_provider, ip_resolver,
storage_params):
""" Returns a storage driver class for the given storage configuration
(a pair of string name and a dict of parameters). """
driver = storage_params[0]
parameters = storage_params[1]
driver_class = STORAGE_DRIVER_CLASSES.get(driver, FakeStorage)
context = StorageContext(location, metric_queue, chunk_cleanup_queue, config_provider,
ip_resolver)
return driver_class(context, **parameters)
class StorageContext(object):
def __init__(self, location, metric_queue, chunk_cleanup_queue, config_provider, ip_resolver):
self.location = location
self.metric_queue = metric_queue
self.chunk_cleanup_queue = chunk_cleanup_queue
self.config_provider = config_provider
self.ip_resolver = ip_resolver or NoopIPResolver()
class Storage(object):
def __init__(self, app=None, metric_queue=None, chunk_cleanup_queue=None, instance_keys=None,
config_provider=None, ip_resolver=None):
self.app = app
if app is not None:
self.state = self.init_app(app, metric_queue, chunk_cleanup_queue, instance_keys,
config_provider, ip_resolver)
else:
self.state = None
def init_app(self, app, metric_queue, chunk_cleanup_queue, instance_keys, config_provider,
ip_resolver):
storages = {}
for location, storage_params in app.config.get('DISTRIBUTED_STORAGE_CONFIG').items():
storages[location] = get_storage_driver(location, metric_queue, chunk_cleanup_queue,
config_provider, ip_resolver, storage_params)
preference = app.config.get('DISTRIBUTED_STORAGE_PREFERENCE', None)
if not preference:
preference = storages.keys()
default_locations = app.config.get('DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS') or []
download_proxy = None
if app.config.get('FEATURE_PROXY_STORAGE', False) and instance_keys is not None:
download_proxy = DownloadProxy(app, instance_keys)
d_storage = DistributedStorage(storages, preference, default_locations, download_proxy,
app.config.get('REGISTRY_STATE') == 'readonly')
# register extension with app
app.extensions = getattr(app, 'extensions', {})
app.extensions['storage'] = d_storage
return d_storage
def __getattr__(self, name):
return getattr(self.state, name, None)
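The get_storage_driver docstring above describes each configured location as a pair of (driver name, parameter dict), and Storage.init_app builds one driver per entry in DISTRIBUTED_STORAGE_CONFIG. A minimal sketch of how an application might wire this up; the location names, bucket, and credentials below are placeholders, not values from this commit:

from flask import Flask
from storage import Storage

app = Flask(__name__)
app.config['DISTRIBUTED_STORAGE_CONFIG'] = {
    # (driver name, parameter dict) pairs consumed by get_storage_driver.
    'local_us': ('LocalStorage', {'storage_path': '/datastorage/registry'}),
    's3_us_east': ('S3Storage', {
        'storage_path': '/registry',
        's3_bucket': 'example-bucket',      # placeholder
        's3_access_key': 'EXAMPLE_KEY',     # placeholder
        's3_secret_key': 'EXAMPLE_SECRET',  # placeholder
    }),
}
app.config['DISTRIBUTED_STORAGE_PREFERENCE'] = ['s3_us_east']

# Unknown driver names silently fall back to FakeStorage via STORAGE_DRIVER_CLASSES.get().
storage = Storage(app)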

326
storage/azurestorage.py Normal file

@@ -0,0 +1,326 @@
""" Azure storage driver.
Based on: https://docs.microsoft.com/en-us/azure/storage/blobs/storage-python-how-to-use-blob-storage
"""
import logging
import os
import io
import uuid
import copy
import time
from datetime import datetime, timedelta
from azure.common import AzureException
from azure.storage.blob import BlockBlobService, ContentSettings, BlobBlock, ContainerPermissions
from azure.storage.common.models import CorsRule
from storage.basestorage import BaseStorage
from util.registry.filelike import LimitingStream, READ_UNTIL_END
logger = logging.getLogger(__name__)
_COPY_POLL_SLEEP = 0.25 # seconds
_MAX_COPY_POLL_COUNT = 120 # number of _COPY_POLL_SLEEP intervals => ~30s total
_MAX_BLOCK_SIZE = 1024 * 1024 * 100 # 100MB
_BLOCKS_KEY = 'blocks'
_CONTENT_TYPE_KEY = 'content-type'
class AzureStorage(BaseStorage):
def __init__(self, context, azure_container, storage_path, azure_account_name,
azure_account_key=None, sas_token=None, connection_string=None,
is_emulated=False, socket_timeout=20, request_timeout=20):
super(AzureStorage, self).__init__()
self._context = context
self._storage_path = storage_path.lstrip('/')
self._azure_account_name = azure_account_name
self._azure_account_key = azure_account_key
self._azure_sas_token = sas_token
self._azure_container = azure_container
self._azure_connection_string = connection_string
self._request_timeout = request_timeout
self._blob_service = BlockBlobService(account_name=azure_account_name,
account_key=azure_account_key,
sas_token=sas_token,
is_emulated=is_emulated,
connection_string=connection_string,
socket_timeout=socket_timeout)
def _blob_name_from_path(self, object_path):
if '..' in object_path:
raise Exception('Relative paths are not allowed; found %s' % object_path)
return os.path.join(self._storage_path, object_path).rstrip('/')
def _upload_blob_path_from_uuid(self, uuid):
return self._blob_name_from_path(self._upload_blob_name_from_uuid(uuid))
def _upload_blob_name_from_uuid(self, uuid):
return 'uploads/{0}'.format(uuid)
def get_direct_download_url(self, object_path, request_ip=None, expires_in=60,
requires_cors=False, head=False):
blob_name = self._blob_name_from_path(object_path)
try:
sas_token = self._blob_service.generate_blob_shared_access_signature(
self._azure_container,
blob_name,
ContainerPermissions.READ,
datetime.utcnow() + timedelta(seconds=expires_in))
blob_url = self._blob_service.make_blob_url(self._azure_container, blob_name,
sas_token=sas_token)
except AzureException:
logger.exception('Exception when trying to get direct download for path %s', object_path)
raise IOError('Exception when trying to get direct download')
return blob_url
def validate(self, client):
super(AzureStorage, self).validate(client)
self._blob_service.get_container_properties(self._azure_container,
timeout=self._request_timeout)
def get_content(self, path):
blob_name = self._blob_name_from_path(path)
try:
blob = self._blob_service.get_blob_to_bytes(self._azure_container, blob_name)
except AzureException:
logger.exception('Exception when trying to get path %s', path)
raise IOError('Exception when trying to get path')
return blob.content
def put_content(self, path, content):
blob_name = self._blob_name_from_path(path)
try:
self._blob_service.create_blob_from_bytes(self._azure_container, blob_name, content)
except AzureException:
logger.exception('Exception when trying to put path %s', path)
raise IOError('Exception when trying to put path')
def stream_read(self, path):
with self.stream_read_file(path) as f:
while True:
buf = f.read(self.buffer_size)
if not buf:
break
yield buf
def stream_read_file(self, path):
blob_name = self._blob_name_from_path(path)
try:
output_stream = io.BytesIO()
self._blob_service.get_blob_to_stream(self._azure_container, blob_name, output_stream)
output_stream.seek(0)
except AzureException:
logger.exception('Exception when trying to stream_read_file path %s', path)
raise IOError('Exception when trying to stream_read_file path')
return output_stream
def stream_write(self, path, fp, content_type=None, content_encoding=None):
blob_name = self._blob_name_from_path(path)
content_settings = ContentSettings(
content_type=content_type,
content_encoding=content_encoding,
)
try:
self._blob_service.create_blob_from_stream(self._azure_container, blob_name, fp,
content_settings=content_settings)
except AzureException:
logger.exception('Exception when trying to stream_write path %s', path)
raise IOError('Exception when trying to stream_write path')
def exists(self, path):
blob_name = self._blob_name_from_path(path)
try:
return self._blob_service.exists(self._azure_container, blob_name,
timeout=self._request_timeout)
except AzureException:
logger.exception('Exception when trying to check exists path %s', path)
raise IOError('Exception when trying to check exists path')
def remove(self, path):
blob_name = self._blob_name_from_path(path)
try:
self._blob_service.delete_blob(self._azure_container, blob_name)
except AzureException:
logger.exception('Exception when trying to remove path %s', path)
raise IOError('Exception when trying to remove path')
def get_checksum(self, path):
blob_name = self._blob_name_from_path(path)
try:
blob = self._blob_service.get_blob_properties(self._azure_container, blob_name)
except AzureException:
logger.exception('Exception when trying to get_checksum for path %s', path)
raise IOError('Exception when trying to get_checksum path')
return blob.properties.etag
def initiate_chunked_upload(self):
random_uuid = str(uuid.uuid4())
metadata = {
_BLOCKS_KEY: [],
_CONTENT_TYPE_KEY: None,
}
return random_uuid, metadata
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None):
if length == 0:
return 0, storage_metadata, None
upload_blob_path = self._upload_blob_path_from_uuid(uuid)
new_metadata = copy.deepcopy(storage_metadata)
total_bytes_written = 0
while True:
current_length = length - total_bytes_written
max_length = (min(current_length, _MAX_BLOCK_SIZE) if length != READ_UNTIL_END
else _MAX_BLOCK_SIZE)
if max_length <= 0:
break
limited = LimitingStream(in_fp, max_length, seekable=False)
# Note: Azure fails if a zero-length block is uploaded, so we read all the data here,
# and, if there is none, terminate early.
block_data = b''
for chunk in iter(lambda: limited.read(4096), b""):
block_data += chunk
if len(block_data) == 0:
break
block_index = len(new_metadata[_BLOCKS_KEY])
block_id = format(block_index, '05')
new_metadata[_BLOCKS_KEY].append(block_id)
try:
self._blob_service.put_block(self._azure_container, upload_blob_path, block_data, block_id,
validate_content=True)
except AzureException as ae:
logger.exception('Exception when trying to stream_upload_chunk block %s for %s', block_id,
uuid)
return total_bytes_written, new_metadata, ae
bytes_written = len(block_data)
total_bytes_written += bytes_written
if bytes_written == 0 or bytes_written < max_length:
break
if content_type is not None:
new_metadata[_CONTENT_TYPE_KEY] = content_type
return total_bytes_written, new_metadata, None
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
""" Complete the chunked upload and store the final results in the path indicated.
Returns nothing.
"""
# Commit the blob's blocks.
upload_blob_path = self._upload_blob_path_from_uuid(uuid)
block_list = [BlobBlock(block_id) for block_id in storage_metadata[_BLOCKS_KEY]]
try:
self._blob_service.put_block_list(self._azure_container, upload_blob_path, block_list)
except AzureException:
logger.exception('Exception when trying to put block list for path %s from upload %s',
final_path, uuid)
raise IOError('Exception when trying to put block list')
# Set the content type on the blob if applicable.
if storage_metadata[_CONTENT_TYPE_KEY] is not None:
content_settings = ContentSettings(content_type=storage_metadata[_CONTENT_TYPE_KEY])
try:
self._blob_service.set_blob_properties(self._azure_container, upload_blob_path,
content_settings=content_settings)
except AzureException:
logger.exception('Exception when trying to set blob properties for path %s', final_path)
raise IOError('Exception when trying to set blob properties')
# Copy the blob to its final location.
upload_blob_name = self._upload_blob_name_from_uuid(uuid)
copy_source_url = self.get_direct_download_url(upload_blob_name, expires_in=300)
try:
blob_name = self._blob_name_from_path(final_path)
copy_prop = self._blob_service.copy_blob(self._azure_container, blob_name,
copy_source_url)
except AzureException:
logger.exception('Exception when trying to copy uploaded blob %s to path %s', uuid,
final_path)
raise IOError('Exception when trying to copy uploaded blob')
self._await_copy(self._azure_container, blob_name, copy_prop)
# Delete the original blob.
logger.debug('Deleting chunked upload %s at path %s', uuid, upload_blob_path)
try:
self._blob_service.delete_blob(self._azure_container, upload_blob_path)
except AzureException:
logger.exception('Exception when trying to delete uploaded blob %s', uuid)
raise IOError('Exception when trying to delete uploaded blob')
def cancel_chunked_upload(self, uuid, storage_metadata):
""" Cancel the chunked upload and clean up any outstanding partially uploaded data.
Returns nothing.
"""
upload_blob_path = self._upload_blob_path_from_uuid(uuid)
logger.debug('Canceling chunked upload %s at path %s', uuid, upload_blob_path)
self._blob_service.delete_blob(self._azure_container, upload_blob_path)
def _await_copy(self, container, blob_name, copy_prop):
# Poll for copy completion.
count = 0
while copy_prop.status == 'pending':
props = self._blob_service.get_blob_properties(container, blob_name)
copy_prop = props.properties.copy
if copy_prop.status == 'success':
return
if copy_prop.status == 'failed' or copy_prop.status == 'aborted':
raise IOError('Copy of blob %s failed with status %s' % (blob_name, copy_prop.status))
count = count + 1
if count > _MAX_COPY_POLL_COUNT:
raise IOError('Timed out waiting for copy to complete')
time.sleep(_COPY_POLL_SLEEP)
def copy_to(self, destination, path):
if (self.__class__ == destination.__class__):
logger.debug('Starting copying file from Azure %s to Azure %s via an Azure copy',
self._azure_container, destination)
blob_name = self._blob_name_from_path(path)
copy_source_url = self.get_direct_download_url(path)
copy_prop = self._blob_service.copy_blob(destination._azure_container, blob_name,
copy_source_url)
self._await_copy(destination._azure_container, blob_name, copy_prop)
logger.debug('Finished copying file from Azure %s to Azure %s via an Azure copy',
self._azure_container, destination)
return
# Fallback to a slower, default copy.
logger.debug('Copying file from Azure container %s to %s via a streamed copy',
self._azure_container, destination)
with self.stream_read_file(path) as fp:
destination.stream_write(path, fp)
def setup(self):
# From: https://docs.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services
cors = [CorsRule(allowed_origins='*', allowed_methods=['GET', 'PUT'], max_age_in_seconds=3000,
exposed_headers=['x-ms-meta-*'],
allowed_headers=['x-ms-meta-data*', 'x-ms-meta-target*', 'x-ms-meta-abc',
'Content-Type'])]
self._blob_service.set_blob_service_properties(cors=cors)
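The chunked-upload methods above stage data as blocks with put_block, accumulate the block IDs in the storage metadata, and commit them with put_block_list when the upload completes. A rough illustration of that flow at the driver level; the context object, container, account, and key are hypothetical placeholders:

import io

storage = AzureStorage(context, 'example-container', '/registry',  # hypothetical context/values
                       'example-account', azure_account_key='EXAMPLE_KEY')

upload_id, metadata = storage.initiate_chunked_upload()
data = io.BytesIO(b'layer bytes go here')

# length=-1 (READ_UNTIL_END) drains the stream; each loop iteration stages at
# most _MAX_BLOCK_SIZE (100MB) as a single block and records its block ID.
written, metadata, err = storage.stream_upload_chunk(upload_id, 0, -1, data, metadata)
if err is None:
    storage.complete_chunked_upload(upload_id, 'some/final/path', metadata)
else:
    storage.cancel_chunked_upload(upload_id, metadata)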

131
storage/basestorage.py Normal file

@@ -0,0 +1,131 @@
import logging
import tempfile
from digest.digest_tools import content_path
from util.registry.filelike import READ_UNTIL_END
logger = logging.getLogger(__name__)
class StoragePaths(object):
shared_images = 'sharedimages'
@staticmethod
def temp_store_handler():
tmpf = tempfile.TemporaryFile()
def fn(buf):
try:
tmpf.write(buf)
except IOError:
pass
return tmpf, fn
def _image_path(self, storage_uuid):
return '{0}/{1}/'.format(self.shared_images, storage_uuid)
def v1_image_layer_path(self, storage_uuid):
base_path = self._image_path(storage_uuid)
return '{0}layer'.format(base_path)
def blob_path(self, digest_str):
return content_path(digest_str)
class BaseStorage(StoragePaths):
def __init__(self):
# Set the IO buffer to 64kB
self.buffer_size = 64 * 1024
def setup(self):
""" Called to perform any storage system setup. """
pass
def validate(self, client):
""" Called to perform storage system validation. The client is an HTTP
client to use for any external calls. """
# Put a temporary file to make sure the normal storage paths work.
self.put_content('_verify', 'testing 123')
if not self.exists('_verify'):
raise Exception('Could not find verification file')
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False, head=False):
return None
def get_direct_upload_url(self, path, mime_type, requires_cors=True):
return None
def get_supports_resumable_downloads(self):
return False
def get_content(self, path):
raise NotImplementedError
def put_content(self, path, content):
raise NotImplementedError
def stream_read(self, path):
raise NotImplementedError
def stream_read_file(self, path):
raise NotImplementedError
def stream_write(self, path, fp, content_type=None, content_encoding=None):
raise NotImplementedError
def exists(self, path):
raise NotImplementedError
def remove(self, path):
raise NotImplementedError
def get_checksum(self, path):
raise NotImplementedError
def stream_write_to_fp(self, in_fp, out_fp, num_bytes=READ_UNTIL_END):
""" Copy the specified number of bytes from the input file stream to the output stream. If
num_bytes < 0 copy until the stream ends. Returns the number of bytes copied.
"""
bytes_copied = 0
while bytes_copied < num_bytes or num_bytes == READ_UNTIL_END:
size_to_read = min(num_bytes - bytes_copied, self.buffer_size)
if size_to_read < 0:
size_to_read = self.buffer_size
buf = in_fp.read(size_to_read)
if not buf:
break
out_fp.write(buf)
bytes_copied += len(buf)
return bytes_copied
def copy_to(self, destination, path):
raise NotImplementedError
class BaseStorageV2(BaseStorage):
def initiate_chunked_upload(self):
""" Start a new chunked upload, returning the uuid and any associated storage metadata
"""
raise NotImplementedError
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None):
""" Upload the specified amount of data from the given file pointer to the chunked destination
specified, starting at the given offset. Returns the number of bytes uploaded, a new
version of the storage_metadata and an error object (if one occurred or None if none).
Pass length as -1 to upload as much data from the in_fp as possible.
"""
raise NotImplementedError
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
""" Complete the chunked upload and store the final results in the path indicated.
Returns nothing.
"""
raise NotImplementedError
def cancel_chunked_upload(self, uuid, storage_metadata):
""" Cancel the chunked upload and clean up any outstanding partially uploaded data.
Returns nothing.
"""
raise NotImplementedError
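The stream_write_to_fp docstring above defines the one concrete I/O helper in BaseStorage: copy num_bytes from the input stream, or copy to EOF when num_bytes is READ_UNTIL_END (-1). A small sketch, assuming FakeStorage (defined later in this commit) stands in for a real driver:

import io
from storage.fakestorage import FakeStorage

driver = FakeStorage(None)          # any BaseStorage subclass inherits stream_write_to_fp
src = io.BytesIO(b'0123456789')
dst = io.BytesIO()

copied = driver.stream_write_to_fp(src, dst, num_bytes=4)
assert copied == 4 and dst.getvalue() == b'0123'

# With the default READ_UNTIL_END, copying continues until the source is exhausted.
driver.stream_write_to_fp(src, dst)
assert dst.getvalue() == b'0123456789'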

707
storage/cloud.py Normal file

@@ -0,0 +1,707 @@
import cStringIO as StringIO
import os
import logging
import copy
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cachetools.func import lru_cache
from itertools import chain
from datetime import datetime, timedelta
from botocore.signers import CloudFrontSigner
from boto.exception import S3ResponseError
import boto.s3.connection
import boto.s3.multipart
import boto.gs.connection
import boto.s3.key
import boto.gs.key
from io import BufferedIOBase
from uuid import uuid4
from collections import namedtuple
from util.registry import filelike
from storage.basestorage import BaseStorageV2
logger = logging.getLogger(__name__)
_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length'])
_CHUNKS_KEY = 'chunks'
class StreamReadKeyAsFile(BufferedIOBase):
def __init__(self, key):
self._key = key
def read(self, amt=None):
if self.closed:
return None
resp = self._key.read(amt)
return resp
def readable(self):
return True
@property
def closed(self):
return self._key.closed
def close(self):
self._key.close(fast=True)
class _CloudStorage(BaseStorageV2):
def __init__(self, context, connection_class, key_class, connect_kwargs, upload_params,
storage_path, bucket_name, access_key=None, secret_key=None):
super(_CloudStorage, self).__init__()
self.minimum_chunk_size = 5 * 1024 * 1024
self.maximum_chunk_size = None
self._initialized = False
self._bucket_name = bucket_name
self._access_key = access_key
self._secret_key = secret_key
self._root_path = storage_path
self._connection_class = connection_class
self._key_class = key_class
self._upload_params = upload_params
self._connect_kwargs = connect_kwargs
self._cloud_conn = None
self._cloud_bucket = None
self._context = context
def _initialize_cloud_conn(self):
if not self._initialized:
self._cloud_conn = self._connection_class(self._access_key, self._secret_key,
**self._connect_kwargs)
self._cloud_bucket = self._cloud_conn.get_bucket(self._bucket_name, validate=False)
self._initialized = True
def _debug_key(self, key):
"""Used for debugging only."""
orig_meth = key.bucket.connection.make_request
def new_meth(*args, **kwargs):
print '#' * 16
print args
print kwargs
print '#' * 16
return orig_meth(*args, **kwargs)
key.bucket.connection.make_request = new_meth
def _init_path(self, path=None):
path = os.path.join(self._root_path, path) if path else self._root_path
if path and path[0] == '/':
return path[1:]
return path
def get_cloud_conn(self):
self._initialize_cloud_conn()
return self._cloud_conn
def get_cloud_bucket(self):
return self._cloud_bucket
def get_content(self, path):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
try:
return key.get_contents_as_string()
except S3ResponseError as s3r:
# Raise an IOError in case the key was not found, to maintain the current
# interface.
if s3r.error_code == 'NoSuchKey':
raise IOError('No such key: \'{0}\''.format(path))
raise
def put_content(self, path, content):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
key.set_contents_from_string(content, **self._upload_params)
return path
def get_supports_resumable_downloads(self):
return True
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False, head=False):
self._initialize_cloud_conn()
path = self._init_path(path)
k = self._key_class(self._cloud_bucket, path)
if head:
return k.generate_url(expires_in, 'HEAD')
return k.generate_url(expires_in)
def get_direct_upload_url(self, path, mime_type, requires_cors=True):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
url = key.generate_url(300, 'PUT', headers={'Content-Type': mime_type}, encrypt_key=True)
return url
def stream_read(self, path):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
if not key.exists():
raise IOError('No such key: \'{0}\''.format(path))
while True:
buf = key.read(self.buffer_size)
if not buf:
break
yield buf
def stream_read_file(self, path):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
if not key.exists():
raise IOError('No such key: \'{0}\''.format(path))
return StreamReadKeyAsFile(key)
def __initiate_multipart_upload(self, path, content_type, content_encoding):
# The minimum part size for S3 multipart uploads is 5MB
self._initialize_cloud_conn()
path = self._init_path(path)
metadata = {}
if content_type is not None:
metadata['Content-Type'] = content_type
if content_encoding is not None:
metadata['Content-Encoding'] = content_encoding
if self._context.metric_queue is not None:
self._context.metric_queue.multipart_upload_start.Inc()
return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
def stream_write(self, path, fp, content_type=None, content_encoding=None):
""" Writes the data found in the file-like stream to the given path. Raises an IOError
if the write fails.
"""
_, write_error = self._stream_write_internal(path, fp, content_type, content_encoding)
if write_error is not None:
logger.error('Error when trying to stream_write path `%s`: %s', path, write_error)
raise IOError('Exception when trying to stream_write path')
def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None,
cancel_on_error=True, size=filelike.READ_UNTIL_END):
""" Writes the data found in the file-like stream to the given path, with optional limit
on size. Note that this method returns a *tuple* of (bytes_written, write_error) and should
*not* raise an exception (such as IOError) if a problem uploading occurred. ALWAYS check
the returned tuple on calls to this method.
"""
write_error = None
try:
mp = self.__initiate_multipart_upload(path, content_type, content_encoding)
except S3ResponseError as e:
logger.exception('Exception when initiating multipart upload')
return 0, e
# We are going to reuse this but be VERY careful to only read the number of bytes written to it
buf = StringIO.StringIO()
num_part = 1
total_bytes_written = 0
while size == filelike.READ_UNTIL_END or total_bytes_written < size:
bytes_to_copy = self.minimum_chunk_size
if size != filelike.READ_UNTIL_END:
# We never want to ask for more bytes than our caller has indicated to copy
bytes_to_copy = min(bytes_to_copy, size - total_bytes_written)
buf.seek(0)
try:
# Stage the bytes into the buffer for use with the multipart upload file API
bytes_staged = self.stream_write_to_fp(fp, buf, bytes_to_copy)
if bytes_staged == 0:
break
buf.seek(0)
mp.upload_part_from_file(buf, num_part, size=bytes_staged)
total_bytes_written += bytes_staged
num_part += 1
except (S3ResponseError, IOError) as e:
logger.warn('Error when writing to stream in stream_write_internal at path %s: %s', path, e)
write_error = e
if self._context.metric_queue is not None:
self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['failure'])
if cancel_on_error:
try:
mp.cancel_upload()
except (S3ResponseError, IOError):
logger.exception('Could not cancel upload')
return 0, write_error
else:
break
if total_bytes_written > 0:
if self._context.metric_queue is not None:
self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['success'])
self._perform_action_with_retry(mp.complete_upload)
return total_bytes_written, write_error
def exists(self, path):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
return key.exists()
def remove(self, path):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
if key.exists():
# It's a file
key.delete()
return
# We assume it's a directory
if not path.endswith('/'):
path += '/'
for key in self._cloud_bucket.list(prefix=path):
key.delete()
def get_checksum(self, path):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
k = self._cloud_bucket.lookup(key)
if k is None:
raise IOError('No such key: \'{0}\''.format(path))
return k.etag[1:-1][:7]
def copy_to(self, destination, path):
""" Copies the given path from this storage to the destination storage. """
self._initialize_cloud_conn()
# First try to copy directly via boto, but only if the storages are the
# same type, with the same access information.
if (self.__class__ == destination.__class__ and
self._access_key and self._secret_key and
self._access_key == destination._access_key and
self._secret_key == destination._secret_key and
self._connect_kwargs == destination._connect_kwargs):
# Initialize the cloud connection on the destination as well.
destination._initialize_cloud_conn()
# Check the buckets for both the source and destination locations.
if self._cloud_bucket is None:
logger.error('Cloud bucket not found for location %s; Configuration is probably invalid!',
self._bucket_name)
return
if destination._cloud_bucket is None:
logger.error('Cloud bucket not found for location %s; Configuration is probably invalid!',
destination._bucket_name)
return
# Perform the copy.
logger.debug('Copying file from %s to %s via a direct boto copy', self._cloud_bucket,
destination._cloud_bucket)
source_path = self._init_path(path)
source_key = self._key_class(self._cloud_bucket, source_path)
dest_path = destination._init_path(path)
source_key.copy(destination._cloud_bucket, dest_path)
return
# Fallback to a slower, default copy.
logger.debug('Copying file from %s to %s via a streamed copy', self._cloud_bucket,
destination)
with self.stream_read_file(path) as fp:
destination.stream_write(path, fp)
def _rel_upload_path(self, uuid):
return 'uploads/{0}'.format(uuid)
def initiate_chunked_upload(self):
self._initialize_cloud_conn()
random_uuid = str(uuid4())
metadata = {
_CHUNKS_KEY: [],
}
return random_uuid, metadata
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None):
self._initialize_cloud_conn()
# We are going to upload each chunk to a separate key
chunk_path = self._rel_upload_path(str(uuid4()))
bytes_written, write_error = self._stream_write_internal(chunk_path, in_fp,
cancel_on_error=False, size=length,
content_type=content_type)
new_metadata = copy.deepcopy(storage_metadata)
# We are only going to track keys to which data was confirmed written
if bytes_written > 0:
new_metadata[_CHUNKS_KEY].append(_PartUploadMetadata(chunk_path, offset, bytes_written))
return bytes_written, new_metadata, write_error
def _chunk_generator(self, chunk_list):
for chunk in chunk_list:
yield filelike.StreamSlice(self.stream_read_file(chunk.path), 0, chunk.length)
@staticmethod
def _chunk_list_from_metadata(storage_metadata):
return [_PartUploadMetadata(*chunk_args) for chunk_args in storage_metadata[_CHUNKS_KEY]]
def _client_side_chunk_join(self, final_path, chunk_list):
# If there's only one chunk, just "move" (copy and delete) the key and call it a day.
if len(chunk_list) == 1:
chunk_path = self._init_path(chunk_list[0].path)
abs_final_path = self._init_path(final_path)
# Let the copy raise an exception if it fails.
self._cloud_bucket.copy_key(abs_final_path, self._bucket_name, chunk_path)
# Attempt to clean up the old chunk.
try:
self._cloud_bucket.delete_key(chunk_path)
except IOError:
# We failed to delete a chunk. This sucks, but we shouldn't fail the push.
msg = 'Failed to clean up chunk %s for move of %s'
logger.exception(msg, chunk_path, abs_final_path)
else:
# Concatenate and write all the chunks as one key.
concatenated = filelike.FilelikeStreamConcat(self._chunk_generator(chunk_list))
self.stream_write(final_path, concatenated)
# Attempt to clean up all the chunks.
for chunk in chunk_list:
try:
self._cloud_bucket.delete_key(self._init_path(chunk.path))
except IOError:
# We failed to delete a chunk. This sucks, but we shouldn't fail the push.
msg = 'Failed to clean up chunk %s for reupload of %s'
logger.exception(msg, chunk.path, final_path)
@staticmethod
def _perform_action_with_retry(action, *args, **kwargs):
# Note: Sometimes Amazon S3 simply raises an internal error when trying to complete
# an action. The recommendation is to simply try calling the action again.
for remaining_retries in range(2, -1, -1):
try:
action(*args, **kwargs)
break
except S3ResponseError as s3re:
if remaining_retries and s3re.status == 200 and s3re.error_code == 'InternalError':
# Weird internal error case. Retry.
continue
# Otherwise, raise it.
logger.exception('Exception trying to perform action %s', action)
raise s3re
@staticmethod
def _rechunk(chunk, max_chunk_size):
""" Rechunks the chunk list to meet maximum chunk size restrictions for the storage engine. """
if max_chunk_size is None or chunk.length <= max_chunk_size:
yield chunk
else:
newchunk_length = chunk.length / 2
first_subchunk = _PartUploadMetadata(chunk.path, chunk.offset, newchunk_length)
second_subchunk = _PartUploadMetadata(chunk.path,
chunk.offset + newchunk_length,
chunk.length - newchunk_length)
for subchunk in chain(_CloudStorage._rechunk(first_subchunk, max_chunk_size),
_CloudStorage._rechunk(second_subchunk, max_chunk_size)):
yield subchunk
def complete_chunked_upload(self, uuid, final_path, storage_metadata, force_client_side=False):
self._initialize_cloud_conn()
chunk_list = self._chunk_list_from_metadata(storage_metadata)
# Here is where things get interesting: we are going to try to assemble this server side
# In order to be a candidate all parts (after offsets have been computed) must be at least 5MB
server_side_assembly = False
if not force_client_side:
server_side_assembly = True
for chunk_offset, chunk in enumerate(chunk_list):
# If the chunk is too small and not the last chunk, we rule out server-side assembly
if chunk.length < self.minimum_chunk_size and (chunk_offset + 1) < len(chunk_list):
server_side_assembly = False
break
if server_side_assembly:
logger.debug('Performing server side assembly of multi-part upload for: %s', final_path)
try:
# Awesome, we can do this completely server side, now we have to start a new multipart
# upload and use copy_part_from_key to set all of the chunks.
mpu = self.__initiate_multipart_upload(final_path, content_type=None, content_encoding=None)
updated_chunks = chain.from_iterable([_CloudStorage._rechunk(c, self.maximum_chunk_size)
for c in chunk_list])
for index, chunk in enumerate(updated_chunks):
abs_chunk_path = self._init_path(chunk.path)
self._perform_action_with_retry(mpu.copy_part_from_key, self.get_cloud_bucket().name,
abs_chunk_path, index + 1, start=chunk.offset,
end=chunk.length + chunk.offset - 1)
self._perform_action_with_retry(mpu.complete_upload)
except IOError as ioe:
# Something bad happened, log it and then give up
msg = 'Exception when attempting server-side assembly for: %s'
logger.exception(msg, final_path)
mpu.cancel_upload()
raise ioe
else:
# We are going to turn all of the server side objects into a single file-like stream, and
# pass that to stream_write to chunk and upload the final object.
self._client_side_chunk_join(final_path, chunk_list)
def cancel_chunked_upload(self, uuid, storage_metadata):
self._initialize_cloud_conn()
# We have to go through and delete all of the uploaded chunks
for chunk in self._chunk_list_from_metadata(storage_metadata):
self.remove(chunk.path)
class S3Storage(_CloudStorage):
def __init__(self, context, storage_path, s3_bucket, s3_access_key=None,
s3_secret_key=None, host=None, port=None):
upload_params = {
'encrypt_key': True,
}
connect_kwargs = {}
if host:
if host.startswith('http:') or host.startswith('https:'):
raise ValueError('host name must not start with http:// or https://')
connect_kwargs['host'] = host
if port:
connect_kwargs['port'] = int(port)
super(S3Storage, self).__init__(context, boto.s3.connection.S3Connection, boto.s3.key.Key,
connect_kwargs, upload_params, storage_path, s3_bucket,
access_key=s3_access_key or None,
secret_key=s3_secret_key or None)
self.maximum_chunk_size = 5 * 1024 * 1024 * 1024 # 5GB.
def setup(self):
self.get_cloud_bucket().set_cors_xml("""<?xml version="1.0" encoding="UTF-8"?>
<CORSConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
<CORSRule>
<AllowedOrigin>*</AllowedOrigin>
<AllowedMethod>GET</AllowedMethod>
<MaxAgeSeconds>3000</MaxAgeSeconds>
<AllowedHeader>Authorization</AllowedHeader>
</CORSRule>
<CORSRule>
<AllowedOrigin>*</AllowedOrigin>
<AllowedMethod>PUT</AllowedMethod>
<MaxAgeSeconds>3000</MaxAgeSeconds>
<AllowedHeader>Content-Type</AllowedHeader>
<AllowedHeader>x-amz-acl</AllowedHeader>
<AllowedHeader>origin</AllowedHeader>
</CORSRule>
</CORSConfiguration>""")
class GoogleCloudStorage(_CloudStorage):
def __init__(self, context, storage_path, access_key, secret_key, bucket_name):
upload_params = {}
connect_kwargs = {}
super(GoogleCloudStorage, self).__init__(context, boto.gs.connection.GSConnection,
boto.gs.key.Key, connect_kwargs, upload_params,
storage_path, bucket_name, access_key, secret_key)
def setup(self):
self.get_cloud_bucket().set_cors_xml("""<?xml version="1.0" encoding="UTF-8"?>
<CorsConfig>
<Cors>
<Origins>
<Origin>*</Origin>
</Origins>
<Methods>
<Method>GET</Method>
<Method>PUT</Method>
</Methods>
<ResponseHeaders>
<ResponseHeader>Content-Type</ResponseHeader>
</ResponseHeaders>
<MaxAgeSec>3000</MaxAgeSec>
</Cors>
</CorsConfig>""")
def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None,
cancel_on_error=True, size=filelike.READ_UNTIL_END):
""" Writes the data found in the file-like stream to the given path, with optional limit
on size. Note that this method returns a *tuple* of (bytes_written, write_error) and should
*not* raise an exception (such as IOError) if a problem uploading occurred. ALWAYS check
the returned tuple on calls to this method.
"""
# The minimum part size for S3 multipart uploads is 5MB
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
if content_type is not None:
key.set_metadata('Content-Type', content_type)
if content_encoding is not None:
key.set_metadata('Content-Encoding', content_encoding)
if size != filelike.READ_UNTIL_END:
fp = filelike.StreamSlice(fp, 0, size)
# TODO figure out how to handle cancel_on_error=False
try:
key.set_contents_from_stream(fp)
except IOError as ex:
return 0, ex
return key.size, None
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
self._initialize_cloud_conn()
# Boto does not support GCS's multipart upload API because it differs from S3, so
# we are forced to join it all locally and then reupload.
# See https://github.com/boto/boto/issues/3355
chunk_list = self._chunk_list_from_metadata(storage_metadata)
self._client_side_chunk_join(final_path, chunk_list)
class RadosGWStorage(_CloudStorage):
def __init__(self, context, hostname, is_secure, storage_path, access_key, secret_key,
bucket_name, port=None):
upload_params = {}
connect_kwargs = {
'host': hostname,
'is_secure': is_secure,
'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
}
if port:
connect_kwargs['port'] = int(port)
super(RadosGWStorage, self).__init__(context, boto.s3.connection.S3Connection,
boto.s3.key.Key, connect_kwargs, upload_params,
storage_path, bucket_name, access_key, secret_key)
# TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False,
head=False):
if requires_cors:
return None
return super(RadosGWStorage, self).get_direct_download_url(path, request_ip, expires_in,
requires_cors, head)
# TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
def get_direct_upload_url(self, path, mime_type, requires_cors=True):
if requires_cors:
return None
return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors)
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
self._initialize_cloud_conn()
# RadosGW does not support multipart copying from keys, so we are forced to join
# it all locally and then reupload.
# See https://github.com/ceph/ceph/pull/5139
chunk_list = self._chunk_list_from_metadata(storage_metadata)
self._client_side_chunk_join(final_path, chunk_list)
class RHOCSStorage(RadosGWStorage):
""" RHOCSStorage implements storage explicitly via RHOCS. For now, this uses the same protocol
as RadosGW, but we create a distinct driver for future additional capabilities.
"""
pass
class CloudFrontedS3Storage(S3Storage):
""" An S3Storage engine that redirects to CloudFront for all requests outside of AWS. """
def __init__(self, context, cloudfront_distribution_domain, cloudfront_key_id,
cloudfront_privatekey_filename, storage_path, s3_bucket, *args, **kwargs):
super(CloudFrontedS3Storage, self).__init__(context, storage_path, s3_bucket, *args, **kwargs)
self.cloudfront_distribution_domain = cloudfront_distribution_domain
self.cloudfront_key_id = cloudfront_key_id
self.cloudfront_privatekey = self._load_private_key(cloudfront_privatekey_filename)
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False,
head=False):
# If CloudFront could not be loaded, fall back to normal S3.
if self.cloudfront_privatekey is None or request_ip is None:
return super(CloudFrontedS3Storage, self).get_direct_download_url(path, request_ip,
expires_in, requires_cors,
head)
resolved_ip_info = None
logger.debug('Got direct download request for path "%s" with IP "%s"', path, request_ip)
# Lookup the IP address in our resolution table and determine whether it is under AWS.
# If it is, then return an S3 signed URL, since we are in-network.
resolved_ip_info = self._context.ip_resolver.resolve_ip(request_ip)
logger.debug('Resolved IP information for IP %s: %s', request_ip, resolved_ip_info)
if resolved_ip_info and resolved_ip_info.provider == 'aws':
return super(CloudFrontedS3Storage, self).get_direct_download_url(path, request_ip,
expires_in, requires_cors,
head)
url = 'https://%s/%s' % (self.cloudfront_distribution_domain, path)
expire_date = datetime.now() + timedelta(seconds=expires_in)
signer = self._get_cloudfront_signer()
signed_url = signer.generate_presigned_url(url, date_less_than=expire_date)
logger.debug('Returning CloudFront URL for path "%s" with IP "%s": %s', path, resolved_ip_info,
signed_url)
return signed_url
@lru_cache(maxsize=1)
def _get_cloudfront_signer(self):
return CloudFrontSigner(self.cloudfront_key_id, self._get_rsa_signer())
@lru_cache(maxsize=1)
def _get_rsa_signer(self):
private_key = self.cloudfront_privatekey
def handler(message):
return private_key.sign(message, padding.PKCS1v15(), hashes.SHA1())
return handler
@lru_cache(maxsize=1)
def _load_private_key(self, cloudfront_privatekey_filename):
""" Returns the private key, loaded from the config provider, used to sign direct
download URLs to CloudFront.
"""
if self._context.config_provider is None:
return None
with self._context.config_provider.get_volume_file(cloudfront_privatekey_filename) as key_file:
return serialization.load_pem_private_key(
key_file.read(),
password=None,
backend=default_backend()
)
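The _rechunk docstring above says oversized parts are split to satisfy the engine's maximum chunk size; the split is a recursive halving that keeps offsets contiguous. A worked example with made-up sizes (a 12MB part against a 5MB limit):

from storage.cloud import _CloudStorage, _PartUploadMetadata

MB = 1024 * 1024
chunk = _PartUploadMetadata(path='uploads/example', offset=0, length=12 * MB)
pieces = list(_CloudStorage._rechunk(chunk, 5 * MB))

# 12MB halves to two 6MB pieces, which halve again to four 3MB pieces.
assert [(p.offset, p.length) for p in pieces] == [
    (0 * MB, 3 * MB), (3 * MB, 3 * MB), (6 * MB, 3 * MB), (9 * MB, 3 * MB)]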

83
storage/distributedstorage.py Normal file

@@ -0,0 +1,83 @@
import random
import logging
from functools import wraps
from storage.basestorage import StoragePaths, BaseStorage, BaseStorageV2
logger = logging.getLogger(__name__)
def _location_aware(unbound_func, requires_write=False):
@wraps(unbound_func)
def wrapper(self, locations, *args, **kwargs):
if requires_write:
assert not self.readonly_mode
storage = None
for preferred in self.preferred_locations:
if preferred in locations:
storage = self._storages[preferred]
break
if not storage:
storage = self._storages[random.sample(locations, 1)[0]]
storage_func = getattr(storage, unbound_func.__name__)
return storage_func(*args, **kwargs)
return wrapper
class DistributedStorage(StoragePaths):
def __init__(self, storages, preferred_locations=None, default_locations=None, proxy=None,
readonly_mode=False):
self._storages = dict(storages)
self.preferred_locations = list(preferred_locations or [])
self.default_locations = list(default_locations or [])
self.proxy = proxy
self.readonly_mode = readonly_mode
@property
def locations(self):
""" Returns the names of the locations supported. """
return list(self._storages.keys())
_get_direct_download_url = _location_aware(BaseStorage.get_direct_download_url)
get_direct_upload_url = _location_aware(BaseStorage.get_direct_upload_url)
get_content = _location_aware(BaseStorage.get_content)
put_content = _location_aware(BaseStorage.put_content, requires_write=True)
stream_read = _location_aware(BaseStorage.stream_read)
stream_read_file = _location_aware(BaseStorage.stream_read_file)
stream_write = _location_aware(BaseStorage.stream_write, requires_write=True)
exists = _location_aware(BaseStorage.exists)
remove = _location_aware(BaseStorage.remove, requires_write=True)
validate = _location_aware(BaseStorage.validate, requires_write=True)
get_checksum = _location_aware(BaseStorage.get_checksum)
get_supports_resumable_downloads = _location_aware(BaseStorage.get_supports_resumable_downloads)
initiate_chunked_upload = _location_aware(BaseStorageV2.initiate_chunked_upload,
requires_write=True)
stream_upload_chunk = _location_aware(BaseStorageV2.stream_upload_chunk,
requires_write=True)
complete_chunked_upload = _location_aware(BaseStorageV2.complete_chunked_upload,
requires_write=True)
cancel_chunked_upload = _location_aware(BaseStorageV2.cancel_chunked_upload,
requires_write=True)
def get_direct_download_url(self, locations, path, request_ip=None, expires_in=600,
requires_cors=False, head=False):
download_url = self._get_direct_download_url(locations, path, request_ip, expires_in,
requires_cors, head)
if download_url is None:
return None
if self.proxy is None:
return download_url
return self.proxy.proxy_download_url(download_url)
def copy_between(self, path, source_location, destination_location):
""" Copies a file between the source location and the destination location. """
source_storage = self._storages[source_location]
destination_storage = self._storages[destination_location]
source_storage.copy_to(destination_storage, path)
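The _location_aware wrapper above routes each call to the first preferred location present in the supplied set, falls back to a random one, and asserts the registry is not read-only for write operations. A sketch of that selection, using FakeStorage instances as stand-ins for real drivers:

from storage.distributedstorage import DistributedStorage
from storage.fakestorage import FakeStorage

storages = {
    'local_eu': FakeStorage('local'),
    'local_us': FakeStorage('local'),
}
distributed = DistributedStorage(storages, preferred_locations=['local_us'])

# 'local_us' is preferred and present in the passed locations, so it handles the write.
distributed.put_content(['local_eu', 'local_us'], 'some/path', 'hello')
assert distributed.get_content(['local_us'], 'some/path') == 'hello'

# With readonly_mode=True, any requires_write operation trips the assert in _location_aware.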

176
storage/downloadproxy.py Normal file

@@ -0,0 +1,176 @@
import logging
import base64
import urllib
from urlparse import urlparse
from flask import abort, request
from jsonschema import validate, ValidationError
from util.security.registry_jwt import (generate_bearer_token, decode_bearer_token,
InvalidBearerTokenException)
logger = logging.getLogger(__name__)
PROXY_STORAGE_MAX_LIFETIME_S = 30 # Seconds
STORAGE_PROXY_SUBJECT = 'storageproxy'
STORAGE_PROXY_ACCESS_TYPE = 'storageproxy'
ACCESS_SCHEMA = {
'type': 'array',
'description': 'List of access granted to the subject',
'items': {
'type': 'object',
'required': [
'type',
'scheme',
'host',
'uri',
],
'properties': {
'type': {
'type': 'string',
'description': 'We only allow storage proxy permissions',
'enum': [
'storageproxy',
],
},
'scheme': {
'type': 'string',
'description': 'The scheme for the storage URL being proxied'
},
'host': {
'type': 'string',
'description': 'The hostname for the storage URL being proxied'
},
'uri': {
'type': 'string',
'description': 'The URI path for the storage URL being proxied'
},
},
},
}
class DownloadProxy(object):
""" Helper class to enable proxying of direct download URLs for storage via the registry's
local NGINX.
"""
def __init__(self, app, instance_keys):
self.app = app
self.instance_keys = instance_keys
app.add_url_rule('/_storage_proxy_auth', '_storage_proxy_auth', self._validate_proxy_url)
def proxy_download_url(self, download_url):
""" Returns a URL to proxy the specified blob download URL.
"""
# Parse the URL to be downloaded into its components (host, path, scheme).
parsed = urlparse(download_url)
path = parsed.path
if parsed.query:
path = path + '?' + parsed.query
if path.startswith('/'):
path = path[1:]
access = {
'type': STORAGE_PROXY_ACCESS_TYPE,
'uri': path,
'host': parsed.netloc,
'scheme': parsed.scheme,
}
# Generate a JWT that signs access to this URL. This JWT will be passed back to the registry
# code when the download commences. Note that we don't add any context here, as it isn't
# needed.
server_hostname = self.app.config['SERVER_HOSTNAME']
token = generate_bearer_token(server_hostname, STORAGE_PROXY_SUBJECT, {}, [access],
PROXY_STORAGE_MAX_LIFETIME_S, self.instance_keys)
url_scheme = self.app.config['PREFERRED_URL_SCHEME']
server_hostname = self.app.config['SERVER_HOSTNAME']
# The proxy path is of the form:
# http(s)://registry_server/_storage_proxy/{token}/{scheme}/{hostname}/rest/of/path/here
encoded_token = base64.urlsafe_b64encode(token)
proxy_url = '%s://%s/_storage_proxy/%s/%s/%s/%s' % (url_scheme, server_hostname, encoded_token,
parsed.scheme, parsed.netloc, path)
logger.debug('Proxying via URL %s', proxy_url)
return proxy_url
def _validate_proxy_url(self):
original_uri = request.headers.get('X-Original-URI', None)
if not original_uri:
logger.error('Missing original URI: %s', request.headers)
abort(401)
if not original_uri.startswith('/_storage_proxy/'):
logger.error('Unknown storage proxy path: %s', original_uri)
abort(401)
# The proxy path is of the form:
# /_storage_proxy/{token}/{scheme}/{hostname}/rest/of/path/here
without_prefix = original_uri[len('/_storage_proxy/'):]
parts = without_prefix.split('/', 3)
if len(parts) != 4:
logger.error('Invalid storage proxy path (found %s parts): %s', len(parts), without_prefix)
abort(401)
encoded_token, scheme, host, uri = parts
try:
token = base64.urlsafe_b64decode(str(encoded_token))
except ValueError:
logger.exception('Could not decode proxy token')
abort(401)
except TypeError:
logger.exception('Could not decode proxy token')
abort(401)
logger.debug('Got token %s for storage proxy auth request %s with parts %s', token,
original_uri, parts)
# Decode the bearer token.
try:
decoded = decode_bearer_token(token, self.instance_keys, self.app.config)
except InvalidBearerTokenException:
logger.exception('Invalid token for storage proxy')
abort(401)
# Ensure it is for the proxy.
if decoded['sub'] != STORAGE_PROXY_SUBJECT:
logger.exception('Invalid subject %s for storage proxy auth', decoded['sub'])
abort(401)
# Validate that the access matches the token format.
access = decoded.get('access', {})
try:
validate(access, ACCESS_SCHEMA)
except ValidationError:
logger.exception('We should not be minting invalid credentials: %s', access)
abort(401)
# For now, we only expect a single access credential.
if len(access) != 1:
logger.exception('We should not be minting invalid credentials: %s', access)
abort(401)
# Ensure the signed access matches the requested URL's pieces.
granted_access = access[0]
if granted_access['scheme'] != scheme:
logger.exception('Mismatch in scheme. %s expected, %s found', granted_access['scheme'],
scheme)
abort(401)
if granted_access['host'] != host:
logger.exception('Mismatch in host. %s expected, %s found', granted_access['host'], host)
abort(401)
if granted_access['uri'] != uri:
logger.exception('Mismatch in uri. %s expected, %s found', granted_access['uri'], uri)
abort(401)
return 'OK'
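The comments above give the proxy path format, /_storage_proxy/{token}/{scheme}/{hostname}/rest/of/path, and _validate_proxy_url splits that path back apart to compare each piece against the signed access grant. A sketch of the decomposition with a made-up token and URI:

# Values below are illustrative only.
original_uri = '/_storage_proxy/ZXhhbXBsZQ==/https/bucket.s3.example.com/registry/sha256/ab/abcd'
without_prefix = original_uri[len('/_storage_proxy/'):]
encoded_token, scheme, host, uri = without_prefix.split('/', 3)

assert encoded_token == 'ZXhhbXBsZQ=='      # base64url-encoded JWT
assert scheme == 'https'
assert host == 'bucket.s3.example.com'
assert uri == 'registry/sha256/ab/abcd'     # remainder, checked against the grant's 'uri'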

101
storage/fakestorage.py Normal file

@@ -0,0 +1,101 @@
import cStringIO as StringIO
import hashlib
from collections import defaultdict
from uuid import uuid4
from storage.basestorage import BaseStorageV2
_GLOBAL_FAKE_STORAGE_MAP = defaultdict(StringIO.StringIO)
class FakeStorage(BaseStorageV2):
def __init__(self, context):
super(FakeStorage, self).__init__()
self._fake_storage_map = (defaultdict(StringIO.StringIO)
if context == 'local' else _GLOBAL_FAKE_STORAGE_MAP)
def _init_path(self, path=None, create=False):
return path
def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False, head=False):
try:
if self.get_content('supports_direct_download') == 'true':
return 'http://somefakeurl?goes=here'
except:
pass
return None
def get_content(self, path):
if path not in self._fake_storage_map:
raise IOError('Fake file %s not found. Existing: %s' % (path, self._fake_storage_map.keys()))
self._fake_storage_map.get(path).seek(0)
return self._fake_storage_map.get(path).read()
def put_content(self, path, content):
self._fake_storage_map.pop(path, None)
self._fake_storage_map[path].write(content)
def stream_read(self, path):
io_obj = self._fake_storage_map[path]
io_obj.seek(0)
while True:
buf = io_obj.read(self.buffer_size)
if not buf:
break
yield buf
def stream_read_file(self, path):
return StringIO.StringIO(self.get_content(path))
def stream_write(self, path, fp, content_type=None, content_encoding=None):
out_fp = self._fake_storage_map[path]
out_fp.seek(0)
self.stream_write_to_fp(fp, out_fp)
def remove(self, path):
self._fake_storage_map.pop(path, None)
def exists(self, path):
if self._fake_storage_map.get('all_files_exist', None):
return True
return path in self._fake_storage_map
def get_checksum(self, path):
return hashlib.sha256(self._fake_storage_map[path].read()).hexdigest()[:7]
def initiate_chunked_upload(self):
new_uuid = str(uuid4())
self._fake_storage_map[new_uuid].seek(0)
return new_uuid, {}
def stream_upload_chunk(self, uuid, offset, length, in_fp, _, content_type=None):
if self.exists('except_upload'):
return 0, {}, IOError("I'm an exception!")
upload_storage = self._fake_storage_map[uuid]
try:
return self.stream_write_to_fp(in_fp, upload_storage, length), {}, None
except IOError as ex:
return 0, {}, ex
def complete_chunked_upload(self, uuid, final_path, _):
self._fake_storage_map[final_path] = self._fake_storage_map[uuid]
self._fake_storage_map.pop(uuid, None)
def cancel_chunked_upload(self, uuid, _):
self._fake_storage_map.pop(uuid, None)
def copy_to(self, destination, path):
if self.exists('break_copying'):
raise IOError('Broken!')
if self.exists('fake_copying'):
return
if self.exists('except_copying'):
raise Exception("I'm an exception!")
content = self.get_content(path)
destination.put_content(path, content)
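FakeStorage paths double as behavior switches for tests: writing sentinel keys such as 'all_files_exist', 'supports_direct_download', or 'break_copying' changes how later calls behave. A sketch of test usage; constructing with the 'local' context keeps the map isolated per instance instead of using the module-global map:

from storage.fakestorage import FakeStorage

fake = FakeStorage('local')
fake.put_content('some/path', 'hello world')
assert fake.exists('some/path')
assert fake.get_content('some/path') == 'hello world'

# Writing a sentinel key flips behavior for subsequent calls.
fake.put_content('all_files_exist', 'true')
assert fake.exists('anything/at/all')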

138
storage/local.py Normal file

@@ -0,0 +1,138 @@
import os
import shutil
import hashlib
import io
import logging
import psutil
from uuid import uuid4
from storage.basestorage import BaseStorageV2
logger = logging.getLogger(__name__)
class LocalStorage(BaseStorageV2):
def __init__(self, context, storage_path):
super(LocalStorage, self).__init__()
self._root_path = storage_path
def _init_path(self, path=None, create=False):
path = os.path.join(self._root_path, path) if path else self._root_path
if create is True:
dirname = os.path.dirname(path)
if not os.path.exists(dirname):
os.makedirs(dirname)
return path
def get_content(self, path):
path = self._init_path(path)
with open(path, mode='r') as f:
return f.read()
def put_content(self, path, content):
path = self._init_path(path, create=True)
with open(path, mode='w') as f:
f.write(content)
return path
def stream_read(self, path):
path = self._init_path(path)
with open(path, mode='rb') as f:
while True:
buf = f.read(self.buffer_size)
if not buf:
break
yield buf
def stream_read_file(self, path):
path = self._init_path(path)
return io.open(path, mode='rb')
def stream_write(self, path, fp, content_type=None, content_encoding=None):
# Size is mandatory
path = self._init_path(path, create=True)
with open(path, mode='wb') as out_fp:
self.stream_write_to_fp(fp, out_fp)
def exists(self, path):
path = self._init_path(path)
return os.path.exists(path)
def remove(self, path):
path = self._init_path(path)
if os.path.isdir(path):
shutil.rmtree(path)
return
try:
os.remove(path)
except OSError:
pass
def get_checksum(self, path):
path = self._init_path(path)
sha_hash = hashlib.sha256()
with open(path, 'r') as to_hash:
while True:
buf = to_hash.read(self.buffer_size)
if not buf:
break
sha_hash.update(buf)
return sha_hash.hexdigest()[:7]
def _rel_upload_path(self, uuid):
return 'uploads/{0}'.format(uuid)
def initiate_chunked_upload(self):
new_uuid = str(uuid4())
# Just create an empty file at the path
with open(self._init_path(self._rel_upload_path(new_uuid), create=True), 'w'):
pass
return new_uuid, {}
def stream_upload_chunk(self, uuid, offset, length, in_fp, _, content_type=None):
try:
with open(self._init_path(self._rel_upload_path(uuid)), 'r+b') as upload_storage:
upload_storage.seek(offset)
return self.stream_write_to_fp(in_fp, upload_storage, length), {}, None
except IOError as ex:
return 0, {}, ex
def complete_chunked_upload(self, uuid, final_path, _):
content_path = self._rel_upload_path(uuid)
final_path_abs = self._init_path(final_path, create=True)
if not self.exists(final_path_abs):
logger.debug('Moving content into place at path: %s', final_path_abs)
shutil.move(self._init_path(content_path), final_path_abs)
else:
logger.debug('Content already exists at path: %s', final_path_abs)
def cancel_chunked_upload(self, uuid, _):
content_path = self._init_path(self._rel_upload_path(uuid))
os.remove(content_path)
def validate(self, client):
super(LocalStorage, self).validate(client)
# Load the set of disk mounts.
try:
mounts = psutil.disk_partitions(all=True)
except:
logger.exception('Could not load disk partitions')
return
# Verify that the storage's root path is under a mounted Docker volume.
for mount in mounts:
if mount.mountpoint != '/' and self._root_path.startswith(mount.mountpoint):
return
raise Exception('Storage path %s is not under a mounted volume.\n\n'
'Registry data must be stored under a mounted volume '
'to prevent data loss' % self._root_path)
def copy_to(self, destination, path):
with self.stream_read_file(path) as fp:
destination.stream_write(path, fp)
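LocalStorage writes chunked uploads in place under uploads/{uuid} and moves the file to its final path on completion, while validate() insists the root path sit under a mounted volume. A sketch of the chunked flow; the storage path is a made-up example that validate() would reject unless it were a real mount:

import io
from storage.local import LocalStorage

local = LocalStorage(None, '/tmp/registry-example')   # hypothetical path; context is unused

upload_id, _ = local.initiate_chunked_upload()
written, _, err = local.stream_upload_chunk(upload_id, 0, -1, io.BytesIO(b'abc'), {})
assert err is None and written == 3

local.complete_chunked_upload(upload_id, 'blobs/example', {})
assert local.get_content('blobs/example') == 'abc'    # Python 2 str comparison, as in this codebase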

450
storage/swift.py Normal file

@@ -0,0 +1,450 @@
""" Swift storage driver.
Uses: http://docs.openstack.org/developer/swift/overview_large_objects.html
"""
import os.path
import copy
import hmac
import string
import logging
import json
from _pyio import BufferedReader
from collections import namedtuple
from hashlib import sha1
from random import SystemRandom
from time import time
from urlparse import urlparse
from uuid import uuid4
from cachetools.func import lru_cache
from swiftclient.client import Connection, ClientException, ReadableToIterable
from storage.basestorage import BaseStorage
from util.registry import filelike
from util.registry.generatorfile import GeneratorFile
logger = logging.getLogger(__name__)
_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length'])
_SEGMENTS_KEY = 'segments'
_EMPTY_SEGMENTS_KEY = 'emptysegments'
_SEGMENT_DIRECTORY = 'segments'
_MAXIMUM_SEGMENT_SIZE = 200000000 # ~200 MB
_DEFAULT_SWIFT_CONNECT_TIMEOUT = 5 # seconds
_CHUNK_CLEANUP_DELAY = 30 # seconds
class SwiftStorage(BaseStorage):
def __init__(self, context, swift_container, storage_path, auth_url, swift_user, swift_password,
auth_version=None, os_options=None, ca_cert_path=None, temp_url_key=None,
simple_path_concat=False, connect_timeout=None, retry_count=None,
retry_on_ratelimit=True):
super(SwiftStorage, self).__init__()
self._swift_container = swift_container
self._context = context
self._storage_path = storage_path.lstrip('/')
self._simple_path_concat = simple_path_concat
self._auth_url = auth_url
self._ca_cert_path = ca_cert_path
self._swift_user = swift_user
self._swift_password = swift_password
self._temp_url_key = temp_url_key
self._connect_timeout = connect_timeout
self._retry_count = retry_count
self._retry_on_ratelimit = retry_on_ratelimit
try:
self._auth_version = int(auth_version or '2')
except ValueError:
self._auth_version = 2
self._os_options = os_options or {}
self._initialized = False
def _get_connection(self):
return Connection(
authurl=self._auth_url,
cacert=self._ca_cert_path,
user=self._swift_user,
key=self._swift_password,
auth_version=self._auth_version,
os_options=self._os_options,
retry_on_ratelimit=self._retry_on_ratelimit,
timeout=self._connect_timeout or _DEFAULT_SWIFT_CONNECT_TIMEOUT,
retries=self._retry_count or 5,
)
def _normalize_path(self, object_path):
""" No matter what inputs we get, we are going to return a path without a leading or trailing
'/'
"""
if self._simple_path_concat:
return (self._storage_path + object_path).rstrip('/')
else:
return os.path.join(self._storage_path, object_path).rstrip('/')
def _get_object(self, path, chunk_size=None):
path = self._normalize_path(path)
try:
_, obj = self._get_connection().get_object(self._swift_container, path,
resp_chunk_size=chunk_size)
return obj
except ClientException as ex:
logger.exception('Could not get object at path %s: %s', path, ex)
raise IOError('Path %s not found' % path)
def _put_object(self, path, content, chunk=None, content_type=None, content_encoding=None,
headers=None):
path = self._normalize_path(path)
headers = headers or {}
if content_encoding is not None:
headers['Content-Encoding'] = content_encoding
is_filelike = hasattr(content, 'read')
if is_filelike:
content = ReadableToIterable(content, md5=True)
try:
etag = self._get_connection().put_object(self._swift_container, path, content,
chunk_size=chunk, content_type=content_type,
headers=headers)
except ClientException:
# We re-raise client exception here so that validation of config during setup can see
# the client exception messages.
raise
# If we wrapped the content in a ReadableToIterable, compare its MD5 to the etag returned. If
# they don't match, raise an IOError indicating a write failure.
if is_filelike:
if etag != content.get_md5sum():
logger.error('Got mismatch in md5 etag for path %s: Expected %s, but server has %s', path,
content.get_md5sum(), etag)
        raise IOError('upload verification failed for path {0}: '
                      'md5 mismatch, local {1} != remote {2}'
                      .format(path, content.get_md5sum(), etag))
def _head_object(self, path):
path = self._normalize_path(path)
try:
return self._get_connection().head_object(self._swift_container, path)
except ClientException as ce:
if ce.http_status != 404:
logger.exception('Could not head object at path %s: %s', path, ce)
return None
@lru_cache(maxsize=1)
def _get_root_storage_url(self):
""" Returns the root storage URL for this Swift storage. Note that since this requires a call
to Swift, we cache the result of this function call.
"""
storage_url, _ = self._get_connection().get_auth()
return storage_url
def get_direct_download_url(self, object_path, request_ip=None, expires_in=60,
requires_cors=False, head=False):
if requires_cors:
return None
# Reference: http://docs.openstack.org/juno/config-reference/content/object-storage-tempurl.html
if not self._temp_url_key:
return None
# Retrieve the root storage URL for the connection.
try:
root_storage_url = self._get_root_storage_url()
except ClientException:
logger.exception('Got client exception when trying to load Swift auth')
return None
parsed_storage_url = urlparse(root_storage_url)
scheme = parsed_storage_url.scheme
path = parsed_storage_url.path.rstrip('/')
hostname = parsed_storage_url.netloc
object_path = self._normalize_path(object_path)
# Generate the signed HMAC body.
method = 'HEAD' if head else 'GET'
expires = int(time() + expires_in)
full_path = '%s/%s/%s' % (path, self._swift_container, object_path)
hmac_body = '%s\n%s\n%s' % (method, expires, full_path)
sig = hmac.new(self._temp_url_key.encode('utf-8'), hmac_body.encode('utf-8'), sha1).hexdigest()
surl = '{scheme}://{host}{full_path}?temp_url_sig={sig}&temp_url_expires={expires}'
return surl.format(scheme=scheme, host=hostname, full_path=full_path, sig=sig, expires=expires)
def validate(self, client):
super(SwiftStorage, self).validate(client)
if self._temp_url_key:
# Generate a direct download URL.
dd_url = self.get_direct_download_url('_verify')
if not dd_url:
raise Exception('Could not validate direct download URL; the token may be invalid.')
# Try to retrieve the direct download URL.
response = client.get(dd_url, timeout=2)
if response.status_code != 200:
logger.debug('Direct download failure: %s => %s with body %s', dd_url,
response.status_code, response.text)
msg = 'Direct download URL failed with status code %s. Please check your temp-url-key.'
raise Exception(msg % response.status_code)
def get_content(self, path):
return self._get_object(path)
def put_content(self, path, content):
self._put_object(path, content)
def stream_read(self, path):
for data in self._get_object(path, self.buffer_size):
yield data
def stream_read_file(self, path):
return GeneratorFile(self.stream_read(path))
def stream_write(self, path, fp, content_type=None, content_encoding=None):
self._put_object(path, fp, self.buffer_size, content_type=content_type,
content_encoding=content_encoding)
def exists(self, path):
return bool(self._head_object(path))
def remove(self, path):
# Retrieve the object so we can see if it is segmented. If so, we'll delete its segments after
# removing the object.
try:
headers = self._head_object(path)
except ClientException as ex:
logger.exception('Could not head for delete of path %s: %s', path, str(ex))
raise IOError('Cannot delete path: %s' % path)
logger.debug('Found headers for path %s to delete: %s', path, headers)
# Delete the path itself.
path = self._normalize_path(path)
try:
self._get_connection().delete_object(self._swift_container, path)
except ClientException as ex:
logger.exception('Could not delete path %s: %s', path, str(ex))
raise IOError('Cannot delete path: %s' % path)
# Delete the segments.
object_manifest = headers.get('x-object-manifest', headers.get('X-Object-Manifest'))
if object_manifest is not None:
logger.debug('Found DLO for path %s: %s', path, object_manifest)
# Remove the container name from the beginning.
container_name, prefix_path = object_manifest.split('/', 1)
if container_name != self._swift_container:
logger.error('Expected container name %s, found path %s', self._swift_container,
prefix_path)
raise Exception("How did we end up with an invalid container name?")
logger.debug('Loading Dynamic Large Object segments for path prefix %s', prefix_path)
try:
_, container_objects = self._get_connection().get_container(self._swift_container,
full_listing=True,
prefix=prefix_path)
except ClientException as ex:
logger.exception('Could not load objects with prefix path %s: %s', prefix_path, str(ex))
raise IOError('Cannot load path: %s' % prefix_path)
logger.debug('Found Dynamic Large Object segments for path prefix %s: %s', prefix_path,
len(container_objects))
for obj in container_objects:
try:
logger.debug('Deleting Dynamic Large Object segment %s for path prefix %s', obj['name'],
prefix_path)
self._get_connection().delete_object(self._swift_container, obj['name'])
except ClientException as ex:
logger.exception('Could not delete object with path %s: %s', obj['name'], str(ex))
raise IOError('Cannot delete path: %s' % obj['name'])
def _random_checksum(self, count):
chars = string.ascii_uppercase + string.digits
return ''.join(SystemRandom().choice(chars) for _ in range(count))
def get_checksum(self, path):
headers = self._head_object(path)
if not headers:
raise IOError('Cannot lookup path: %s' % path)
return headers.get('etag', '')[1:-1][:7] or self._random_checksum(7)
@staticmethod
def _segment_list_from_metadata(storage_metadata, key=_SEGMENTS_KEY):
return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[key]]
def initiate_chunked_upload(self):
random_uuid = str(uuid4())
metadata = {
_SEGMENTS_KEY: [],
_EMPTY_SEGMENTS_KEY: [],
}
return random_uuid, metadata
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None):
if length == 0:
return 0, storage_metadata, None
# Note: Swift limits segments in size, so we need to sub-divide chunks into segments
# based on the configured maximum.
total_bytes_written = 0
upload_error = None
read_until_end = length == filelike.READ_UNTIL_END
while True:
try:
bytes_written, storage_metadata = self._stream_upload_segment(uuid, offset, length, in_fp,
storage_metadata,
content_type)
except IOError as ex:
message = ('Error writing to stream in stream_upload_chunk for uuid %s (offset %s' +
', length %s, metadata: %s): %s')
logger.exception(message, uuid, offset, length, storage_metadata, ex)
upload_error = ex
break
if not read_until_end:
length = length - bytes_written
offset = offset + bytes_written
total_bytes_written = total_bytes_written + bytes_written
if bytes_written == 0 or (not read_until_end and length <= 0):
return total_bytes_written, storage_metadata, upload_error
return total_bytes_written, storage_metadata, upload_error
def _stream_upload_segment(self, uuid, offset, length, in_fp, storage_metadata, content_type):
updated_metadata = copy.deepcopy(storage_metadata)
segment_count = len(updated_metadata[_SEGMENTS_KEY])
segment_path = '%s/%s/%s' % (_SEGMENT_DIRECTORY, uuid, '%09d' % segment_count)
# Track the number of bytes read and if an explicit length is specified, limit the
# file stream to that length.
if length == filelike.READ_UNTIL_END:
length = _MAXIMUM_SEGMENT_SIZE
else:
length = min(_MAXIMUM_SEGMENT_SIZE, length)
limiting_fp = filelike.LimitingStream(in_fp, length)
# If retries are requested, then we need to use a buffered reader to allow for calls to
# seek() on retries from within the Swift client.
if self._retry_count > 0:
limiting_fp = BufferedReader(limiting_fp, buffer_size=length)
# Write the segment to Swift.
self.stream_write(segment_path, limiting_fp, content_type)
# We are only going to track keys to which data was confirmed written.
bytes_written = limiting_fp.tell()
if bytes_written > 0:
updated_metadata[_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
bytes_written))
else:
updated_metadata[_EMPTY_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
bytes_written))
return bytes_written, updated_metadata
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
""" Complete the chunked upload and store the final results in the path indicated.
Returns nothing.
"""
# Check all potentially empty segments against the segments that were uploaded; if the path
# is still empty, then we queue the segment to be deleted.
if self._context.chunk_cleanup_queue is not None:
nonempty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
key=_SEGMENTS_KEY)
potentially_empty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
key=_EMPTY_SEGMENTS_KEY)
nonempty_paths = set([segment.path for segment in nonempty_segments])
for segment in potentially_empty_segments:
if segment.path in nonempty_paths:
continue
# Queue the chunk to be deleted, as it is empty and therefore unused.
self._context.chunk_cleanup_queue.put(
['segment/%s/%s' % (self._context.location, uuid)],
json.dumps({
'location': self._context.location,
'uuid': uuid,
'path': segment.path,
}), available_after=_CHUNK_CLEANUP_DELAY)
# Finally, we write an empty file at the proper location with a X-Object-Manifest
# header pointing to the prefix for the segments.
segments_prefix_path = self._normalize_path('%s/%s' % (_SEGMENT_DIRECTORY, uuid))
contained_segments_prefix_path = '%s/%s' % (self._swift_container, segments_prefix_path)
self._put_object(final_path, '', headers={'X-Object-Manifest': contained_segments_prefix_path})
def cancel_chunked_upload(self, uuid, storage_metadata):
""" Cancel the chunked upload and clean up any outstanding partially uploaded data.
Returns nothing.
"""
if not self._context.chunk_cleanup_queue:
return
segments = list(SwiftStorage._segment_list_from_metadata(storage_metadata,
key=_SEGMENTS_KEY))
segments.extend(SwiftStorage._segment_list_from_metadata(storage_metadata,
key=_EMPTY_SEGMENTS_KEY))
# Queue all the uploaded segments to be deleted.
for segment in segments:
# Queue the chunk to be deleted.
self._context.chunk_cleanup_queue.put(
['segment/%s/%s' % (self._context.location, uuid)],
json.dumps({
'location': self._context.location,
'uuid': uuid,
'path': segment.path,
}), available_after=_CHUNK_CLEANUP_DELAY)
def copy_to(self, destination, path):
if (self.__class__ == destination.__class__ and
self._swift_user == destination._swift_user and
self._swift_password == destination._swift_password and
self._auth_url == destination._auth_url and
self._auth_version == destination._auth_version):
logger.debug('Copying file from swift %s to swift %s via a Swift copy',
self._swift_container, destination)
normalized_path = self._normalize_path(path)
target = '/%s/%s' % (destination._swift_container, normalized_path)
try:
self._get_connection().copy_object(self._swift_container, normalized_path, target)
except ClientException as ex:
logger.exception('Could not swift copy path %s: %s', path, ex)
raise IOError('Failed to swift copy path %s' % path)
return
# Fallback to a slower, default copy.
logger.debug('Copying file from swift %s to %s via a streamed copy', self._swift_container,
destination)
with self.stream_read_file(path) as fp:
destination.stream_write(path, fp)
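# A minimal standalone sketch of the temp URL signing done in get_direct_download_url above;
# the key, account, container and object names below are hypothetical placeholders rather than
# values taken from any real configuration.
if __name__ == '__main__':
  example_key = 'not-a-real-temp-url-key'
  example_full_path = '/v1/AUTH_example/container-name/basepath/some/object'
  example_expires = int(time() + 60)
  example_hmac_body = '%s\n%s\n%s' % ('GET', example_expires, example_full_path)
  example_sig = hmac.new(example_key.encode('utf-8'), example_hmac_body.encode('utf-8'),
                         sha1).hexdigest()
  print('https://swift.example.com%s?temp_url_sig=%s&temp_url_expires=%s' %
        (example_full_path, example_sig, example_expires))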

217
storage/test/test_azure.py Normal file
View file

@ -0,0 +1,217 @@
import base64
import md5
import pytest
import io
from contextlib import contextmanager
from urlparse import parse_qs, urlparse
from httmock import urlmatch, HTTMock
from xml.dom import minidom
from azure.storage.blob import BlockBlobService
from storage.azurestorage import AzureStorage
@contextmanager
def fake_azure_storage(files=None):
service = BlockBlobService(is_emulated=True)
endpoint = service.primary_endpoint.split('/')
container_name = 'somecontainer'
files = files if files is not None else {}
container_prefix = '/' + endpoint[1] + '/' + container_name
@urlmatch(netloc=endpoint[0], path=container_prefix + '$')
def get_container(url, request):
return {'status_code': 200, 'content': '{}'}
@urlmatch(netloc=endpoint[0], path=container_prefix + '/.+')
def container_file(url, request):
filename = url.path[len(container_prefix)+1:]
if request.method == 'GET' or request.method == 'HEAD':
return {
'status_code': 200 if filename in files else 404,
'content': files.get(filename),
'headers': {
'ETag': 'foobar',
},
}
if request.method == 'DELETE':
files.pop(filename)
return {
'status_code': 201,
'content': '',
'headers': {
'ETag': 'foobar',
},
}
if request.method == 'PUT':
query_params = parse_qs(url.query)
if query_params.get('comp') == ['properties']:
return {
'status_code': 201,
'content': '{}',
'headers': {
'x-ms-request-server-encrypted': "false",
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
}
}
if query_params.get('comp') == ['block']:
block_id = query_params['blockid'][0]
files[filename] = files.get(filename) or {}
files[filename][block_id] = request.body
return {
'status_code': 201,
'content': '{}',
'headers': {
'Content-MD5': base64.b64encode(md5.new(request.body).digest()),
'ETag': 'foo',
'x-ms-request-server-encrypted': "false",
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
}
}
if query_params.get('comp') == ['blocklist']:
parsed = minidom.parseString(request.body)
latest = parsed.getElementsByTagName('Latest')
combined = []
for latest_block in latest:
combined.append(files[filename][latest_block.childNodes[0].data])
files[filename] = ''.join(combined)
return {
'status_code': 201,
'content': '{}',
'headers': {
'Content-MD5': base64.b64encode(md5.new(files[filename]).digest()),
'ETag': 'foo',
'x-ms-request-server-encrypted': "false",
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
}
}
if request.headers.get('x-ms-copy-source'):
copy_source = request.headers['x-ms-copy-source']
copy_path = urlparse(copy_source).path[len(container_prefix) + 1:]
files[filename] = files[copy_path]
return {
'status_code': 201,
'content': '{}',
'headers': {
'x-ms-request-server-encrypted': "false",
'x-ms-copy-status': 'success',
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
}
}
files[filename] = request.body
return {
'status_code': 201,
'content': '{}',
'headers': {
'Content-MD5': base64.b64encode(md5.new(request.body).digest()),
'ETag': 'foo',
'x-ms-request-server-encrypted': "false",
'last-modified': 'Wed, 21 Oct 2015 07:28:00 GMT',
}
}
return {'status_code': 405, 'content': ''}
@urlmatch(netloc=endpoint[0], path='.+')
def catchall(url, request):
return {'status_code': 405, 'content': ''}
with HTTMock(get_container, container_file, catchall):
yield AzureStorage(None, 'somecontainer', '', 'someaccount', is_emulated=True)
def test_validate():
with fake_azure_storage() as s:
s.validate(None)
def test_basics():
with fake_azure_storage() as s:
s.put_content('hello', 'hello world')
assert s.exists('hello')
assert s.get_content('hello') == 'hello world'
assert s.get_checksum('hello')
assert ''.join(list(s.stream_read('hello'))) == 'hello world'
assert s.stream_read_file('hello').read() == 'hello world'
s.remove('hello')
assert not s.exists('hello')
def test_does_not_exist():
with fake_azure_storage() as s:
assert not s.exists('hello')
with pytest.raises(IOError):
s.get_content('hello')
with pytest.raises(IOError):
s.get_checksum('hello')
with pytest.raises(IOError):
list(s.stream_read('hello'))
with pytest.raises(IOError):
s.stream_read_file('hello')
def test_stream_write():
fp = io.BytesIO()
fp.write('hello world!')
fp.seek(0)
with fake_azure_storage() as s:
s.stream_write('hello', fp)
assert s.get_content('hello') == 'hello world!'
@pytest.mark.parametrize('chunk_size', [
(1),
(5),
(10),
])
def test_chunked_uploading(chunk_size):
with fake_azure_storage() as s:
string_data = 'hello world!'
chunks = [string_data[index:index+chunk_size] for index in range(0, len(string_data), chunk_size)]
uuid, metadata = s.initiate_chunked_upload()
start_index = 0
for chunk in chunks:
fp = io.BytesIO()
fp.write(chunk)
fp.seek(0)
total_bytes_written, metadata, error = s.stream_upload_chunk(uuid, start_index, -1, fp,
metadata)
assert total_bytes_written == len(chunk)
assert metadata
assert not error
start_index += total_bytes_written
s.complete_chunked_upload(uuid, 'chunked', metadata)
assert s.get_content('chunked') == string_data
def test_get_direct_download_url():
with fake_azure_storage() as s:
s.put_content('hello', 'world')
assert 'sig' in s.get_direct_download_url('hello')
def test_copy_to():
files = {}
with fake_azure_storage(files=files) as s:
s.put_content('hello', 'hello world')
with fake_azure_storage(files=files) as s2:
s.copy_to(s2, 'hello')
assert s2.exists('hello')
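# A small additional check, assuming the fake PUT handler above simply overwrites an existing
# blob when the same name is written twice.
def test_overwrite():
  with fake_azure_storage() as s:
    s.put_content('hello', 'hello world')
    s.put_content('hello', 'hello world again')
    assert s.get_content('hello') == 'hello world again'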

View file

@ -0,0 +1,258 @@
import os
from StringIO import StringIO
import pytest
import moto
import boto
from moto import mock_s3_deprecated as mock_s3
from storage import S3Storage, StorageContext
from storage.cloud import _CloudStorage, _PartUploadMetadata
from storage.cloud import _CHUNKS_KEY
_TEST_CONTENT = os.urandom(1024)
_TEST_BUCKET = 'some_bucket'
_TEST_USER = 'someuser'
_TEST_PASSWORD = 'somepassword'
_TEST_PATH = 'some/cool/path'
_TEST_CONTEXT = StorageContext('nyc', None, None, None, None)
@pytest.fixture(scope='function')
def storage_engine():
with mock_s3():
# Create a test bucket and put some test content.
boto.connect_s3().create_bucket(_TEST_BUCKET)
engine = S3Storage(_TEST_CONTEXT, 'some/path', _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
engine.put_content(_TEST_PATH, _TEST_CONTENT)
yield engine
def test_basicop(storage_engine):
# Ensure the content exists.
assert storage_engine.exists(_TEST_PATH)
# Verify it can be retrieved.
assert storage_engine.get_content(_TEST_PATH) == _TEST_CONTENT
# Retrieve a checksum for the content.
storage_engine.get_checksum(_TEST_PATH)
# Remove the file.
storage_engine.remove(_TEST_PATH)
# Ensure it no longer exists.
with pytest.raises(IOError):
storage_engine.get_content(_TEST_PATH)
with pytest.raises(IOError):
storage_engine.get_checksum(_TEST_PATH)
assert not storage_engine.exists(_TEST_PATH)
@pytest.mark.parametrize('bucket, username, password', [
pytest.param(_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD, id='same credentials'),
pytest.param('another_bucket', 'blech', 'password', id='different credentials'),
])
def test_copy(bucket, username, password, storage_engine):
# Copy the content to another engine.
another_engine = S3Storage(_TEST_CONTEXT, 'another/path', _TEST_BUCKET, _TEST_USER,
_TEST_PASSWORD)
boto.connect_s3().create_bucket('another_bucket')
storage_engine.copy_to(another_engine, _TEST_PATH)
# Verify it can be retrieved.
assert another_engine.get_content(_TEST_PATH) == _TEST_CONTENT
def test_copy_with_error(storage_engine):
another_engine = S3Storage(_TEST_CONTEXT, 'another/path', 'anotherbucket', 'foo',
'bar')
with pytest.raises(IOError):
storage_engine.copy_to(another_engine, _TEST_PATH)
def test_stream_read(storage_engine):
# Read the streaming content.
data = ''.join(storage_engine.stream_read(_TEST_PATH))
assert data == _TEST_CONTENT
def test_stream_read_file(storage_engine):
with storage_engine.stream_read_file(_TEST_PATH) as f:
assert f.read() == _TEST_CONTENT
def test_stream_write(storage_engine):
new_data = os.urandom(4096)
storage_engine.stream_write(_TEST_PATH, StringIO(new_data), content_type='Cool/Type')
assert storage_engine.get_content(_TEST_PATH) == new_data
def test_stream_write_error():
with mock_s3():
# Create an engine but not the bucket.
engine = S3Storage(_TEST_CONTEXT, 'some/path', _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
# Attempt to write to the uncreated bucket, which should raise an error.
with pytest.raises(IOError):
engine.stream_write(_TEST_PATH, StringIO('hello world'), content_type='Cool/Type')
assert not engine.exists(_TEST_PATH)
@pytest.mark.parametrize('chunk_count', [
0,
1,
50,
])
@pytest.mark.parametrize('force_client_side', [
False,
True
])
def test_chunk_upload(storage_engine, chunk_count, force_client_side):
if chunk_count == 0 and force_client_side:
return
upload_id, metadata = storage_engine.initiate_chunked_upload()
final_data = ''
for index in range(0, chunk_count):
chunk_data = os.urandom(1024)
final_data = final_data + chunk_data
bytes_written, new_metadata, error = storage_engine.stream_upload_chunk(upload_id, 0,
len(chunk_data),
StringIO(chunk_data),
metadata)
metadata = new_metadata
assert bytes_written == len(chunk_data)
assert error is None
assert len(metadata[_CHUNKS_KEY]) == index + 1
# Complete the chunked upload.
storage_engine.complete_chunked_upload(upload_id, 'some/chunked/path', metadata,
force_client_side=force_client_side)
# Ensure the file contents are valid.
assert storage_engine.get_content('some/chunked/path') == final_data
@pytest.mark.parametrize('chunk_count', [
0,
1,
50,
])
def test_cancel_chunked_upload(storage_engine, chunk_count):
upload_id, metadata = storage_engine.initiate_chunked_upload()
for _ in range(0, chunk_count):
chunk_data = os.urandom(1024)
_, new_metadata, _ = storage_engine.stream_upload_chunk(upload_id, 0,
len(chunk_data),
StringIO(chunk_data),
metadata)
metadata = new_metadata
# Cancel the upload.
storage_engine.cancel_chunked_upload(upload_id, metadata)
# Ensure all chunks were deleted.
for chunk in metadata[_CHUNKS_KEY]:
assert not storage_engine.exists(chunk.path)
def test_large_chunks_upload(storage_engine):
# Make the max chunk size much smaller for testing.
storage_engine.maximum_chunk_size = storage_engine.minimum_chunk_size * 2
upload_id, metadata = storage_engine.initiate_chunked_upload()
# Write a "super large" chunk, to ensure that it is broken into smaller chunks.
chunk_data = os.urandom(int(storage_engine.maximum_chunk_size * 2.5))
bytes_written, new_metadata, _ = storage_engine.stream_upload_chunk(upload_id, 0,
-1,
StringIO(chunk_data),
metadata)
assert len(chunk_data) == bytes_written
# Complete the chunked upload.
storage_engine.complete_chunked_upload(upload_id, 'some/chunked/path', new_metadata)
# Ensure the file contents are valid.
assert len(chunk_data) == len(storage_engine.get_content('some/chunked/path'))
assert storage_engine.get_content('some/chunked/path') == chunk_data
def test_large_chunks_with_ragged_edge(storage_engine):
# Make the max chunk size much smaller for testing and force it to have a ragged edge.
storage_engine.maximum_chunk_size = storage_engine.minimum_chunk_size * 2 + 10
upload_id, metadata = storage_engine.initiate_chunked_upload()
# Write a few "super large" chunks, to ensure that it is broken into smaller chunks.
all_data = ''
for _ in range(0, 2):
chunk_data = os.urandom(int(storage_engine.maximum_chunk_size) + 20)
bytes_written, new_metadata, _ = storage_engine.stream_upload_chunk(upload_id, 0,
-1,
StringIO(chunk_data),
metadata)
assert len(chunk_data) == bytes_written
all_data = all_data + chunk_data
metadata = new_metadata
# Complete the chunked upload.
storage_engine.complete_chunked_upload(upload_id, 'some/chunked/path', new_metadata)
# Ensure the file contents are valid.
assert len(all_data) == len(storage_engine.get_content('some/chunked/path'))
assert storage_engine.get_content('some/chunked/path') == all_data
@pytest.mark.parametrize('max_size, parts', [
(50, [
_PartUploadMetadata('foo', 0, 50),
_PartUploadMetadata('foo', 50, 50),
]),
(40, [
_PartUploadMetadata('foo', 0, 25),
_PartUploadMetadata('foo', 25, 25),
_PartUploadMetadata('foo', 50, 25),
_PartUploadMetadata('foo', 75, 25)
]),
(51, [
_PartUploadMetadata('foo', 0, 50),
_PartUploadMetadata('foo', 50, 50),
]),
(49, [
_PartUploadMetadata('foo', 0, 25),
_PartUploadMetadata('foo', 25, 25),
_PartUploadMetadata('foo', 50, 25),
_PartUploadMetadata('foo', 75, 25),
]),
(99, [
_PartUploadMetadata('foo', 0, 50),
_PartUploadMetadata('foo', 50, 50),
]),
(100, [
_PartUploadMetadata('foo', 0, 100),
]),
])
def test_rechunked(max_size, parts):
chunk = _PartUploadMetadata('foo', 0, 100)
rechunked = list(_CloudStorage._rechunk(chunk, max_size))
assert len(rechunked) == len(parts)
for index, chunk in enumerate(rechunked):
assert chunk == parts[index]
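# A small additional check, assuming get_checksum behaves for chunked uploads as it does for
# plain writes under moto's deprecated S3 mock.
def test_checksum_after_chunked_upload(storage_engine):
  upload_id, metadata = storage_engine.initiate_chunked_upload()
  chunk_data = os.urandom(1024)
  _, metadata, error = storage_engine.stream_upload_chunk(upload_id, 0, len(chunk_data),
                                                          StringIO(chunk_data), metadata)
  assert error is None
  storage_engine.complete_chunked_upload(upload_id, 'some/chunked/path', metadata)
  assert storage_engine.exists('some/chunked/path')
  assert storage_engine.get_checksum('some/chunked/path')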

View file

@ -0,0 +1,80 @@
import os
import pytest
from contextlib import contextmanager
from mock import patch
from moto import mock_s3_deprecated as mock_s3
import boto
from app import config_provider
from storage import CloudFrontedS3Storage, StorageContext
from util.ipresolver import IPResolver
from util.ipresolver.test.test_ipresolver import test_aws_ip, aws_ip_range_data, test_ip_range_cache
from test.fixtures import *
_TEST_CONTENT = os.urandom(1024)
_TEST_BUCKET = 'some_bucket'
_TEST_USER = 'someuser'
_TEST_PASSWORD = 'somepassword'
_TEST_PATH = 'some/cool/path'
@pytest.fixture(params=[True, False])
def ipranges_populated(request):
return request.param
@pytest.fixture()
def test_empty_ip_range_cache(empty_range_data):
sync_token = empty_range_data['syncToken']
all_amazon = IPResolver._parse_amazon_ranges(empty_range_data)
  fake_cache = {
    'sync_token': sync_token,
    'all_amazon': all_amazon,
  }
return fake_cache
@pytest.fixture()
def empty_range_data():
empty_range_data = {
'syncToken': 123456789,
'prefixes': [],
}
return empty_range_data
@mock_s3
def test_direct_download(test_aws_ip, test_empty_ip_range_cache, test_ip_range_cache, aws_ip_range_data, ipranges_populated, app):
ipresolver = IPResolver(app)
  if ipranges_populated:
    ipresolver.sync_token = test_ip_range_cache['sync_token']
    ipresolver.amazon_ranges = test_ip_range_cache['all_amazon']
context = StorageContext('nyc', None, None, config_provider, ipresolver)
# Create a test bucket and put some test content.
boto.connect_s3().create_bucket(_TEST_BUCKET)
engine = CloudFrontedS3Storage(context, 'cloudfrontdomain', 'keyid', 'test/data/test.pem', 'some/path',
_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
engine.put_content(_TEST_PATH, _TEST_CONTENT)
assert engine.exists(_TEST_PATH)
# Request a direct download URL for a request from a known AWS IP, and ensure we are returned an S3 URL.
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, test_aws_ip)
if ipranges_populated:
# Request a direct download URL for a request from a non-AWS IP, and ensure we are returned a CloudFront URL.
assert 'cloudfrontdomain' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
else:
# Request a direct download URL for a request from a non-AWS IP, but since IP Ranges isn't populated, we still
# get back an S3 URL.
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
@mock_s3
def test_direct_download_no_ip(test_aws_ip, aws_ip_range_data, ipranges_populated, app):
ipresolver = IPResolver(app)
context = StorageContext('nyc', None, None, config_provider, ipresolver)
# Create a test bucket and put some test content.
boto.connect_s3().create_bucket(_TEST_BUCKET)
engine = CloudFrontedS3Storage(context, 'cloudfrontdomain', 'keyid', 'test/data/test.pem', 'some/path',
_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
engine.put_content(_TEST_PATH, _TEST_CONTENT)
assert engine.exists(_TEST_PATH)
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH)

View file

@ -0,0 +1,95 @@
import os
import pytest
import requests
from flask import Flask
from flask_testing import LiveServerTestCase
from storage import Storage
from util.security.instancekeys import InstanceKeys
from test.registry.liveserverfixture import *
from test.fixtures import *
@pytest.fixture(params=[True, False])
def is_proxying_enabled(request):
return request.param
@pytest.fixture()
def server_executor(app):
def reload_app(server_hostname):
# Close any existing connection.
close_db_filter(None)
# Reload the database config.
app.config['SERVER_HOSTNAME'] = server_hostname[len('http://'):]
configure(app.config)
return 'OK'
executor = LiveServerExecutor()
executor.register('reload_app', reload_app)
return executor
@pytest.fixture()
def liveserver_app(app, server_executor, init_db_path, is_proxying_enabled):
server_executor.apply_blueprint_to_app(app)
if os.environ.get('DEBUG') == 'true':
app.config['DEBUG'] = True
app.config['TESTING'] = True
app.config['INSTANCE_SERVICE_KEY_KID_LOCATION'] = 'test/data/test.kid'
app.config['INSTANCE_SERVICE_KEY_LOCATION'] = 'test/data/test.pem'
app.config['INSTANCE_SERVICE_KEY_SERVICE'] = 'quay'
app.config['FEATURE_PROXY_STORAGE'] = is_proxying_enabled
app.config['DISTRIBUTED_STORAGE_CONFIG'] = {
'test': ['FakeStorage', {}],
}
app.config['DISTRIBUTED_STORAGE_PREFERENCE'] = ['test']
return app
@pytest.fixture()
def instance_keys(liveserver_app):
return InstanceKeys(liveserver_app)
@pytest.fixture()
def storage(liveserver_app, instance_keys):
return Storage(liveserver_app, instance_keys=instance_keys)
@pytest.fixture()
def app_reloader(liveserver, server_executor):
server_executor.on(liveserver).reload_app(liveserver.url)
yield
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI') is not None,
reason="not supported for non SQLite testing")
def test_storage_proxy_auth(storage, liveserver_app, liveserver_session, is_proxying_enabled,
app_reloader):
# Activate direct download on the fake storage.
storage.put_content(['test'], 'supports_direct_download', 'true')
# Get the unwrapped URL.
direct_download_url = storage.get_direct_download_url(['test'], 'somepath')
proxy_index = direct_download_url.find('/_storage_proxy/')
if is_proxying_enabled:
assert proxy_index >= 0
else:
assert proxy_index == -1
# Ensure that auth returns the expected value.
  headers = {
    'X-Original-URI': direct_download_url[proxy_index:] if proxy_index >= 0 else 'someurihere'
  }
resp = liveserver_session.get('_storage_proxy_auth', headers=headers)
assert resp.status_code == (500 if not is_proxying_enabled else 200)

327
storage/test/test_swift.py Normal file
View file

@ -0,0 +1,327 @@
import io
import pytest
import hashlib
import copy
from collections import defaultdict
from mock import MagicMock, patch
from storage import StorageContext
from storage.swift import SwiftStorage, _EMPTY_SEGMENTS_KEY
from swiftclient.client import ClientException
base_args = {
'context': StorageContext('nyc', None, None, None, None),
'swift_container': 'container-name',
'storage_path': '/basepath',
'auth_url': 'https://auth.com',
'swift_user': 'root',
'swift_password': 'password',
}
class MockSwiftStorage(SwiftStorage):
def __init__(self, *args, **kwargs):
super(MockSwiftStorage, self).__init__(*args, **kwargs)
self._connection = MagicMock()
def _get_connection(self):
return self._connection
class FakeSwiftStorage(SwiftStorage):
  def __init__(self, fail_checksum=False, connection=None, *args, **kwargs):
super(FakeSwiftStorage, self).__init__(*args, **kwargs)
self._connection = connection or FakeSwift(fail_checksum=fail_checksum,
temp_url_key=kwargs.get('temp_url_key'))
def _get_connection(self):
return self._connection
class FakeSwift(object):
def __init__(self, fail_checksum=False, temp_url_key=None):
self.containers = defaultdict(dict)
self.fail_checksum = fail_checksum
self.temp_url_key = temp_url_key
def get_auth(self):
if self.temp_url_key == 'exception':
raise ClientException('I failed!')
return 'http://fake/swift', None
def head_object(self, container, path):
return self.containers.get(container, {}).get(path, {}).get('headers', None)
def copy_object(self, container, path, target):
pieces = target.split('/', 2)
_, content = self.get_object(container, path)
self.put_object(pieces[1], pieces[2], content)
def get_container(self, container, prefix=None, full_listing=None):
container_entries = self.containers[container]
objs = []
for path, data in list(container_entries.iteritems()):
if not prefix or path.startswith(prefix):
objs.append({
'name': path,
'bytes': len(data['content']),
})
return {}, objs
def put_object(self, container, path, content, chunk_size=None, content_type=None, headers=None):
if not isinstance(content, str):
if hasattr(content, 'read'):
content = content.read()
else:
content = ''.join(content)
self.containers[container][path] = {
'content': content,
'chunk_size': chunk_size,
'content_type': content_type,
'headers': headers or {'is': True},
}
digest = hashlib.md5()
digest.update(content)
return digest.hexdigest() if not self.fail_checksum else 'invalid'
def get_object(self, container, path, resp_chunk_size=None):
data = self.containers[container].get(path, {})
    if 'X-Object-Manifest' in data.get('headers', {}):
new_contents = []
prefix = data['headers']['X-Object-Manifest']
for key, value in self.containers[container].iteritems():
if ('container-name/' + key).startswith(prefix):
new_contents.append((key, value['content']))
new_contents.sort(key=lambda value: value[0])
data = dict(data)
data['content'] = ''.join([nc[1] for nc in new_contents])
return bool(data), data.get('content')
return bool(data), data.get('content')
def delete_object(self, container, path):
self.containers[container].pop(path, None)
class FakeQueue(object):
def __init__(self):
self.items = []
def get(self):
if not self.items:
return None
return self.items.pop()
def put(self, names, item, available_after=0):
self.items.append({
'names': names,
'item': item,
'available_after': available_after,
})
def test_fixed_path_concat():
swift = MockSwiftStorage(**base_args)
swift.exists('object/path')
swift._get_connection().head_object.assert_called_with('container-name', 'basepath/object/path')
def test_simple_path_concat():
simple_concat_args = dict(base_args)
simple_concat_args['simple_path_concat'] = True
swift = MockSwiftStorage(**simple_concat_args)
swift.exists('object/path')
swift._get_connection().head_object.assert_called_with('container-name', 'basepathobject/path')
def test_delete_unknown_path():
swift = SwiftStorage(**base_args)
with pytest.raises(IOError):
swift.remove('someunknownpath')
def test_simple_put_get():
swift = FakeSwiftStorage(**base_args)
assert not swift.exists('somepath')
swift.put_content('somepath', 'hello world!')
assert swift.exists('somepath')
assert swift.get_content('somepath') == 'hello world!'
def test_stream_read_write():
swift = FakeSwiftStorage(**base_args)
assert not swift.exists('somepath')
swift.stream_write('somepath', io.BytesIO('some content here'))
assert swift.exists('somepath')
assert swift.get_content('somepath') == 'some content here'
assert ''.join(list(swift.stream_read('somepath'))) == 'some content here'
def test_stream_read_write_invalid_checksum():
swift = FakeSwiftStorage(fail_checksum=True, **base_args)
assert not swift.exists('somepath')
with pytest.raises(IOError):
swift.stream_write('somepath', io.BytesIO('some content here'))
def test_remove():
swift = FakeSwiftStorage(**base_args)
assert not swift.exists('somepath')
swift.put_content('somepath', 'hello world!')
assert swift.exists('somepath')
swift.remove('somepath')
assert not swift.exists('somepath')
def test_copy_to():
swift = FakeSwiftStorage(**base_args)
modified_args = copy.deepcopy(base_args)
modified_args['swift_container'] = 'another_container'
another_swift = FakeSwiftStorage(connection=swift._connection, **modified_args)
swift.put_content('somepath', 'some content here')
swift.copy_to(another_swift, 'somepath')
assert swift.exists('somepath')
assert another_swift.exists('somepath')
assert swift.get_content('somepath') == 'some content here'
assert another_swift.get_content('somepath') == 'some content here'
def test_copy_to_different():
swift = FakeSwiftStorage(**base_args)
modified_args = copy.deepcopy(base_args)
modified_args['swift_user'] = 'foobarbaz'
modified_args['swift_container'] = 'another_container'
another_swift = FakeSwiftStorage(**modified_args)
swift.put_content('somepath', 'some content here')
swift.copy_to(another_swift, 'somepath')
assert swift.exists('somepath')
assert another_swift.exists('somepath')
assert swift.get_content('somepath') == 'some content here'
assert another_swift.get_content('somepath') == 'some content here'
def test_checksum():
swift = FakeSwiftStorage(**base_args)
swift.put_content('somepath', 'hello world!')
assert swift.get_checksum('somepath') is not None
@pytest.mark.parametrize('read_until_end', [
  True,
  False,
])
@pytest.mark.parametrize('max_chunk_size', [
(10000000),
(10),
(5),
(2),
(1),
])
@pytest.mark.parametrize('chunks', [
(['this', 'is', 'some', 'chunked', 'data', '']),
(['this is a very large chunk of data', '']),
(['h', 'e', 'l', 'l', 'o', '']),
])
def test_chunked_upload(chunks, max_chunk_size, read_until_end):
swift = FakeSwiftStorage(**base_args)
uuid, metadata = swift.initiate_chunked_upload()
offset = 0
with patch('storage.swift._MAXIMUM_SEGMENT_SIZE', max_chunk_size):
for chunk in chunks:
chunk_length = len(chunk) if not read_until_end else -1
bytes_written, metadata, error = swift.stream_upload_chunk(uuid, offset, chunk_length,
io.BytesIO(chunk), metadata)
assert error is None
assert len(chunk) == bytes_written
offset += len(chunk)
swift.complete_chunked_upload(uuid, 'somepath', metadata)
assert swift.get_content('somepath') == ''.join(chunks)
# Ensure each of the segments exist.
for segment in metadata['segments']:
assert swift.exists(segment.path)
# Delete the file and ensure all of its segments were removed.
swift.remove('somepath')
assert not swift.exists('somepath')
for segment in metadata['segments']:
assert not swift.exists(segment.path)
def test_cancel_chunked_upload():
chunk_cleanup_queue = FakeQueue()
args = dict(base_args)
args['context'] = StorageContext('nyc', None, chunk_cleanup_queue, None, None)
swift = FakeSwiftStorage(**args)
uuid, metadata = swift.initiate_chunked_upload()
chunks = ['this', 'is', 'some', 'chunked', 'data', '']
offset = 0
for chunk in chunks:
bytes_written, metadata, error = swift.stream_upload_chunk(uuid, offset, len(chunk),
io.BytesIO(chunk), metadata)
assert error is None
assert len(chunk) == bytes_written
offset += len(chunk)
swift.cancel_chunked_upload(uuid, metadata)
found = chunk_cleanup_queue.get()
assert found is not None
def test_empty_chunks_queued_for_deletion():
chunk_cleanup_queue = FakeQueue()
args = dict(base_args)
args['context'] = StorageContext('nyc', None, chunk_cleanup_queue, None, None)
swift = FakeSwiftStorage(**args)
uuid, metadata = swift.initiate_chunked_upload()
chunks = ['this', '', 'is', 'some', '', 'chunked', 'data', '']
offset = 0
for chunk in chunks:
length = len(chunk)
if length == 0:
length = 1
bytes_written, metadata, error = swift.stream_upload_chunk(uuid, offset, length,
io.BytesIO(chunk), metadata)
assert error is None
assert len(chunk) == bytes_written
offset += len(chunk)
swift.complete_chunked_upload(uuid, 'somepath', metadata)
assert ''.join(chunks) == swift.get_content('somepath')
# Check the chunk deletion queue and ensure we have the last chunk queued.
found = chunk_cleanup_queue.get()
assert found is not None
found2 = chunk_cleanup_queue.get()
assert found2 is None
@pytest.mark.parametrize('temp_url_key, expects_url', [
(None, False),
('foobarbaz', True),
('exception', False),
])
def test_get_direct_download_url(temp_url_key, expects_url):
swift = FakeSwiftStorage(temp_url_key=temp_url_key, **base_args)
swift.put_content('somepath', 'hello world!')
assert (swift.get_direct_download_url('somepath') is not None) == expects_url
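# A small additional check of the path normalization helper, assuming the '/basepath' storage
# path from base_args above.
def test_normalize_path():
  swift = FakeSwiftStorage(**base_args)
  assert swift._normalize_path('object/path') == 'basepath/object/path'
  simple_args = dict(base_args)
  simple_args['simple_path_concat'] = True
  simple_swift = FakeSwiftStorage(**simple_args)
  assert simple_swift._normalize_path('object/path') == 'basepathobject/path'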