2013-09-25 21:50:03 +00:00
|
|
|
import cStringIO as StringIO
|
|
|
|
import os
|
2013-10-20 06:39:23 +00:00
|
|
|
import logging
|
2015-09-02 21:31:44 +00:00
|
|
|
import copy
|
2013-09-25 21:50:03 +00:00
|
|
|
|
2017-09-26 20:08:50 +00:00
|
|
|
from cryptography.hazmat.backends import default_backend
|
|
|
|
from cryptography.hazmat.primitives import hashes
|
|
|
|
from cryptography.hazmat.primitives import serialization
|
|
|
|
from cryptography.hazmat.primitives.asymmetric import padding
|
|
|
|
|
|
|
|
from cachetools import lru_cache
|
2016-09-27 10:23:32 +00:00
|
|
|
from itertools import chain
|
|
|
|
|
2017-09-26 20:08:50 +00:00
|
|
|
from datetime import datetime, timedelta
|
|
|
|
|
|
|
|
from botocore.signers import CloudFrontSigner
|
2016-08-18 15:56:23 +00:00
|
|
|
from boto.exception import S3ResponseError
|
2013-09-25 21:50:03 +00:00
|
|
|
import boto.s3.connection
|
2015-08-26 21:08:42 +00:00
|
|
|
import boto.s3.multipart
|
2014-08-12 06:06:44 +00:00
|
|
|
import boto.gs.connection
|
2013-09-25 21:50:03 +00:00
|
|
|
import boto.s3.key
|
2014-08-12 06:06:44 +00:00
|
|
|
import boto.gs.key
|
2013-09-25 21:50:03 +00:00
|
|
|
|
2014-09-10 02:28:25 +00:00
|
|
|
from io import BufferedIOBase
|
2015-08-26 21:08:42 +00:00
|
|
|
from uuid import uuid4
|
2015-09-02 21:31:44 +00:00
|
|
|
from collections import namedtuple
|
|
|
|
|
|
|
|
from util.registry import filelike
|
2016-06-28 18:36:17 +00:00
|
|
|
from storage.basestorage import BaseStorageV2
|
2013-09-25 21:50:03 +00:00
|
|
|
|
|
|
|
|
2013-10-20 06:39:23 +00:00
|
|
|
# Module-level logger shared by every storage engine defined in this file.
logger = logging.getLogger(__name__)

# Record describing one uploaded chunk of a chunked upload: the storage-relative key path the
# chunk was written to, the offset recorded for it, and the number of bytes it holds.
_PartUploadMetadata = namedtuple(
    '_PartUploadMetadata',
    ['path', 'offset', 'length'],
)

# Key of the chunked-upload metadata dict under which the list of chunk records is kept.
_CHUNKS_KEY = 'chunks'
|
2015-08-26 21:08:42 +00:00
|
|
|
|
|
|
|
|
2014-09-09 22:30:14 +00:00
|
|
|
class StreamReadKeyAsFile(BufferedIOBase):
    """Read-only file-like adapter around a storage key object.

    Every operation is delegated to the wrapped key: reads go to ``key.read``, the closed state
    mirrors ``key.closed`` and closing calls ``key.close(fast=True)``.
    """

    def __init__(self, key):
        """Wrap `key`, which must provide ``read(amt)``, ``closed`` and ``close(fast=...)``."""
        self._key = key

    def read(self, amt=None):
        """Return up to `amt` bytes from the key, or None when the key is already closed."""
        if not self.closed:
            return self._key.read(amt)
        return None

    def readable(self):
        """Report that this stream supports reading."""
        return True

    @property
    def closed(self):
        """Whether the underlying key reports itself as closed."""
        return self._key.closed

    def close(self):
        """Close the underlying key, requesting its fast close path."""
        self._key.close(fast=True)
|
|
|
|
|
2013-10-20 06:39:23 +00:00
|
|
|
|
2015-08-26 21:08:42 +00:00
|
|
|
class _CloudStorage(BaseStorageV2):
    """Storage engine backed by a bucket reached through a boto-style connection class.

    The connection and bucket handle are created lazily by `_initialize_cloud_conn`. Paths given
    to the public methods are joined onto the configured storage root by `_init_path` before
    being used as key names.
    """

    def __init__(self, context, connection_class, key_class, connect_kwargs, upload_params,
                 storage_path, bucket_name, access_key=None, secret_key=None):
        """Record the configuration; no connection is opened here.

        Args:
            context: object exposing `metric_queue` (may be None), used for upload metrics.
            connection_class: callable invoked as
                `connection_class(access_key, secret_key, **connect_kwargs)`.
            key_class: callable invoked as `key_class(bucket, path)` to address a single key.
            connect_kwargs: extra keyword arguments for `connection_class`.
            upload_params: extra keyword arguments passed to every write call.
            storage_path: root prefix under which all keys live.
            bucket_name: name of the bucket to use.
            access_key: optional access key handed to the connection class.
            secret_key: optional secret key handed to the connection class.
        """
        super(_CloudStorage, self).__init__()

        # Parts smaller than this (other than the last) rule out server-side assembly.
        self.minimum_chunk_size = 5 * 1024 * 1024
        # Upper bound on a single server-side copied part; None means "no limit".
        self.maximum_chunk_size = None

        self._initialized = False
        self._bucket_name = bucket_name
        self._access_key = access_key
        self._secret_key = secret_key
        self._root_path = storage_path
        self._connection_class = connection_class
        self._key_class = key_class
        self._upload_params = upload_params
        self._connect_kwargs = connect_kwargs
        self._cloud_conn = None
        self._cloud_bucket = None
        self._context = context

    def _initialize_cloud_conn(self):
        """Create the connection and bucket handle on first call; later calls are no-ops."""
        if self._initialized:
            return

        self._cloud_conn = self._connection_class(self._access_key, self._secret_key,
                                                  **self._connect_kwargs)
        self._cloud_bucket = self._cloud_conn.get_bucket(self._bucket_name, validate=False)
        self._initialized = True

    def _debug_key(self, key):
        """Used for debugging only: print every request made through the key's connection."""
        orig_meth = key.bucket.connection.make_request

        def new_meth(*args, **kwargs):
            # Single-argument print calls produce the same output on Python 2 and Python 3.
            print('#' * 16)
            print(args)
            print(kwargs)
            print('#' * 16)
            return orig_meth(*args, **kwargs)

        key.bucket.connection.make_request = new_meth

    def _init_path(self, path=None):
        """Join `path` onto the storage root and strip a single leading slash, if present."""
        path = os.path.join(self._root_path, path) if path else self._root_path
        if path and path[0] == '/':
            return path[1:]
        return path

    def get_cloud_conn(self):
        """Return the connection object, creating it if necessary."""
        self._initialize_cloud_conn()
        return self._cloud_conn

    def get_cloud_bucket(self):
        """Return the bucket handle, creating the connection if necessary.

        Initialising here (as `get_cloud_conn` does) keeps callers such as `setup()` from
        receiving None when no other method has been invoked yet.
        """
        self._initialize_cloud_conn()
        return self._cloud_bucket

    def get_content(self, path):
        """Return the full contents stored at `path`.

        Raises:
            IOError: if no key exists at `path`.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        if not key.exists():
            raise IOError('No such key: \'{0}\''.format(path))
        return key.get_contents_as_string()

    def put_content(self, path, content):
        """Write `content` to `path` and return the resolved (root-joined) key path."""
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        key.set_contents_from_string(content, **self._upload_params)
        return path

    def get_supports_resumable_downloads(self):
        """Report that this engine supports resumable downloads."""
        return True

    def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False,
                                head=False):
        """Return a signed URL for `path` valid for `expires_in` seconds.

        When `head` is true the URL is signed for the HEAD method. `request_ip` and
        `requires_cors` are accepted but not used by this implementation.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        k = self._key_class(self._cloud_bucket, path)
        if head:
            return k.generate_url(expires_in, 'HEAD')
        return k.generate_url(expires_in)

    def get_direct_upload_url(self, path, mime_type, requires_cors=True):
        """Return a URL signed for a PUT of `mime_type` content to `path`, valid 300 seconds."""
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        url = key.generate_url(300, 'PUT', headers={'Content-Type': mime_type}, encrypt_key=True)
        return url

    def stream_read(self, path):
        """Yield the contents of `path` in pieces of at most `self.buffer_size` bytes.

        Raises:
            IOError: if no key exists at `path` (raised when iteration starts).
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        if not key.exists():
            raise IOError('No such key: \'{0}\''.format(path))
        while True:
            buf = key.read(self.buffer_size)
            if not buf:
                break
            yield buf

    def stream_read_file(self, path):
        """Return a `StreamReadKeyAsFile` wrapping the key at `path`.

        Raises:
            IOError: if no key exists at `path`.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        if not key.exists():
            raise IOError('No such key: \'{0}\''.format(path))
        return StreamReadKeyAsFile(key)

    def __initiate_multipart_upload(self, path, content_type, content_encoding):
        """Start a multipart upload for `path` and return the upload handle.

        `content_type` and `content_encoding`, when not None, are attached as metadata. The
        `multipart_upload_start` metric is incremented when a metric queue is configured.
        """
        # Minimum size of upload part size on S3 is 5MB
        self._initialize_cloud_conn()
        path = self._init_path(path)

        metadata = {}
        if content_type is not None:
            metadata['Content-Type'] = content_type

        if content_encoding is not None:
            metadata['Content-Encoding'] = content_encoding

        if self._context.metric_queue is not None:
            self._context.metric_queue.multipart_upload_start.Inc()

        return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
                                                            **self._upload_params)

    def stream_write(self, path, fp, content_type=None, content_encoding=None):
        """Write everything readable from `fp` to `path`.

        Raises:
            IOError: if the underlying streamed write reported an error. The error used to be
                dropped here, which let callers carry on as if the data had been stored.
        """
        _, write_error = self._stream_write_internal(path, fp, content_type, content_encoding)
        if write_error is not None:
            logger.error('Error when trying to stream_write path `%s`: %s', path, write_error)
            raise IOError('Exception when trying to stream_write path')

    def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None,
                               cancel_on_error=True, size=filelike.READ_UNTIL_END):
        """Copy data from `fp` to `path` using a multipart upload.

        Args:
            path: storage-relative destination path.
            fp: file-like source.
            content_type: optional Content-Type metadata.
            content_encoding: optional Content-Encoding metadata.
            cancel_on_error: when true, an IOError cancels the whole upload and 0 bytes are
                reported; when false, the parts uploaded so far are kept and completed.
            size: number of bytes to copy, or `filelike.READ_UNTIL_END` to copy until EOF.

        Returns:
            A `(bytes_written, error)` tuple; `error` is the caught IOError or None.
        """
        write_error = None
        mp = self.__initiate_multipart_upload(path, content_type, content_encoding)

        # We are going to reuse this but be VERY careful to only read the number of bytes
        # written to it
        buf = StringIO.StringIO()

        num_part = 1
        total_bytes_written = 0
        while size == filelike.READ_UNTIL_END or total_bytes_written < size:
            bytes_to_copy = self.minimum_chunk_size
            if size != filelike.READ_UNTIL_END:
                # We never want to ask for more bytes than our caller has indicated to copy
                bytes_to_copy = min(bytes_to_copy, size - total_bytes_written)

            buf.seek(0)
            try:
                # Stage the bytes into the buffer for use with the multipart upload file API
                bytes_staged = self.stream_write_to_fp(fp, buf, bytes_to_copy)
                if bytes_staged == 0:
                    break

                buf.seek(0)
                mp.upload_part_from_file(buf, num_part, size=bytes_staged)
                total_bytes_written += bytes_staged
                num_part += 1
            except IOError as e:
                logger.warning('Error when writing to stream in stream_write_internal at path %s: %s',
                               path, e)
                write_error = e

                if self._context.metric_queue is not None:
                    self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['failure'])

                if cancel_on_error:
                    mp.cancel_upload()
                    return 0, write_error
                else:
                    break

        if total_bytes_written > 0:
            if self._context.metric_queue is not None:
                self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['success'])

            self._perform_action_with_retry(mp.complete_upload)
        else:
            # Nothing was uploaded, so there is nothing to complete. Cancel explicitly rather
            # than leaving the initiated multipart upload dangling on the server.
            mp.cancel_upload()

        return total_bytes_written, write_error

    def exists(self, path):
        """Return whether a key exists at `path`."""
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        return key.exists()

    def remove(self, path):
        """Delete the key at `path`, or every key under `path + '/'` if no such key exists."""
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        if key.exists():
            # It's a file
            key.delete()
            return

        # We assume it's a directory
        if not path.endswith('/'):
            path += '/'

        for key in self._cloud_bucket.list(prefix=path):
            key.delete()

    def get_checksum(self, path):
        """Return the first seven characters of the key's etag (surrounding quotes removed).

        Raises:
            IOError: if the bucket lookup finds no key.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        k = self._cloud_bucket.lookup(key)
        if k is None:
            raise IOError('No such key: \'{0}\''.format(path))

        return k.etag[1:-1][:7]

    def copy_to(self, destination, path):
        """Copy `path` from this storage to `destination`.

        A direct bucket-to-bucket copy is used when both storages are the same class and share
        credentials and connection arguments; otherwise the data is streamed through this
        process via `stream_read_file` / `destination.stream_write`.
        """
        self._initialize_cloud_conn()

        # First try to copy directly via boto, but only if the storages are the
        # same type, with the same access information.
        if (self.__class__ == destination.__class__ and
                self._access_key and self._secret_key and
                self._access_key == destination._access_key and
                self._secret_key == destination._secret_key and
                self._connect_kwargs == destination._connect_kwargs):

            # Initialize the cloud connection on the destination as well.
            destination._initialize_cloud_conn()

            # Check the buckets for both the source and destination locations.
            if self._cloud_bucket is None:
                logger.error('Cloud bucket not found for location %s; Configuration is probably invalid!',
                             self._bucket_name)
                return

            if destination._cloud_bucket is None:
                logger.error('Cloud bucket not found for location %s; Configuration is probably invalid!',
                             destination._bucket_name)
                return

            # Perform the copy.
            logger.debug('Copying file from %s to %s via a direct boto copy', self._cloud_bucket,
                         destination._cloud_bucket)

            source_path = self._init_path(path)
            source_key = self._key_class(self._cloud_bucket, source_path)

            dest_path = destination._init_path(path)
            source_key.copy(destination._cloud_bucket, dest_path)
            return

        # Fallback to a slower, default copy.
        logger.debug('Copying file from %s to %s via a streamed copy', self._cloud_bucket,
                     destination)
        with self.stream_read_file(path) as fp:
            destination.stream_write(path, fp)

    def _rel_upload_path(self, uuid):
        """Return the storage-relative path used for the upload chunk named `uuid`."""
        return 'uploads/{0}'.format(uuid)

    def initiate_chunked_upload(self):
        """Begin a chunked upload; return `(upload_uuid, metadata)` with an empty chunk list."""
        self._initialize_cloud_conn()
        random_uuid = str(uuid4())

        metadata = {
            _CHUNKS_KEY: [],
        }

        return random_uuid, metadata

    def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata,
                            content_type=None):
        """Upload up to `length` bytes from `in_fp` as a new chunk.

        Returns:
            `(bytes_written, new_metadata, error)`. `new_metadata` is a deep copy of
            `storage_metadata` with the chunk appended only if at least one byte was written.
        """
        self._initialize_cloud_conn()

        # We are going to upload each chunk to a separate key
        chunk_path = self._rel_upload_path(str(uuid4()))
        bytes_written, write_error = self._stream_write_internal(chunk_path, in_fp,
                                                                 cancel_on_error=False,
                                                                 size=length,
                                                                 content_type=content_type)

        new_metadata = copy.deepcopy(storage_metadata)

        # We are only going to track keys to which data was confirmed written
        if bytes_written > 0:
            new_metadata[_CHUNKS_KEY].append(_PartUploadMetadata(chunk_path, offset,
                                                                 bytes_written))

        return bytes_written, new_metadata, write_error

    def _chunk_generator(self, chunk_list):
        """Yield, for each chunk, a stream limited to the first `chunk.length` bytes of its key."""
        for chunk in chunk_list:
            yield filelike.StreamSlice(self.stream_read_file(chunk.path), 0, chunk.length)

    @staticmethod
    def _chunk_list_from_metadata(storage_metadata):
        """Rebuild the list of `_PartUploadMetadata` records stored in `storage_metadata`."""
        return [_PartUploadMetadata(*chunk_args) for chunk_args in storage_metadata[_CHUNKS_KEY]]

    def _client_side_chunk_join(self, final_path, chunk_list):
        """Assemble `chunk_list` into `final_path` from this process, then delete the chunks.

        Failures to delete a chunk are logged and otherwise ignored.
        """
        # If there's only one chunk, just "move" (copy and delete) the key and call it a day.
        if len(chunk_list) == 1:
            chunk_path = self._init_path(chunk_list[0].path)
            abs_final_path = self._init_path(final_path)

            # Let the copy raise an exception if it fails.
            self._cloud_bucket.copy_key(abs_final_path, self._bucket_name, chunk_path)

            # Attempt to clean up the old chunk.
            try:
                self._cloud_bucket.delete_key(chunk_path)
            except IOError:
                # We failed to delete a chunk. This sucks, but we shouldn't fail the push.
                msg = 'Failed to clean up chunk %s for move of %s'
                logger.exception(msg, chunk_path, abs_final_path)
        else:
            # Concatenate and write all the chunks as one key.
            concatenated = filelike.FilelikeStreamConcat(self._chunk_generator(chunk_list))
            self.stream_write(final_path, concatenated)

            # Attempt to clean up all the chunks.
            for chunk in chunk_list:
                try:
                    self._cloud_bucket.delete_key(self._init_path(chunk.path))
                except IOError:
                    # We failed to delete a chunk. This sucks, but we shouldn't fail the push.
                    msg = 'Failed to clean up chunk %s for reupload of %s'
                    logger.exception(msg, chunk.path, final_path)

    @staticmethod
    def _perform_action_with_retry(action, *args, **kwargs):
        """Call `action(*args, **kwargs)`, retrying up to two more times on S3 internal errors.

        Only an `S3ResponseError` with status 200 and error code 'InternalError' is retried;
        any other `S3ResponseError`, or one raised on the final attempt, is logged and re-raised.
        The action's return value is discarded.
        """
        # Note: Sometimes Amazon S3 simply raises an internal error when trying to complete
        # an action. The recommendation is to simply try calling the action again.
        for remaining_retries in range(2, -1, -1):
            try:
                action(*args, **kwargs)
                break
            except S3ResponseError as s3re:
                if remaining_retries and s3re.status == 200 and s3re.error_code == 'InternalError':
                    # Weird internal error case. Retry.
                    continue

                # Otherwise, raise it.
                logger.exception('Exception trying to perform action %s', action)
                # A bare raise keeps the original traceback intact.
                raise

    @staticmethod
    def _rechunk(chunk, max_chunk_size):
        """ Rechunks the chunk list to meet maximum chunk size restrictions for the storage engine.

        Yields `chunk` unchanged when it fits (or when `max_chunk_size` is None); otherwise
        halves it recursively and yields the resulting sub-chunks in offset order.
        """
        if max_chunk_size is None or chunk.length <= max_chunk_size:
            yield chunk
        else:
            # Floor division keeps lengths and offsets integral even under true division.
            newchunk_length = chunk.length // 2
            first_subchunk = _PartUploadMetadata(chunk.path, chunk.offset, newchunk_length)
            second_subchunk = _PartUploadMetadata(chunk.path,
                                                  chunk.offset + newchunk_length,
                                                  chunk.length - newchunk_length)
            for subchunk in chain(_CloudStorage._rechunk(first_subchunk, max_chunk_size),
                                  _CloudStorage._rechunk(second_subchunk, max_chunk_size)):
                yield subchunk

    def complete_chunked_upload(self, uuid, final_path, storage_metadata,
                                force_client_side=False):
        """Assemble the uploaded chunks into `final_path`.

        Server-side assembly (multipart copy from the chunk keys) is used unless
        `force_client_side` is set or any chunk other than the last is smaller than
        `minimum_chunk_size`; otherwise the chunks are joined through this process.

        Raises:
            IOError: re-raised after cancelling the multipart upload if server-side assembly
                fails with an IOError.
        """
        self._initialize_cloud_conn()
        chunk_list = self._chunk_list_from_metadata(storage_metadata)

        # Here is where things get interesting: we are going to try to assemble this server side
        # In order to be a candidate all parts (after offsets have been computed) must be at
        # least 5MB
        server_side_assembly = False
        if not force_client_side:
            server_side_assembly = True
            for chunk_offset, chunk in enumerate(chunk_list):
                # If the chunk is both too small, and not the last chunk, we rule out server
                # side assembly
                if chunk.length < self.minimum_chunk_size and (chunk_offset + 1) < len(chunk_list):
                    server_side_assembly = False
                    break

        if server_side_assembly:
            logger.debug('Performing server side assembly of multi-part upload for: %s', final_path)
            # Bound before the try so the handler can tell whether there is anything to cancel.
            mpu = None
            try:
                # Awesome, we can do this completely server side, now we have to start a new
                # multipart upload and use copy_part_from_key to set all of the chunks.
                mpu = self.__initiate_multipart_upload(final_path, content_type=None,
                                                       content_encoding=None)
                updated_chunks = chain.from_iterable(
                    _CloudStorage._rechunk(c, self.maximum_chunk_size) for c in chunk_list)

                for index, chunk in enumerate(updated_chunks):
                    abs_chunk_path = self._init_path(chunk.path)
                    self._perform_action_with_retry(mpu.copy_part_from_key,
                                                    self.get_cloud_bucket().name,
                                                    abs_chunk_path, index + 1,
                                                    start=chunk.offset,
                                                    end=chunk.length + chunk.offset - 1)

                self._perform_action_with_retry(mpu.complete_upload)
            except IOError:
                # Something bad happened, log it and then give up
                msg = 'Exception when attempting server-side assembly for: %s'
                logger.exception(msg, final_path)
                if mpu is not None:
                    mpu.cancel_upload()
                raise

        else:
            # We are going to turn all of the server side objects into a single file-like
            # stream, and pass that to stream_write to chunk and upload the final object.
            self._client_side_chunk_join(final_path, chunk_list)

    def cancel_chunked_upload(self, uuid, storage_metadata):
        """Abandon a chunked upload by removing every chunk recorded in `storage_metadata`."""
        self._initialize_cloud_conn()

        # We have to go through and delete all of the uploaded chunks
        for chunk in self._chunk_list_from_metadata(storage_metadata):
            self.remove(chunk.path)
|
2015-08-26 21:08:42 +00:00
|
|
|
|
2014-08-12 06:06:44 +00:00
|
|
|
|
|
|
|
class S3Storage(_CloudStorage):
    """Cloud storage engine that talks to S3 through boto's S3 connection and key classes."""

    def __init__(self, context, storage_path, s3_bucket, s3_access_key=None,
                 s3_secret_key=None, host=None, port=None):
        """Configure the engine for `s3_bucket`, optionally targeting a custom host and port.

        Raises:
            ValueError: if `host` starts with an ``http:`` or ``https:`` scheme prefix.
        """
        connection_options = {}
        if host:
            if host.startswith(('http:', 'https:')):
                raise ValueError('host name must not start with http:// or https://')
            connection_options['host'] = host

        if port:
            connection_options['port'] = int(port)

        # Every write passes encrypt_key=True to the upload call.
        write_options = {'encrypt_key': True}

        # Empty credentials are normalised to None before being handed to the base class.
        super(S3Storage, self).__init__(context, boto.s3.connection.S3Connection,
                                        boto.s3.key.Key, connection_options, write_options,
                                        storage_path, s3_bucket,
                                        access_key=s3_access_key or None,
                                        secret_key=s3_secret_key or None)

        self.maximum_chunk_size = 5 * 1024 * 1024 * 1024  # 5GB.

    def setup(self):
        """Install the CORS rules (GET and PUT from any origin) on the bucket."""
        cors_xml = """<?xml version="1.0" encoding="UTF-8"?>
<CORSConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
<CORSRule>
<AllowedOrigin>*</AllowedOrigin>
<AllowedMethod>GET</AllowedMethod>
<MaxAgeSeconds>3000</MaxAgeSeconds>
<AllowedHeader>Authorization</AllowedHeader>
</CORSRule>
<CORSRule>
<AllowedOrigin>*</AllowedOrigin>
<AllowedMethod>PUT</AllowedMethod>
<MaxAgeSeconds>3000</MaxAgeSeconds>
<AllowedHeader>Content-Type</AllowedHeader>
<AllowedHeader>x-amz-acl</AllowedHeader>
<AllowedHeader>origin</AllowedHeader>
</CORSRule>
</CORSConfiguration>"""
        bucket = self.get_cloud_bucket()
        bucket.set_cors_xml(cors_xml)
|
|
|
|
|
2014-08-12 06:06:44 +00:00
|
|
|
class GoogleCloudStorage(_CloudStorage):
    """Cloud storage engine that talks to Google Cloud Storage through boto's GS classes."""

    def __init__(self, context, storage_path, access_key, secret_key, bucket_name):
        """Configure the engine for `bucket_name`; no extra connect or upload options are used."""
        upload_params = {}
        connect_kwargs = {}
        super(GoogleCloudStorage, self).__init__(context, boto.gs.connection.GSConnection,
                                                 boto.gs.key.Key, connect_kwargs, upload_params,
                                                 storage_path, bucket_name, access_key,
                                                 secret_key)

    def setup(self):
        """Install the CORS rules (GET and PUT from any origin) on the bucket."""
        self.get_cloud_bucket().set_cors_xml("""<?xml version="1.0" encoding="UTF-8"?>
<CorsConfig>
<Cors>
<Origins>
<Origin>*</Origin>
</Origins>
<Methods>
<Method>GET</Method>
<Method>PUT</Method>
</Methods>
<ResponseHeaders>
<ResponseHeader>Content-Type</ResponseHeader>
</ResponseHeaders>
<MaxAgeSec>3000</MaxAgeSec>
</Cors>
</CorsConfig>""")

    def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None,
                               cancel_on_error=True, size=filelike.READ_UNTIL_END):
        """Write `fp` to `path` with a single streamed upload instead of a multipart upload.

        Args:
            path: storage-relative destination path.
            fp: file-like source.
            content_type: optional Content-Type metadata.
            content_encoding: optional Content-Encoding metadata.
            cancel_on_error: accepted for signature compatibility; not used here.
            size: number of bytes to copy, or `filelike.READ_UNTIL_END` to copy until EOF.

        Returns:
            `(key.size, None)` on success, or `(0, error)` if an IOError was raised.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)

        if content_type is not None:
            key.set_metadata('Content-Type', content_type)

        if content_encoding is not None:
            key.set_metadata('Content-Encoding', content_encoding)

        if size != filelike.READ_UNTIL_END:
            # Limit the upload to the first `size` bytes of the stream.
            fp = filelike.StreamSlice(fp, 0, size)

        # TODO figure out how to handle cancel_on_error=False
        try:
            key.set_contents_from_stream(fp)
        except IOError as ex:
            return 0, ex

        return key.size, None

    def complete_chunked_upload(self, uuid, final_path, storage_metadata,
                                force_client_side=False):
        """Assemble the uploaded chunks into `final_path`, always joining client side.

        `force_client_side` is accepted only so the signature matches the base class; the join
        is client side regardless of its value.
        """
        self._initialize_cloud_conn()

        # Boto does not support GCS's multipart upload API because it differs from S3, so
        # we are forced to join it all locally and then reupload.
        # See https://github.com/boto/boto/issues/3355
        chunk_list = self._chunk_list_from_metadata(storage_metadata)
        self._client_side_chunk_join(final_path, chunk_list)
|
|
|
|
|
2014-09-09 19:54:03 +00:00
|
|
|
|
|
|
|
class RadosGWStorage(_CloudStorage):
    """Cloud storage engine for a RadosGW endpoint, reached through boto's S3 classes."""

    def __init__(self, context, hostname, is_secure, storage_path, access_key, secret_key,
                 bucket_name, port=None):
        """Configure the engine for `bucket_name` on `hostname`, optionally on a custom port.

        The connection uses boto's `OrdinaryCallingFormat`.
        """
        upload_params = {}
        connect_kwargs = {
            'host': hostname,
            'is_secure': is_secure,
            'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
        }

        if port:
            connect_kwargs['port'] = int(port)

        super(RadosGWStorage, self).__init__(context, boto.s3.connection.S3Connection,
                                             boto.s3.key.Key, connect_kwargs, upload_params,
                                             storage_path, bucket_name, access_key, secret_key)

    # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
    def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False,
                                head=False):
        """Return a signed download URL, or None when the caller requires CORS support."""
        if requires_cors:
            return None

        return super(RadosGWStorage, self).get_direct_download_url(path, request_ip, expires_in,
                                                                   requires_cors, head)

    # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
    def get_direct_upload_url(self, path, mime_type, requires_cors=True):
        """Return a signed upload URL, or None when the caller requires CORS support."""
        if requires_cors:
            return None

        return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors)

    def complete_chunked_upload(self, uuid, final_path, storage_metadata,
                                force_client_side=False):
        """Assemble the uploaded chunks into `final_path`, always joining client side.

        `force_client_side` is accepted only so the signature matches the base class; the join
        is client side regardless of its value.
        """
        self._initialize_cloud_conn()

        # RadosGW does not support multipart copying from keys, so we are forced to join
        # it all locally and then reupload.
        # See https://github.com/ceph/ceph/pull/5139
        chunk_list = self._chunk_list_from_metadata(storage_metadata)
        self._client_side_chunk_join(final_path, chunk_list)
|
2017-09-26 20:08:50 +00:00
|
|
|
|
|
|
|
|
|
|
|
class CloudFrontedS3Storage(S3Storage):
    """ An S3Storage engine that redirects to CloudFront for all requests outside of AWS. """

    def __init__(self, context, cloudfront_distribution_domain, cloudfront_key_id,
                 cloudfront_privatekey_filename, storage_path, s3_bucket, *args, **kwargs):
        """Configure the S3 engine and load the CloudFront signing key.

        Args:
            context: object exposing `config_provider` and `ip_resolver` (plus whatever the
                S3 base class needs).
            cloudfront_distribution_domain: host name used to build CloudFront URLs.
            cloudfront_key_id: key identifier handed to the CloudFront signer.
            cloudfront_privatekey_filename: name of the PEM private key file, read through
                the context's config provider.
            storage_path, s3_bucket, *args, **kwargs: forwarded to `S3Storage.__init__`.
        """
        super(CloudFrontedS3Storage, self).__init__(context, storage_path, s3_bucket,
                                                    *args, **kwargs)

        self.cloudfront_distribution_domain = cloudfront_distribution_domain
        self.cloudfront_key_id = cloudfront_key_id
        self.cloudfront_privatekey = self._load_private_key(cloudfront_privatekey_filename)

    def get_direct_download_url(self, path, request_ip=None, expires_in=60, requires_cors=False,
                                head=False):
        """Return a signed download URL for `path`.

        A plain S3 signed URL is returned when no CloudFront private key is loaded or when
        `request_ip` resolves to the 'aws' provider; otherwise a CloudFront URL signed to expire
        `expires_in` seconds from now is returned.
        """
        # If CloudFront could not be loaded, fall back to normal S3.
        if self.cloudfront_privatekey is None:
            return super(CloudFrontedS3Storage, self).get_direct_download_url(path, request_ip,
                                                                              expires_in,
                                                                              requires_cors,
                                                                              head)

        resolved_ip_info = None
        logger.debug('Got direct download request for path "%s" with IP "%s"', path, request_ip)
        if request_ip is not None:
            # Lookup the IP address in our resolution table and determine whether it is under
            # AWS. If it is, then return an S3 signed URL, since we are in-network.
            resolved_ip_info = self._context.ip_resolver.resolve_ip(request_ip)
            logger.debug('Resolved IP information for IP %s: %s', request_ip, resolved_ip_info)
            if resolved_ip_info and resolved_ip_info.provider == 'aws':
                return super(CloudFrontedS3Storage, self).get_direct_download_url(path,
                                                                                  request_ip,
                                                                                  expires_in,
                                                                                  requires_cors,
                                                                                  head)

        # NOTE(review): `path` is used as given, without `_init_path`; confirm the CloudFront
        # origin is configured so that this maps onto the right key.
        url = 'https://%s/%s' % (self.cloudfront_distribution_domain, path)
        # The signer interprets naive datetimes as UTC, so the expiry must be computed from
        # UTC "now"; using local time would skew the lifetime by the host's UTC offset.
        expire_date = datetime.utcnow() + timedelta(seconds=expires_in)
        signer = self._get_cloudfront_signer()
        signed_url = signer.generate_presigned_url(url, date_less_than=expire_date)
        logger.debug('Returning CloudFront URL for path "%s" with IP "%s": %s', path,
                     resolved_ip_info, signed_url)
        return signed_url

    @lru_cache(maxsize=1)
    def _get_cloudfront_signer(self):
        """Return a `CloudFrontSigner` for the configured key id, using the RSA signer."""
        return CloudFrontSigner(self.cloudfront_key_id, self._get_rsa_signer())

    @lru_cache(maxsize=1)
    def _get_rsa_signer(self):
        """Return a callable that signs a message with the CloudFront private key.

        The signature uses PKCS#1 v1.5 padding with SHA-1.
        """
        private_key = self.cloudfront_privatekey

        def handler(message):
            # One-shot sign() replaces the deprecated signer()/update()/finalize() sequence.
            return private_key.sign(message, padding.PKCS1v15(), hashes.SHA1())

        return handler

    @lru_cache(maxsize=1)
    def _load_private_key(self, cloudfront_privatekey_filename):
        """ Returns the private key, loaded from the config provider, used to sign direct
            download URLs to CloudFront.

            Returns None when the context has no config provider.
        """
        if self._context.config_provider is None:
            return None

        with self._context.config_provider.get_volume_file(cloudfront_privatekey_filename) as key_file:
            return serialization.load_pem_private_key(
                key_file.read(),
                password=None,
                backend=default_backend()
            )
|