2013-09-25 21:50:03 +00:00
|
|
|
import cStringIO as StringIO
|
|
|
|
import os
|
2013-10-20 06:39:23 +00:00
|
|
|
import logging
|
2013-09-25 21:50:03 +00:00
|
|
|
|
|
|
|
import boto.s3.connection
|
2014-08-12 06:06:44 +00:00
|
|
|
import boto.gs.connection
|
2013-09-25 21:50:03 +00:00
|
|
|
import boto.s3.key
|
2014-08-12 06:06:44 +00:00
|
|
|
import boto.gs.key
|
2013-09-25 21:50:03 +00:00
|
|
|
|
2014-09-10 02:28:25 +00:00
|
|
|
from io import BufferedIOBase
|
2014-09-09 22:30:14 +00:00
|
|
|
|
2015-08-31 17:48:52 +00:00
|
|
|
import app
|
2014-04-03 21:31:46 +00:00
|
|
|
from storage.basestorage import BaseStorage
|
2013-09-25 21:50:03 +00:00
|
|
|
|
|
|
|
|
2013-10-20 06:39:23 +00:00
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2014-09-09 22:30:14 +00:00
|
|
|
class StreamReadKeyAsFile(BufferedIOBase):
    """Read-only, file-like view over a storage key object.

    The wrapped key only needs to provide ``read(amt)``, ``close(fast=...)``
    and a ``closed`` attribute; every operation is delegated to it.
    """

    def __init__(self, key):
        # The key is the single source of truth for both data and open/closed
        # state; nothing is buffered on this object.
        self._key = key

    @property
    def closed(self):
        """Mirror the closed state reported by the underlying key."""
        return self._key.closed

    def readable(self):
        """Always report the stream as readable."""
        return True

    def read(self, amt=None):
        """Read up to ``amt`` bytes from the key.

        Returns whatever the key's ``read`` returns, or ``None`` (rather than
        raising) once the key reports itself closed.
        """
        return None if self.closed else self._key.read(amt)

    def close(self):
        """Close the underlying key, asking it for a fast close."""
        self._key.close(fast=True)
|
|
|
|
|
2013-10-20 06:39:23 +00:00
|
|
|
|
2014-08-12 06:06:44 +00:00
|
|
|
class _CloudStorage(BaseStorage):
    """Shared implementation for storage engines backed by a boto bucket.

    Subclasses supply the boto connection class, the key class and the
    connection/upload keyword arguments. The connection and the bucket handle
    are created lazily by ``_initialize_cloud_conn`` the first time an
    operation needs them, not in the constructor.
    """

    def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path,
                 access_key, secret_key, bucket_name):
        """Record the configuration; no connection is opened here.

        Args:
            connection_class: callable invoked as
                ``connection_class(access_key, secret_key, **connect_kwargs)``.
            key_class: callable invoked as ``key_class(bucket, path)``.
            connect_kwargs: extra keyword arguments for ``connection_class``.
            upload_params: extra keyword arguments passed to upload calls.
            storage_path: root path prepended to every path handled here.
            access_key: credential passed to ``connection_class``.
            secret_key: credential passed to ``connection_class``.
            bucket_name: name handed to the connection's ``get_bucket``.
        """
        self._initialized = False
        self._bucket_name = bucket_name
        self._access_key = access_key
        self._secret_key = secret_key
        self._root_path = storage_path
        self._connection_class = connection_class
        self._key_class = key_class
        self._upload_params = upload_params
        self._connect_kwargs = connect_kwargs
        self._cloud_conn = None
        self._cloud_bucket = None

    def _initialize_cloud_conn(self):
        """Create the connection and bucket handle on first use (idempotent)."""
        if not self._initialized:
            self._cloud_conn = self._connection_class(self._access_key, self._secret_key,
                                                      **self._connect_kwargs)
            self._cloud_bucket = self._cloud_conn.get_bucket(self._bucket_name)
            self._initialized = True

    def _debug_key(self, key):
        """Used for debugging only.

        Wraps ``make_request`` on the key's connection so every request's
        arguments are printed before the original method runs.
        """
        orig_meth = key.bucket.connection.make_request

        def new_meth(*args, **kwargs):
            # Parenthesised single-argument prints produce the same output as
            # the print statement and also parse under Python 3.
            print('#' * 16)
            print(args)
            print(kwargs)
            print('#' * 16)
            return orig_meth(*args, **kwargs)

        key.bucket.connection.make_request = new_meth

    def _init_path(self, path=None):
        """Join ``path`` onto the root path and drop one leading '/'.

        With a falsy ``path`` the root path itself is normalised.
        """
        path = os.path.join(self._root_path, path) if path else self._root_path
        if path and path[0] == '/':
            return path[1:]
        return path

    def get_cloud_conn(self):
        """Return the connection object, creating it if necessary."""
        self._initialize_cloud_conn()
        return self._cloud_conn

    def get_cloud_bucket(self):
        """Return the bucket handle, creating the connection if necessary.

        Initialising here mirrors ``get_cloud_conn``; without it a caller that
        runs before any other storage operation would receive ``None``.
        """
        self._initialize_cloud_conn()
        return self._cloud_bucket

    def get_content(self, path):
        """Return the full content stored at ``path``.

        Raises:
            IOError: if the key does not exist.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        if not key.exists():
            raise IOError('No such key: \'{0}\''.format(path))
        return key.get_contents_as_string()

    def put_content(self, path, content):
        """Store ``content`` at ``path`` and return the normalised path."""
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        key.set_contents_from_string(content, **self._upload_params)
        return path

    def get_supports_resumable_downloads(self):
        """Report that resumable downloads are supported."""
        return True

    def get_direct_download_url(self, path, expires_in=60, requires_cors=False):
        """Return a URL for ``path`` generated with an ``expires_in`` expiry.

        ``requires_cors`` is accepted for interface compatibility and is not
        used by this implementation.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        k = self._key_class(self._cloud_bucket, path)
        return k.generate_url(expires_in)

    def get_direct_upload_url(self, path, mime_type, requires_cors=True):
        """Return a PUT URL for ``path`` generated with an expiry of 300.

        The URL is generated with a ``Content-Type`` header of ``mime_type``
        and ``encrypt_key=True``. ``requires_cors`` is not used here.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        url = key.generate_url(300, 'PUT', headers={'Content-Type': mime_type}, encrypt_key=True)
        return url

    def stream_read(self, path):
        """Yield the content at ``path`` in chunks of ``self.buffer_size``.

        Raises:
            IOError: if the key does not exist (raised on first iteration,
                since this is a generator).
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        if not key.exists():
            raise IOError('No such key: \'{0}\''.format(path))
        while True:
            buf = key.read(self.buffer_size)
            if not buf:
                break
            yield buf

    def stream_read_file(self, path):
        """Return a ``StreamReadKeyAsFile`` wrapping the key at ``path``.

        Raises:
            IOError: if the key does not exist.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        if not key.exists():
            raise IOError('No such key: \'{0}\''.format(path))
        return StreamReadKeyAsFile(key)

    def stream_write(self, path, fp, content_type=None, content_encoding=None):
        """Upload everything readable from ``fp`` to ``path`` as a multipart upload.

        Args:
            path: destination path, relative to the storage root.
            fp: object with a ``read(size)`` method.
            content_type: optional ``Content-Type`` metadata value.
            content_encoding: optional ``Content-Encoding`` metadata value.

        Any exception raised while reading or uploading a part is re-raised
        after the failure metric is recorded and the multipart upload is
        cancelled, so that no partial upload is left behind. The success metric
        is recorded only after the upload has been completed.
        """
        self._initialize_cloud_conn()

        # Minimum size of upload part size on S3 is 5MB
        buffer_size = max(5 * 1024 * 1024, self.buffer_size)
        path = self._init_path(path)

        metadata = {}
        if content_type is not None:
            metadata['Content-Type'] = content_type

        if content_encoding is not None:
            metadata['Content-Encoding'] = content_encoding

        mp = self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
                                                          **self._upload_params)
        app.metric_queue.put('MultipartUploadStart', 1)

        num_part = 1
        try:
            while True:
                buf = fp.read(buffer_size)
                if not buf:
                    break
                part = StringIO.StringIO(buf)
                try:
                    mp.upload_part_from_file(part, num_part)
                finally:
                    # Release the in-memory copy even when the upload fails.
                    part.close()
                num_part += 1
        except Exception:
            # Not only IOError: a failure of any kind must not leave the
            # multipart upload open. The original exception still propagates.
            app.metric_queue.put('MultipartUploadFailure', 1)
            mp.cancel_upload()
            raise

        # NOTE(review): if fp yielded no data, no part was uploaded before this
        # call -- confirm how the backend treats completing an empty upload.
        mp.complete_upload()
        app.metric_queue.put('MultipartUploadSuccess', 1)

    def list_directory(self, path=None):
        """Yield the names found directly under ``path``.

        Names ending in '/' are yielded without the trailing slash. Each name
        has its first ``len(root path)`` characters removed unless the root
        path is exactly '/'.

        Raises:
            OSError: if nothing is listed under the prefix (raised once the
                generator is exhausted).
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)
        if not path.endswith('/'):
            path += '/'

        # NOTE(review): the slice length is taken from the raw root path while
        # key names come from the normalised one; this presumably assumes a
        # root of the form '/name' -- confirm against deployed configurations.
        ln = 0
        if self._root_path != '/':
            ln = len(self._root_path)

        exists = False
        for key in self._cloud_bucket.list(prefix=path, delimiter='/'):
            exists = True
            name = key.name
            if name.endswith('/'):
                yield name[ln:-1]
            else:
                yield name[ln:]

        if not exists:
            # In order to be compliant with the LocalStorage API. Even though
            # S3 does not have a concept of folders.
            raise OSError('No such directory: \'{0}\''.format(path))

    def exists(self, path):
        """Return whether a key exists at ``path``."""
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        return key.exists()

    def remove(self, path):
        """Delete the key at ``path``, or every key under ``path + '/'``."""
        self._initialize_cloud_conn()
        path = self._init_path(path)
        key = self._key_class(self._cloud_bucket, path)
        if key.exists():
            # It's a file
            key.delete()
            return

        # We assume it's a directory
        if not path.endswith('/'):
            path += '/'

        for key in self._cloud_bucket.list(prefix=path):
            key.delete()

    def get_checksum(self, path):
        """Return the first seven characters of the key's etag.

        The first and last characters of the etag are stripped before the
        prefix is taken.

        Raises:
            IOError: if the bucket lookup finds no key at ``path``.
        """
        self._initialize_cloud_conn()
        path = self._init_path(path)

        # boto documents lookup() as taking the key name, so pass the path
        # itself instead of a Key object built from it.
        k = self._cloud_bucket.lookup(path)
        if k is None:
            raise IOError('No such key: \'{0}\''.format(path))

        return k.etag[1:-1][:7]

    def copy_to(self, destination, path):
        """Copy the object at ``path`` into ``destination`` under the same path.

        A direct key copy is used when ``destination`` has the same class and
        the same access and secret keys; otherwise the content is streamed
        through ``stream_read_file`` / ``stream_write``.
        """
        self._initialize_cloud_conn()

        # First try to copy directly via boto, but only if the storages are the
        # same type, with the same access information.
        if (self.__class__ == destination.__class__ and
                self._access_key == destination._access_key and
                self._secret_key == destination._secret_key):
            # Initialise the destination before logging so that the message
            # shows its bucket rather than None.
            destination._initialize_cloud_conn()
            logger.debug('Copying file from %s to %s via a direct boto copy', self._cloud_bucket,
                         destination._cloud_bucket)

            source_path = self._init_path(path)
            source_key = self._key_class(self._cloud_bucket, source_path)

            dest_path = destination._init_path(path)
            # boto documents Key.copy() as taking the destination bucket name.
            source_key.copy(destination._bucket_name, dest_path)
            return

        # Fallback to a slower, default copy.
        logger.debug('Copying file from %s to %s via a streamed copy', self._cloud_bucket,
                     destination)
        with self.stream_read_file(path) as fp:
            destination.stream_write(path, fp)
|
|
|
|
|
2014-08-12 06:06:44 +00:00
|
|
|
|
|
|
|
class S3Storage(_CloudStorage):
    """Storage engine that talks to S3 through boto's S3 connection and key classes."""

    def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket):
        """Configure the base storage with key encryption requested on uploads."""
        super(S3Storage, self).__init__(
            connection_class=boto.s3.connection.S3Connection,
            key_class=boto.s3.key.Key,
            connect_kwargs={},
            upload_params={'encrypt_key': True},
            storage_path=storage_path,
            access_key=s3_access_key,
            secret_key=s3_secret_key,
            bucket_name=s3_bucket,
        )

    def setup(self):
        """Apply the CORS rules below to the bucket returned by ``get_cloud_bucket``.

        The first rule covers GET with the Authorization header, the second
        covers PUT with the Content-Type, x-amz-acl and origin headers.
        """
        cors_xml = """<?xml version="1.0" encoding="UTF-8"?>
<CORSConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
<CORSRule>
<AllowedOrigin>*</AllowedOrigin>
<AllowedMethod>GET</AllowedMethod>
<MaxAgeSeconds>3000</MaxAgeSeconds>
<AllowedHeader>Authorization</AllowedHeader>
</CORSRule>
<CORSRule>
<AllowedOrigin>*</AllowedOrigin>
<AllowedMethod>PUT</AllowedMethod>
<MaxAgeSeconds>3000</MaxAgeSeconds>
<AllowedHeader>Content-Type</AllowedHeader>
<AllowedHeader>x-amz-acl</AllowedHeader>
<AllowedHeader>origin</AllowedHeader>
</CORSRule>
</CORSConfiguration>"""
        bucket = self.get_cloud_bucket()
        bucket.set_cors_xml(cors_xml)
|
|
|
|
|
2014-08-12 06:06:44 +00:00
|
|
|
class GoogleCloudStorage(_CloudStorage):
    """Storage engine that uses boto's ``gs`` connection and key classes."""

    def __init__(self, storage_path, access_key, secret_key, bucket_name):
        """Configure the base storage with no extra connect or upload options."""
        super(GoogleCloudStorage, self).__init__(
            connection_class=boto.gs.connection.GSConnection,
            key_class=boto.gs.key.Key,
            connect_kwargs={},
            upload_params={},
            storage_path=storage_path,
            access_key=access_key,
            secret_key=secret_key,
            bucket_name=bucket_name,
        )

    def setup(self):
        """Apply the CORS configuration below to the bucket from ``get_cloud_bucket``.

        It lists the GET and PUT methods for any origin and the Content-Type
        response header.
        """
        cors_xml = """<?xml version="1.0" encoding="UTF-8"?>
<CorsConfig>
<Cors>
<Origins>
<Origin>*</Origin>
</Origins>
<Methods>
<Method>GET</Method>
<Method>PUT</Method>
</Methods>
<ResponseHeaders>
<ResponseHeader>Content-Type</ResponseHeader>
</ResponseHeaders>
<MaxAgeSec>3000</MaxAgeSec>
</Cors>
</CorsConfig>"""
        bucket = self.get_cloud_bucket()
        bucket.set_cors_xml(cors_xml)

    def stream_write(self, path, fp, content_type=None, content_encoding=None):
        """Upload the contents of ``fp`` to ``path`` in a single streamed call.

        Unlike the base class this does not start a multipart upload; the key's
        ``set_contents_from_stream`` consumes ``fp`` directly. ``content_type``
        and ``content_encoding`` are set as key metadata when they are not None.
        """
        self._initialize_cloud_conn()
        key = self._key_class(self._cloud_bucket, self._init_path(path))

        # Same order as before: Content-Type first, then Content-Encoding.
        optional_headers = (
            ('Content-Type', content_type),
            ('Content-Encoding', content_encoding),
        )
        for header_name, header_value in optional_headers:
            if header_value is not None:
                key.set_metadata(header_name, header_value)

        key.set_contents_from_stream(fp)
|
2014-09-09 19:54:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
class RadosGWStorage(_CloudStorage):
    """Storage engine for a RADOS gateway reached through boto's S3 classes."""

    def __init__(self, hostname, is_secure, storage_path, access_key, secret_key, bucket_name):
        """Configure the base storage to connect to ``hostname``.

        The connection is created with ``is_secure`` as given and boto's
        ``OrdinaryCallingFormat``.
        """
        gateway_options = {
            'host': hostname,
            'is_secure': is_secure,
            'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
        }
        super(RadosGWStorage, self).__init__(
            connection_class=boto.s3.connection.S3Connection,
            key_class=boto.s3.key.Key,
            connect_kwargs=gateway_options,
            upload_params={},
            storage_path=storage_path,
            access_key=access_key,
            secret_key=secret_key,
            bucket_name=bucket_name,
        )

    # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
    def get_direct_download_url(self, path, expires_in=60, requires_cors=False):
        """Return a direct download URL, or ``None`` when CORS is required."""
        if not requires_cors:
            return super(RadosGWStorage, self).get_direct_download_url(path, expires_in,
                                                                       requires_cors)
        return None

    # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
    def get_direct_upload_url(self, path, mime_type, requires_cors=True):
        """Return a direct upload URL, or ``None`` when CORS is required."""
        if not requires_cors:
            return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type,
                                                                     requires_cors)
        return None
|