# quay/storage/cloud.py
# NOTE: the upstream repository was archived (read-only) on 2020-03-24.
import cStringIO as StringIO
import os
import logging
2013-09-25 21:50:03 +00:00
import boto.s3.connection
2014-08-12 06:06:44 +00:00
import boto.gs.connection
2013-09-25 21:50:03 +00:00
import boto.s3.key
2014-08-12 06:06:44 +00:00
import boto.gs.key
2013-09-25 21:50:03 +00:00
from io import BufferedIOBase
from storage.basestorage import BaseStorage
2013-09-25 21:50:03 +00:00
logger = logging.getLogger(__name__)
class StreamReadKeyAsFile(BufferedIOBase):
def __init__(self, key):
self._key = key
def read(self, amt=None):
if self.closed:
return None
resp = self._key.read(amt)
return resp
def readable(self):
return True
@property
def closed(self):
return self._key.closed
def close(self):
self._key.close(fast=True)
class _CloudStorage(BaseStorage):
def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path,
access_key, secret_key, bucket_name):
self._initialized = False
2014-08-12 06:06:44 +00:00
self._bucket_name = bucket_name
self._access_key = access_key
self._secret_key = secret_key
self._root_path = storage_path
2014-08-12 06:06:44 +00:00
self._connection_class = connection_class
self._key_class = key_class
self._upload_params = upload_params
self._connect_kwargs = connect_kwargs
2014-08-12 06:06:44 +00:00
self._cloud_conn = None
self._cloud_bucket = None
2014-08-12 06:06:44 +00:00
def _initialize_cloud_conn(self):
if not self._initialized:
self._cloud_conn = self._connection_class(self._access_key, self._secret_key,
**self._connect_kwargs)
2014-08-12 06:06:44 +00:00
self._cloud_bucket = self._cloud_conn.get_bucket(self._bucket_name)
self._initialized = True
def _debug_key(self, key):
"""Used for debugging only."""
orig_meth = key.bucket.connection.make_request
def new_meth(*args, **kwargs):
print '#' * 16
print args
print kwargs
print '#' * 16
return orig_meth(*args, **kwargs)
key.bucket.connection.make_request = new_meth
def _init_path(self, path=None):
path = os.path.join(self._root_path, path) if path else self._root_path
if path and path[0] == '/':
return path[1:]
return path
def get_content(self, path):
2014-08-12 06:06:44 +00:00
self._initialize_cloud_conn()
path = self._init_path(path)
2014-08-12 06:06:44 +00:00
key = self._key_class(self._cloud_bucket, path)
if not key.exists():
raise IOError('No such key: \'{0}\''.format(path))
return key.get_contents_as_string()
def put_content(self, path, content):
2014-08-12 06:06:44 +00:00
self._initialize_cloud_conn()
path = self._init_path(path)
2014-08-12 06:06:44 +00:00
key = self._key_class(self._cloud_bucket, path)
key.set_contents_from_string(content, **self._upload_params)
return path
def get_supports_resumable_downloads(self):
return True
def get_direct_download_url(self, path, expires_in=60, requires_cors=False):
2014-08-12 06:06:44 +00:00
self._initialize_cloud_conn()
path = self._init_path(path)
2014-08-12 06:06:44 +00:00
k = self._key_class(self._cloud_bucket, path)
return k.generate_url(expires_in)
def get_direct_upload_url(self, path, mime_type, requires_cors=True):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
url = key.generate_url(300, 'PUT', headers={'Content-Type': mime_type}, encrypt_key=True)
return url
def stream_read(self, path):
2014-08-12 06:06:44 +00:00
self._initialize_cloud_conn()
path = self._init_path(path)
2014-08-12 06:06:44 +00:00
key = self._key_class(self._cloud_bucket, path)
if not key.exists():
raise IOError('No such key: \'{0}\''.format(path))
while True:
buf = key.read(self.buffer_size)
if not buf:
break
yield buf
def stream_read_file(self, path):
2014-08-12 06:06:44 +00:00
self._initialize_cloud_conn()
path = self._init_path(path)
2014-08-12 06:06:44 +00:00
key = self._key_class(self._cloud_bucket, path)
if not key.exists():
raise IOError('No such key: \'{0}\''.format(path))
return StreamReadKeyAsFile(key)
def stream_write(self, path, fp, content_type=None, content_encoding=None):
# Minimum size of upload part size on S3 is 5MB
2014-08-12 06:06:44 +00:00
self._initialize_cloud_conn()
buffer_size = 5 * 1024 * 1024
if self.buffer_size > buffer_size:
buffer_size = self.buffer_size
path = self._init_path(path)
metadata = {}
if content_type is not None:
metadata['Content-Type'] = content_type
if content_encoding is not None:
metadata['Content-Encoding'] = content_encoding
mp = self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
num_part = 1
while True:
try:
buf = fp.read(buffer_size)
if not buf:
break
io = StringIO.StringIO(buf)
mp.upload_part_from_file(io, num_part)
num_part += 1
io.close()
except IOError:
break
mp.complete_upload()
def list_directory(self, path=None):
2014-08-12 06:06:44 +00:00
self._initialize_cloud_conn()
path = self._init_path(path)
if not path.endswith('/'):
path += '/'
ln = 0
if self._root_path != '/':
ln = len(self._root_path)
exists = False
2014-08-12 06:06:44 +00:00
for key in self._cloud_bucket.list(prefix=path, delimiter='/'):
exists = True
name = key.name
if name.endswith('/'):
yield name[ln:-1]
else:
yield name[ln:]
if exists is False:
# In order to be compliant with the LocalStorage API. Even though
# S3 does not have a concept of folders.
raise OSError('No such directory: \'{0}\''.format(path))
def exists(self, path):
2014-08-12 06:06:44 +00:00
self._initialize_cloud_conn()
path = self._init_path(path)
2014-08-12 06:06:44 +00:00
key = self._key_class(self._cloud_bucket, path)
return key.exists()
def remove(self, path):
2014-08-12 06:06:44 +00:00
self._initialize_cloud_conn()
path = self._init_path(path)
2014-08-12 06:06:44 +00:00
key = self._key_class(self._cloud_bucket, path)
if key.exists():
# It's a file
key.delete()
return
# We assume it's a directory
if not path.endswith('/'):
path += '/'
2014-08-12 06:06:44 +00:00
for key in self._cloud_bucket.list(prefix=path):
key.delete()
2014-08-12 06:06:44 +00:00
def get_checksum(self, path):
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
k = self._cloud_bucket.lookup(key)
return k.etag[1:-1][:7]
class S3Storage(_CloudStorage):
def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket):
upload_params = {
'encrypt_key': True,
}
connect_kwargs = {}
2014-08-12 06:06:44 +00:00
super(S3Storage, self).__init__(boto.s3.connection.S3Connection, boto.s3.key.Key,
connect_kwargs, upload_params, storage_path, s3_access_key,
s3_secret_key, s3_bucket)
class GoogleCloudStorage(_CloudStorage):
def __init__(self, storage_path, access_key, secret_key, bucket_name):
upload_params = {}
connect_kwargs = {}
super(GoogleCloudStorage, self).__init__(boto.gs.connection.GSConnection, boto.gs.key.Key,
connect_kwargs, upload_params, storage_path,
access_key, secret_key, bucket_name)
2014-08-12 06:06:44 +00:00
def stream_write(self, path, fp, content_type=None, content_encoding=None):
2014-08-12 06:06:44 +00:00
# Minimum size of upload part size on S3 is 5MB
self._initialize_cloud_conn()
path = self._init_path(path)
key = self._key_class(self._cloud_bucket, path)
if content_type is not None:
key.set_metadata('Content-Type', content_type)
if content_encoding is not None:
key.set_metadata('Content-Encoding', content_encoding)
2014-08-12 06:06:44 +00:00
key.set_contents_from_stream(fp)
class RadosGWStorage(_CloudStorage):
def __init__(self, hostname, is_secure, storage_path, access_key, secret_key, bucket_name):
upload_params = {}
connect_kwargs = {
'host': hostname,
'is_secure': is_secure,
'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
}
super(RadosGWStorage, self).__init__(boto.s3.connection.S3Connection, boto.s3.key.Key,
connect_kwargs, upload_params, storage_path, access_key,
secret_key, bucket_name)
# TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
def get_direct_download_url(self, path, expires_in=60, requires_cors=False):
if requires_cors:
return None
return super(RadosGWStorage, self).get_direct_download_url(path, expires_in, requires_cors)
# TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
def get_direct_upload_url(self, path, mime_type, requires_cors=True):
if requires_cors:
return None
return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors)