# quay/storage/cloud.py
import cStringIO as StringIO
import os
import logging

import boto.s3.connection
import boto.s3.multipart
import boto.gs.connection
import boto.s3.key
import boto.gs.key

from io import BufferedIOBase
from uuid import uuid4

from storage.basestorage import BaseStorageV2, InvalidChunkException


logger = logging.getLogger(__name__)


_MULTIPART_UPLOAD_ID_KEY = 'upload_id'
_LAST_PART_KEY = 'last_part_num'
_LAST_CHUNK_ENCOUNTERED = 'last_chunk_encountered'


class StreamReadKeyAsFile(BufferedIOBase):
  """Wraps a boto key in a read-only, file-like object."""
  def __init__(self, key):
    self._key = key

  def read(self, amt=None):
    if self.closed:
      return None

    resp = self._key.read(amt)
    return resp

  def readable(self):
    return True

  @property
  def closed(self):
    return self._key.closed

  def close(self):
    self._key.close(fast=True)
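
# A rough usage sketch for StreamReadKeyAsFile (the bucket and key path below are
# illustrative placeholders; in practice instances are created by stream_read_file):
#
#   key = boto.s3.key.Key(bucket, 'some/path')
#   wrapped = StreamReadKeyAsFile(key)
#   first_chunk = wrapped.read(4096)
#   wrapped.close()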


class _CloudStorage(BaseStorageV2):
  """Base implementation shared by the boto-backed storage engines (S3, GCS, RadosGW)."""
  def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path,
               access_key, secret_key, bucket_name):
    super(_CloudStorage, self).__init__()

    self.upload_chunk_size = 5 * 1024 * 1024

    self._initialized = False
    self._bucket_name = bucket_name
    self._access_key = access_key
    self._secret_key = secret_key
    self._root_path = storage_path
    self._connection_class = connection_class
    self._key_class = key_class
    self._upload_params = upload_params
    self._connect_kwargs = connect_kwargs
    self._cloud_conn = None
    self._cloud_bucket = None

  def _initialize_cloud_conn(self):
    if not self._initialized:
      self._cloud_conn = self._connection_class(self._access_key, self._secret_key,
                                                **self._connect_kwargs)
      self._cloud_bucket = self._cloud_conn.get_bucket(self._bucket_name)
      self._initialized = True

  def _debug_key(self, key):
    """Used for debugging only."""
    orig_meth = key.bucket.connection.make_request

    def new_meth(*args, **kwargs):
      print '#' * 16
      print args
      print kwargs
      print '#' * 16
      return orig_meth(*args, **kwargs)
    key.bucket.connection.make_request = new_meth

  def _init_path(self, path=None):
    path = os.path.join(self._root_path, path) if path else self._root_path
    if path and path[0] == '/':
      return path[1:]
    return path

  def get_cloud_conn(self):
    self._initialize_cloud_conn()
    return self._cloud_conn

  def get_cloud_bucket(self):
    return self._cloud_bucket

  def get_content(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if not key.exists():
      raise IOError('No such key: \'{0}\''.format(path))
    return key.get_contents_as_string()

  def put_content(self, path, content):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    key.set_contents_from_string(content, **self._upload_params)
    return path

  def get_supports_resumable_downloads(self):
    return True

  def get_direct_download_url(self, path, expires_in=60, requires_cors=False):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    k = self._key_class(self._cloud_bucket, path)
    return k.generate_url(expires_in)

  def get_direct_upload_url(self, path, mime_type, requires_cors=True):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    url = key.generate_url(300, 'PUT', headers={'Content-Type': mime_type}, encrypt_key=True)
    return url
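
  # A rough illustration of handing a client a time-limited signed download URL
  # (the path below is a placeholder):
  #
  #   signed_url = storage.get_direct_download_url('some/path', expires_in=120)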

  def stream_read(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if not key.exists():
      raise IOError('No such key: \'{0}\''.format(path))
    while True:
      buf = key.read(self.buffer_size)
      if not buf:
        break
      yield buf

  def stream_read_file(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if not key.exists():
      raise IOError('No such key: \'{0}\''.format(path))
    return StreamReadKeyAsFile(key)

  def __initiate_multipart_upload(self, path, content_type, content_encoding):
    # The minimum part size for an S3 multipart upload is 5MB.
    self._initialize_cloud_conn()
    path = self._init_path(path)

    metadata = {}
    if content_type is not None:
      metadata['Content-Type'] = content_type

    if content_encoding is not None:
      metadata['Content-Encoding'] = content_encoding

    return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
                                                        **self._upload_params)

  def stream_write(self, path, fp, content_type=None, content_encoding=None):
    mp = self.__initiate_multipart_upload(path, content_type, content_encoding)
    num_part = 1
    while True:
      try:
        buf = StringIO.StringIO()
        bytes_written = self.stream_write_to_fp(fp, buf, self.upload_chunk_size)
        if bytes_written == 0:
          break

        mp.upload_part_from_file(buf, num_part)
        num_part += 1
        buf.close()
      except IOError:
        break
    mp.complete_upload()

  def list_directory(self, path=None):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    if not path.endswith('/'):
      path += '/'
    ln = 0
    if self._root_path != '/':
      ln = len(self._root_path)
    exists = False
    for key in self._cloud_bucket.list(prefix=path, delimiter='/'):
      exists = True
      name = key.name
      if name.endswith('/'):
        yield name[ln:-1]
      else:
        yield name[ln:]
    if exists is False:
      # Raise in order to be compliant with the LocalStorage API, even though
      # S3 has no real concept of folders.
      raise OSError('No such directory: \'{0}\''.format(path))

  def exists(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    return key.exists()

  def remove(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if key.exists():
      # It's a file
      key.delete()
      return

    # We assume it's a directory
    if not path.endswith('/'):
      path += '/'
    for key in self._cloud_bucket.list(prefix=path):
      key.delete()

  def get_checksum(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    k = self._cloud_bucket.lookup(key)
    if k is None:
      raise IOError('No such key: \'{0}\''.format(path))
    return k.etag[1:-1][:7]

  def _rel_upload_path(self, uuid):
    return 'uploads/{0}'.format(uuid)

  def initiate_chunked_upload(self):
    self._initialize_cloud_conn()
    random_uuid = str(uuid4())
    path = self._init_path(self._rel_upload_path(random_uuid))
    mpu = self.__initiate_multipart_upload(path, content_type=None, content_encoding=None)

    metadata = {
      _MULTIPART_UPLOAD_ID_KEY: mpu.id,
      _LAST_PART_KEY: 0,
      _LAST_CHUNK_ENCOUNTERED: False,
    }

    return mpu.id, metadata

  def _get_multipart_upload_key(self, uuid, storage_metadata):
    mpu = boto.s3.multipart.MultiPartUpload(self._cloud_bucket)
    mpu.id = storage_metadata[_MULTIPART_UPLOAD_ID_KEY]
    mpu.key_name = self._init_path(self._rel_upload_path(uuid))
    return mpu

  def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata):
    self._initialize_cloud_conn()
    mpu = self._get_multipart_upload_key(uuid, storage_metadata)
    last_part_num = storage_metadata[_LAST_PART_KEY]

    if storage_metadata[_LAST_CHUNK_ENCOUNTERED] and length != 0:
      msg = 'Length must be at least the upload chunk size: %s' % self.upload_chunk_size
      raise InvalidChunkException(msg)

    part_num = last_part_num + 1
    mpu.upload_part_from_file(in_fp, part_num, size=length)

    new_metadata = {
      _MULTIPART_UPLOAD_ID_KEY: mpu.id,
      _LAST_PART_KEY: part_num,
      _LAST_CHUNK_ENCOUNTERED: True,
    }

    return length, new_metadata

  def complete_chunked_upload(self, uuid, final_path, storage_metadata):
    mpu = self._get_multipart_upload_key(uuid, storage_metadata)
    mpu.complete_upload()

  def cancel_chunked_upload(self, uuid, storage_metadata):
    mpu = self._get_multipart_upload_key(uuid, storage_metadata)
    mpu.cancel_upload()
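
# A rough sketch of the chunked-upload flow against any _CloudStorage subclass
# (`storage`, `chunks`, and the final path below are illustrative placeholders):
#
#   upload_id, metadata = storage.initiate_chunked_upload()
#   for offset, length, chunk_fp in chunks:
#     # every chunk except the last is expected to be at least upload_chunk_size
#     length, metadata = storage.stream_upload_chunk(upload_id, offset, length, chunk_fp, metadata)
#   storage.complete_chunked_upload(upload_id, 'final/path', metadata)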


class S3Storage(_CloudStorage):
  def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket):
    upload_params = {
      'encrypt_key': True,
    }
    connect_kwargs = {}
    super(S3Storage, self).__init__(boto.s3.connection.S3Connection, boto.s3.key.Key,
                                    connect_kwargs, upload_params, storage_path, s3_access_key,
                                    s3_secret_key, s3_bucket)

  def setup(self):
    self.get_cloud_bucket().set_cors_xml("""<?xml version="1.0" encoding="UTF-8"?>
      <CORSConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
          <CORSRule>
              <AllowedOrigin>*</AllowedOrigin>
              <AllowedMethod>GET</AllowedMethod>
              <MaxAgeSeconds>3000</MaxAgeSeconds>
              <AllowedHeader>Authorization</AllowedHeader>
          </CORSRule>
          <CORSRule>
              <AllowedOrigin>*</AllowedOrigin>
              <AllowedMethod>PUT</AllowedMethod>
              <MaxAgeSeconds>3000</MaxAgeSeconds>
              <AllowedHeader>Content-Type</AllowedHeader>
              <AllowedHeader>x-amz-acl</AllowedHeader>
              <AllowedHeader>origin</AllowedHeader>
          </CORSRule>
      </CORSConfiguration>""")
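
# A minimal, hypothetical construction of the S3 engine (the credentials, bucket,
# and paths below are placeholders, not real configuration):
#
#   storage = S3Storage('/registry', 'AKIA...', 'secret', 'my-bucket')
#   storage.setup()                      # installs the CORS policy on the bucket
#   storage.put_content('hello.txt', 'hello world')
#   print storage.get_content('hello.txt')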


class GoogleCloudStorage(_CloudStorage):
  def __init__(self, storage_path, access_key, secret_key, bucket_name):
    upload_params = {}
    connect_kwargs = {}
    super(GoogleCloudStorage, self).__init__(boto.gs.connection.GSConnection, boto.gs.key.Key,
                                             connect_kwargs, upload_params, storage_path,
                                             access_key, secret_key, bucket_name)

  def setup(self):
    self.get_cloud_bucket().set_cors_xml("""<?xml version="1.0" encoding="UTF-8"?>
      <CorsConfig>
        <Cors>
          <Origins>
            <Origin>*</Origin>
          </Origins>
          <Methods>
            <Method>GET</Method>
            <Method>PUT</Method>
          </Methods>
          <ResponseHeaders>
            <ResponseHeader>Content-Type</ResponseHeader>
          </ResponseHeaders>
          <MaxAgeSec>3000</MaxAgeSec>
        </Cors>
      </CorsConfig>""")

  def stream_write(self, path, fp, content_type=None, content_encoding=None):
    # Stream the contents directly onto the key rather than going through the
    # base class's S3-style multipart upload.
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if content_type is not None:
      key.set_metadata('Content-Type', content_type)

    if content_encoding is not None:
      key.set_metadata('Content-Encoding', content_encoding)

    key.set_contents_from_stream(fp)


class RadosGWStorage(_CloudStorage):
  def __init__(self, hostname, is_secure, storage_path, access_key, secret_key, bucket_name):
    upload_params = {}
    connect_kwargs = {
      'host': hostname,
      'is_secure': is_secure,
      'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
    }
    super(RadosGWStorage, self).__init__(boto.s3.connection.S3Connection, boto.s3.key.Key,
                                         connect_kwargs, upload_params, storage_path, access_key,
                                         secret_key, bucket_name)

  # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
  def get_direct_download_url(self, path, expires_in=60, requires_cors=False):
    if requires_cors:
      return None

    return super(RadosGWStorage, self).get_direct_download_url(path, expires_in, requires_cors)

  # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
  def get_direct_upload_url(self, path, mime_type, requires_cors=True):
    if requires_cors:
      return None

    return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors)
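
# A hypothetical RadosGW configuration (the hostname, credentials, and bucket are
# placeholders); note that direct download/upload URLs are disabled whenever CORS
# would be required:
#
#   storage = RadosGWStorage('ceph.internal.example', True, '/registry',
#                            'access-key', 'secret-key', 'registry-bucket')
#   url = storage.get_direct_download_url('some/path', requires_cors=True)   # returns None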