Implement some new methods on the storage engines.

Jake Moshenko 2015-08-26 17:08:42 -04:00
parent 4fa37a46d1
commit 398202e6fc
10 changed files with 211 additions and 98 deletions

@@ -3,18 +3,25 @@ import os
 import logging

 import boto.s3.connection
+import boto.s3.multipart
 import boto.gs.connection
 import boto.s3.key
 import boto.gs.key

 from io import BufferedIOBase
+from uuid import uuid4

-from storage.basestorage import BaseStorage
+from storage.basestorage import BaseStorageV2, InvalidChunkException

 logger = logging.getLogger(__name__)

+_MULTIPART_UPLOAD_ID_KEY = 'upload_id'
+_LAST_PART_KEY = 'last_part_num'
+_LAST_CHUNK_ENCOUNTERED = 'last_chunk_encountered'
+

 class StreamReadKeyAsFile(BufferedIOBase):
   def __init__(self, key):
     self._key = key
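The three new module-level constants name the fields of the storage_metadata dict that threads multipart-upload state between otherwise stateless calls (see initiate_chunked_upload and stream_upload_chunk further down). Mid-upload, such a dict might look like the following; the values here are illustrative assumptions, not taken from the commit:

  storage_metadata = {
      'upload_id': 'EXAMPLE-UPLOAD-ID',   # S3 multipart upload id, as returned by boto
      'last_part_num': 3,                 # parts 1 through 3 have been uploaded
      'last_chunk_encountered': False,    # no short (final) chunk seen yet
  }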
@@ -37,9 +44,13 @@ class StreamReadKeyAsFile(BufferedIOBase):
     self._key.close(fast=True)


-class _CloudStorage(BaseStorage):
+class _CloudStorage(BaseStorageV2):
   def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path,
                access_key, secret_key, bucket_name):
     super(_CloudStorage, self).__init__()
+
+    self.upload_chunk_size = 5 * 1024 * 1024
+
     self._initialized = False
     self._bucket_name = bucket_name
     self._access_key = access_key
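BaseStorageV2 itself is not shown in this diff. Judging from the methods implemented further down, its chunked-upload contract is roughly the sketch below; the method names, parameters, and return shapes are taken from this file, while the docstrings and NotImplementedError bodies are assumptions:

  class BaseStorageV2(BaseStorage):
    def initiate_chunked_upload(self):
      """ Returns (upload_uuid, storage_metadata) for a new chunked upload. """
      raise NotImplementedError

    def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata):
      """ Uploads one chunk and returns (bytes_written, updated_storage_metadata). """
      raise NotImplementedError

    def complete_chunked_upload(self, uuid, final_path, storage_metadata):
      """ Finalizes the upload under its final path. """
      raise NotImplementedError

    def cancel_chunked_upload(self, uuid, storage_metadata):
      """ Aborts the upload and discards any partial data. """
      raise NotImplementedError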
@@ -135,12 +146,9 @@ class _CloudStorage(BaseStorage):
       raise IOError('No such key: \'{0}\''.format(path))
     return StreamReadKeyAsFile(key)

-  def stream_write(self, path, fp, content_type=None, content_encoding=None):
+  def __initiate_multipart_upload(self, path, content_type, content_encoding):
     # Minimum size of upload part size on S3 is 5MB
     self._initialize_cloud_conn()
-    buffer_size = 5 * 1024 * 1024
-    if self.buffer_size > buffer_size:
-      buffer_size = self.buffer_size
     path = self._init_path(path)
     metadata = {}
@ -150,16 +158,20 @@ class _CloudStorage(BaseStorage):
if content_encoding is not None:
metadata['Content-Encoding'] = content_encoding
mp = self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
def stream_write(self, path, fp, content_type=None, content_encoding=None):
mp = self.__initiate_multipart_upload(path, content_type, content_encoding)
num_part = 1
while True:
try:
buf = fp.read(buffer_size)
if not buf:
buf = StringIO.StringIO()
bytes_written = self.stream_write_to_fp(fp, buf, self.upload_chunk_size)
if bytes_written == 0:
break
io = StringIO.StringIO(buf)
mp.upload_part_from_file(io, num_part)
mp.upload_part_from_file(buf, num_part)
num_part += 1
io.close()
except IOError:
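The rewritten loop delegates the actual byte copying to stream_write_to_fp, which this diff does not show; presumably it lives on the base storage class and copies at most the requested number of bytes from the input file object into the output buffer, returning the count actually written. A minimal sketch under that assumption (the 64 KiB read size is arbitrary):

  def stream_write_to_fp(self, in_fp, out_fp, num_bytes):
    # Copy up to num_bytes from in_fp to out_fp; return the bytes actually copied.
    bytes_copied = 0
    while bytes_copied < num_bytes:
      data = in_fp.read(min(65536, num_bytes - bytes_copied))
      if not data:
        break
      out_fp.write(data)
      bytes_copied += len(data)
    return bytes_copied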
@@ -217,6 +229,57 @@ class _CloudStorage(BaseStorage):
     return k.etag[1:-1][:7]

   def _rel_upload_path(self, uuid):
     return 'uploads/{0}'.format(uuid)

+  def initiate_chunked_upload(self):
+    self._initialize_cloud_conn()
+    random_uuid = str(uuid4())
+    path = self._init_path(self._rel_upload_path(random_uuid))
+    mpu = self.__initiate_multipart_upload(path, content_type=None, content_encoding=None)
+
+    metadata = {
+      _MULTIPART_UPLOAD_ID_KEY: mpu.id,
+      _LAST_PART_KEY: 0,
+      _LAST_CHUNK_ENCOUNTERED: False,
+    }
+
+    # Return the uuid used to build the upload path, since
+    # _get_multipart_upload_key reconstructs the key from it.
+    return random_uuid, metadata
+
+  def _get_multipart_upload_key(self, uuid, storage_metadata):
+    mpu = boto.s3.multipart.MultiPartUpload(self._cloud_bucket)
+    mpu.id = storage_metadata[_MULTIPART_UPLOAD_ID_KEY]
+    mpu.key_name = self._init_path(self._rel_upload_path(uuid))
+    return mpu
+
+  def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata):
+    self._initialize_cloud_conn()
+    mpu = self._get_multipart_upload_key(uuid, storage_metadata)
+    last_part_num = storage_metadata[_LAST_PART_KEY]
+
+    # Once a short (final) chunk has been seen, only zero-length chunks may follow.
+    if storage_metadata[_LAST_CHUNK_ENCOUNTERED] and length != 0:
+      msg = 'Length must be at least the upload chunk size: %s' % self.upload_chunk_size
+      raise InvalidChunkException(msg)
+
+    part_num = last_part_num + 1
+    mpu.upload_part_from_file(in_fp, part_num, size=length)
+
+    new_metadata = {
+      _MULTIPART_UPLOAD_ID_KEY: mpu.id,
+      _LAST_PART_KEY: part_num,
+      _LAST_CHUNK_ENCOUNTERED: length < self.upload_chunk_size,
+    }
+
+    return length, new_metadata
+
+  def complete_chunked_upload(self, uuid, final_path, storage_metadata):
+    mpu = self._get_multipart_upload_key(uuid, storage_metadata)
+    mpu.complete_upload()
+
+  def cancel_chunked_upload(self, uuid, storage_metadata):
+    mpu = self._get_multipart_upload_key(uuid, storage_metadata)
+    mpu.cancel_upload()
+

 class S3Storage(_CloudStorage):
   def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket):
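Taken together, the new methods let a caller spread one logical upload across several independent requests, carrying the storage_metadata dict between them. A hypothetical driver; the constructor arguments and the incoming_chunks() source of (length, file-like) pairs are illustrative, not part of this commit:

  engine = S3Storage('/registry', 'ACCESS_KEY', 'SECRET_KEY', 'some-bucket')

  upload_uuid, metadata = engine.initiate_chunked_upload()
  offset = 0
  for length, chunk_fp in incoming_chunks():  # hypothetical chunk source
    written, metadata = engine.stream_upload_chunk(upload_uuid, offset, length, chunk_fp, metadata)
    offset += written

  engine.complete_chunked_upload(upload_uuid, 'final/object/path', metadata)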