Implement some new methods on the storage engines.

Jake Moshenko 2015-08-26 17:08:42 -04:00
parent 4fa37a46d1
commit 398202e6fc
10 changed files with 211 additions and 98 deletions

storage/basestorage.py

@@ -42,18 +42,9 @@ class StoragePaths(object):
class BaseStorage(StoragePaths):
"""Storage is organized as follow:
$ROOT/images/<image_id>/json
$ROOT/images/<image_id>/layer
$ROOT/repositories/<namespace>/<repository_name>/<tag_name>
"""
# Useful if we want to change those locations later without rewriting
# the code which uses Storage
repositories = 'repositories'
images = 'images'
# Set the IO buffer to 64kB
buffer_size = 64 * 1024
def __init__(self):
# Set the IO buffer to 64kB
self.buffer_size = 64 * 1024
def setup(self):
""" Called to perform any storage system setup. """
@@ -99,31 +90,55 @@ class BaseStorage(StoragePaths):
def get_checksum(self, path):
raise NotImplementedError
def stream_write_to_fp(self, in_fp, out_fp, num_bytes=-1):
""" Copy the specified number of bytes from the input file stream to the output stream. If
num_bytes < 0 copy until the stream ends.
"""
bytes_copied = 0
while bytes_copied < num_bytes or num_bytes == -1:
size_to_read = min(num_bytes - bytes_copied, self.buffer_size)
if size_to_read < 0:
size_to_read = self.buffer_size
try:
buf = in_fp.read(size_to_read)
if not buf:
break
out_fp.write(buf)
bytes_copied += len(buf)
except IOError:
break
return bytes_copied
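stream_write_to_fp is now shared by every engine (it previously lived on LocalStorage as _stream_write_to_fp): it copies in buffer_size reads and stops early on EOF or IOError. A small illustration with in-memory streams, not part of this commit:

import io

base = BaseStorage()                      # any engine works; only buffer_size is used here
src = io.BytesIO(b'a' * (200 * 1024))
dst = io.BytesIO()

# Bounded copy: stops after exactly num_bytes.
assert base.stream_write_to_fp(src, dst, num_bytes=64 * 1024) == 64 * 1024

# Unbounded copy (the default num_bytes=-1): drains the rest of the stream.
assert base.stream_write_to_fp(src, dst) == 136 * 1024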
class InvalidChunkException(RuntimeError):
pass
class BaseStorageV2(BaseStorage):
def initiate_chunked_upload(self, upload_uuid):
""" Start a new chunked upload
def initiate_chunked_upload(self):
""" Start a new chunked upload, returning the uuid and any associated storage metadata
"""
raise NotImplementedError
def stream_upload_chunk(self, uuid, offset, length, in_fp, hash_obj):
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata):
""" Upload the specified amount of data from the given file pointer to the chunked destination
specified, starting at the given offset. Raises InvalidChunkException if the offset or
length can not be accepted.
specified, starting at the given offset. Returns the number of bytes uploaded, and a new
version of the storage_metadata. Raises InvalidChunkException if the offset or length can
not be accepted.
"""
raise NotImplementedError
def complete_chunked_upload(self, uuid, final_path):
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
""" Complete the chunked upload and store the final results in the path indicated.
Returns nothing.
"""
raise NotImplementedError
def cancel_chunked_upload(self, uuid):
def cancel_chunked_upload(self, uuid, storage_metadata):
""" Cancel the chunked upload and clean up any outstanding partially uploaded data.
Returns nothing.
"""
raise NotImplementedError
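Taken together, the V2 methods define a client-driven upload lifecycle: initiate, stream one or more chunks while threading the returned storage_metadata through each call, then complete or cancel. A minimal, hypothetical driver against this interface (the storage object and the chunks iterable of (file_like, length) pairs are illustrative, not part of this commit):

def upload_in_chunks(storage, chunks, final_path):
    # storage is assumed to be any BaseStorageV2 implementation.
    upload_uuid, metadata = storage.initiate_chunked_upload()
    offset = 0
    try:
        for chunk_fp, chunk_len in chunks:
            # Each call reports the bytes written plus updated storage metadata,
            # which must be passed back in on the next call.
            written, metadata = storage.stream_upload_chunk(upload_uuid, offset, chunk_len,
                                                            chunk_fp, metadata)
            offset += written
    except InvalidChunkException:
        storage.cancel_chunked_upload(upload_uuid, metadata)
        raise
    storage.complete_chunked_upload(upload_uuid, final_path, metadata)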

storage/cloud.py

@@ -3,18 +3,25 @@ import os
import logging
import boto.s3.connection
import boto.s3.multipart
import boto.gs.connection
import boto.s3.key
import boto.gs.key
from io import BufferedIOBase
from uuid import uuid4
from storage.basestorage import BaseStorage
from storage.basestorage import BaseStorageV2, InvalidChunkException
logger = logging.getLogger(__name__)
_MULTIPART_UPLOAD_ID_KEY = 'upload_id'
_LAST_PART_KEY = 'last_part_num'
_LAST_CHUNK_ENCOUNTERED = 'last_chunk_encountered'
class StreamReadKeyAsFile(BufferedIOBase):
def __init__(self, key):
self._key = key
@@ -37,9 +44,13 @@ class StreamReadKeyAsFile(BufferedIOBase):
self._key.close(fast=True)
class _CloudStorage(BaseStorage):
class _CloudStorage(BaseStorageV2):
def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path,
access_key, secret_key, bucket_name):
super(_CloudStorage, self).__init__()
self.upload_chunk_size = 5 * 1024 * 1024
self._initialized = False
self._bucket_name = bucket_name
self._access_key = access_key
@@ -135,12 +146,9 @@ class _CloudStorage(BaseStorage):
raise IOError('No such key: \'{0}\''.format(path))
return StreamReadKeyAsFile(key)
def stream_write(self, path, fp, content_type=None, content_encoding=None):
def __initiate_multipart_upload(self, path, content_type, content_encoding):
# Minimum size of upload part size on S3 is 5MB
self._initialize_cloud_conn()
buffer_size = 5 * 1024 * 1024
if self.buffer_size > buffer_size:
buffer_size = self.buffer_size
path = self._init_path(path)
metadata = {}
@@ -150,16 +158,20 @@ class _CloudStorage(BaseStorage):
if content_encoding is not None:
metadata['Content-Encoding'] = content_encoding
mp = self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
def stream_write(self, path, fp, content_type=None, content_encoding=None):
mp = self.__initiate_multipart_upload(path, content_type, content_encoding)
num_part = 1
while True:
try:
buf = fp.read(buffer_size)
if not buf:
buf = StringIO.StringIO()
bytes_written = self.stream_write_to_fp(fp, buf, self.upload_chunk_size)
if bytes_written == 0:
break
io = StringIO.StringIO(buf)
mp.upload_part_from_file(io, num_part)
mp.upload_part_from_file(buf, num_part)
num_part += 1
io.close()
except IOError:
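The rewritten stream_write no longer reads raw buffers directly; it uses stream_write_to_fp to fill an in-memory buffer of upload_chunk_size (5 MB) per part, since S3 rejects multipart parts smaller than 5 MB except for the final one. A generic sketch of that buffering pattern, separate from the code above:

import io

PART_SIZE = 5 * 1024 * 1024   # mirrors upload_chunk_size

def iter_parts(in_fp, part_size=PART_SIZE):
    # Yield successive in-memory buffers of at most part_size bytes,
    # each small enough for one upload_part_from_file call.
    while True:
        data = in_fp.read(part_size)
        if not data:
            return
        yield io.BytesIO(data)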
@@ -217,6 +229,57 @@ class _CloudStorage(BaseStorage):
return k.etag[1:-1][:7]
def _rel_upload_path(self, uuid):
return 'uploads/{0}'.format(uuid)
def initiate_chunked_upload(self):
self._initialize_cloud_conn()
random_uuid = str(uuid4())
path = self._init_path(self._rel_upload_path(random_uuid))
mpu = self.__initiate_multipart_upload(path, content_type=None, content_encoding=None)
metadata = {
_MULTIPART_UPLOAD_ID_KEY: mpu.id,
_LAST_PART_KEY: 0,
_LAST_CHUNK_ENCOUNTERED: False,
}
return mpu.id, metadata
def _get_multipart_upload_key(self, uuid, storage_metadata):
mpu = boto.s3.multipart.MultiPartUpload(self._cloud_bucket)
mpu.id = storage_metadata[_MULTIPART_UPLOAD_ID_KEY]
mpu.key_name = self._init_path(self._rel_upload_path(uuid))
return mpu
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata):
self._initialize_cloud_conn()
mpu = self._get_multipart_upload_key(uuid, storage_metadata)
last_part_num = storage_metadata[_LAST_PART_KEY]
if storage_metadata[_LAST_CHUNK_ENCOUNTERED] and length != 0:
msg = 'Length must be at least the upload chunk size: %s' % self.upload_chunk_size
raise InvalidChunkException(msg)
part_num = last_part_num + 1
mpu.upload_part_from_file(in_fp, part_num, length)
new_metadata = {
_MULTIPART_UPLOAD_ID_KEY: mpu.id,
_LAST_PART_KEY: part_num,
_LAST_CHUNK_ENCOUNTERED: True,
}
return length, new_metadata
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
mpu = self._get_multipart_upload_key(uuid, storage_metadata)
mpu.complete_upload()
def cancel_chunked_upload(self, uuid, storage_metadata):
mpu = self._get_multipart_upload_key(uuid, storage_metadata)
mpu.cancel_upload()
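The storage_metadata the S3 engine hands back from initiate_chunked_upload is a plain, JSON-serializable dict, so a caller can persist it (for example in a database row tracking the upload) and return it untouched on later calls. With the keys defined at the top of this file it looks roughly like this (the upload id value is made up):

storage_metadata = {
    'upload_id': 'EXAMPLE-multipart-upload-id',  # _MULTIPART_UPLOAD_ID_KEY, assigned by S3
    'last_part_num': 0,                          # _LAST_PART_KEY
    'last_chunk_encountered': False,             # _LAST_CHUNK_ENCOUNTERED
}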
class S3Storage(_CloudStorage):
def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket):

storage/fakestorage.py

@@ -1,8 +1,14 @@
from storage.basestorage import BaseStorage
import cStringIO as StringIO
import hashlib
_FAKE_STORAGE_MAP = {}
from collections import defaultdict
from uuid import uuid4
class FakeStorage(BaseStorage):
from storage.basestorage import BaseStorageV2
_FAKE_STORAGE_MAP = defaultdict(StringIO.StringIO)
class FakeStorage(BaseStorageV2):
def _init_path(self, path=None, create=False):
return path
@@ -10,16 +16,26 @@ class FakeStorage(BaseStorage):
if not path in _FAKE_STORAGE_MAP:
raise IOError('Fake file %s not found' % path)
return _FAKE_STORAGE_MAP.get(path)
_FAKE_STORAGE_MAP.get(path).seek(0)
return _FAKE_STORAGE_MAP.get(path).read()
def put_content(self, path, content):
_FAKE_STORAGE_MAP[path] = content
_FAKE_STORAGE_MAP.pop(path, None)
_FAKE_STORAGE_MAP[path].write(content)
def stream_read(self, path):
yield _FAKE_STORAGE_MAP[path]
io_obj = _FAKE_STORAGE_MAP[path]
io_obj.seek(0)
while True:
buf = io_obj.read(self.buffer_size)
if not buf:
break
yield buf
def stream_write(self, path, fp, content_type=None, content_encoding=None):
_FAKE_STORAGE_MAP[path] = fp.read()
out_fp = _FAKE_STORAGE_MAP[path]
out_fp.seek(0)
self.stream_write_to_fp(fp, out_fp)
def remove(self, path):
_FAKE_STORAGE_MAP.pop(path, None)
@@ -28,4 +44,21 @@ class FakeStorage(BaseStorage):
return path in _FAKE_STORAGE_MAP
def get_checksum(self, path):
return path
return hashlib.sha256(_FAKE_STORAGE_MAP[path].read()).hexdigest()[:7]
def initiate_chunked_upload(self):
new_uuid = str(uuid4())
_FAKE_STORAGE_MAP[new_uuid].seek(0)
return new_uuid, {}
def stream_upload_chunk(self, uuid, offset, length, in_fp, _):
upload_storage = _FAKE_STORAGE_MAP[uuid]
upload_storage.seek(offset)
return self.stream_write_to_fp(in_fp, upload_storage, length), {}
def complete_chunked_upload(self, uuid, final_path, _):
_FAKE_STORAGE_MAP[final_path] = _FAKE_STORAGE_MAP[uuid]
_FAKE_STORAGE_MAP.pop(uuid, None)
def cancel_chunked_upload(self, uuid, _):
_FAKE_STORAGE_MAP.pop(uuid, None)
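A hypothetical smoke test of the in-memory engine's chunked-upload path (content and paths are made up; FakeStorage exists for tests):

import cStringIO as StringIO

storage = FakeStorage()
upload_uuid, metadata = storage.initiate_chunked_upload()
storage.stream_upload_chunk(upload_uuid, 0, 6, StringIO.StringIO('hello '), metadata)
storage.stream_upload_chunk(upload_uuid, 6, 5, StringIO.StringIO('world'), metadata)
storage.complete_chunked_upload(upload_uuid, 'greeting', metadata)
assert storage.get_content('greeting') == 'hello world'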

storage/local.py

@@ -14,8 +14,8 @@ logger = logging.getLogger(__name__)
class LocalStorage(BaseStorageV2):
def __init__(self, storage_path):
super(LocalStorage, self).__init__()
self._root_path = storage_path
def _init_path(self, path=None, create=False):
@@ -54,28 +54,7 @@ class LocalStorage(BaseStorageV2):
# Size is mandatory
path = self._init_path(path, create=True)
with open(path, mode='wb') as out_fp:
self._stream_write_to_fp(fp, out_fp)
def _stream_write_to_fp(self, in_fp, out_fp, num_bytes=-1):
""" Copy the specified number of bytes from the input file stream to the output stream. If
num_bytes < 0 copy until the stream ends.
"""
bytes_copied = 0
while bytes_copied < num_bytes or num_bytes == -1:
size_to_read = min(num_bytes - bytes_copied, self.buffer_size)
if size_to_read < 0:
size_to_read = self.buffer_size
try:
buf = in_fp.read(size_to_read)
if not buf:
break
out_fp.write(buf)
bytes_copied += len(buf)
except IOError:
break
return bytes_copied
self.stream_write_to_fp(fp, out_fp)
def list_directory(self, path=None):
path = self._init_path(path)
@@ -124,14 +103,14 @@ class LocalStorage(BaseStorageV2):
with open(self._init_path(self._rel_upload_path(new_uuid), create=True), 'w'):
pass
return new_uuid
return new_uuid, {}
def stream_upload_chunk(self, uuid, offset, length, in_fp):
def stream_upload_chunk(self, uuid, offset, length, in_fp, _):
with open(self._init_path(self._rel_upload_path(uuid)), 'r+b') as upload_storage:
upload_storage.seek(offset)
return self._stream_write_to_fp(in_fp, upload_storage, length)
return self.stream_write_to_fp(in_fp, upload_storage, length), {}
def complete_chunked_upload(self, uuid, final_path):
def complete_chunked_upload(self, uuid, final_path, _):
content_path = self._rel_upload_path(uuid)
final_path_abs = self._init_path(final_path, create=True)
if not self.exists(final_path_abs):
@@ -140,7 +119,7 @@ class LocalStorage(BaseStorageV2):
else:
logger.debug('Content already exists at path: %s', final_path_abs)
def cancel_chunked_upload(self, uuid):
def cancel_chunked_upload(self, uuid, _):
content_path = self._init_path(self._rel_upload_path(uuid))
os.remove(content_path)

storage/swift.py

@@ -13,6 +13,8 @@ logger = logging.getLogger(__name__)
class SwiftStorage(BaseStorage):
def __init__(self, swift_container, storage_path, auth_url, swift_user,
swift_password, auth_version=None, os_options=None, ca_cert_path=None):
super(SwiftStorage, self).__init__()
self._swift_container = swift_container
self._storage_path = storage_path