Implement some new methods on the storage engines.

Jake Moshenko 2015-08-26 17:08:42 -04:00
parent 4fa37a46d1
commit 398202e6fc
10 changed files with 211 additions and 98 deletions

storage/basestorage.py

@@ -42,18 +42,9 @@ class StoragePaths(object):
class BaseStorage(StoragePaths):
"""Storage is organized as follow:
$ROOT/images/<image_id>/json
$ROOT/images/<image_id>/layer
$ROOT/repositories/<namespace>/<repository_name>/<tag_name>
"""
# Useful if we want to change those locations later without rewriting
# the code which uses Storage
repositories = 'repositories'
images = 'images'
# Set the IO buffer to 64kB
buffer_size = 64 * 1024
def __init__(self):
# Set the IO buffer to 64kB
self.buffer_size = 64 * 1024
def setup(self):
""" Called to perform any storage system setup. """
@@ -99,31 +90,55 @@ class BaseStorage(StoragePaths):
def get_checksum(self, path):
raise NotImplementedError
def stream_write_to_fp(self, in_fp, out_fp, num_bytes=-1):
""" Copy the specified number of bytes from the input file stream to the output stream. If
num_bytes < 0 copy until the stream ends.
"""
bytes_copied = 0
while bytes_copied < num_bytes or num_bytes == -1:
size_to_read = min(num_bytes - bytes_copied, self.buffer_size)
if size_to_read < 0:
size_to_read = self.buffer_size
try:
buf = in_fp.read(size_to_read)
if not buf:
break
out_fp.write(buf)
bytes_copied += len(buf)
except IOError:
break
return bytes_copied
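stream_write_to_fp is now shared by every engine (it previously lived on LocalStorage as _stream_write_to_fp): it copies in buffer_size reads and stops early on EOF or IOError. A small illustration with in-memory streams, not part of this commit:

import io

base = BaseStorage()                      # any engine works; only buffer_size is used here
src = io.BytesIO(b'a' * (200 * 1024))
dst = io.BytesIO()

# Bounded copy: stops after exactly num_bytes.
assert base.stream_write_to_fp(src, dst, num_bytes=64 * 1024) == 64 * 1024

# Unbounded copy (the default num_bytes=-1): drains the rest of the stream.
assert base.stream_write_to_fp(src, dst) == 136 * 1024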
class InvalidChunkException(RuntimeError):
pass
class BaseStorageV2(BaseStorage):
def initiate_chunked_upload(self, upload_uuid):
""" Start a new chunked upload
def initiate_chunked_upload(self):
""" Start a new chunked upload, returning the uuid and any associated storage metadata
"""
raise NotImplementedError
def stream_upload_chunk(self, uuid, offset, length, in_fp, hash_obj):
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata):
""" Upload the specified amount of data from the given file pointer to the chunked destination
specified, starting at the given offset. Raises InvalidChunkException if the offset or
length can not be accepted.
specified, starting at the given offset. Returns the number of bytes uploaded, and a new
version of the storage_metadata. Raises InvalidChunkException if the offset or length can
not be accepted.
"""
raise NotImplementedError
def complete_chunked_upload(self, uuid, final_path):
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
""" Complete the chunked upload and store the final results in the path indicated.
Returns nothing.
"""
raise NotImplementedError
def cancel_chunked_upload(self, uuid):
def cancel_chunked_upload(self, uuid, storage_metadata):
""" Cancel the chunked upload and clean up any outstanding partially uploaded data.
Returns nothing.
"""
raise NotImplementedError
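Taken together, the V2 methods define a client-driven upload lifecycle: initiate, stream one or more chunks while threading the returned storage_metadata through each call, then complete or cancel. A minimal, hypothetical driver against this interface (the storage object and the chunks iterable of (file_like, length) pairs are illustrative, not part of this commit):

def upload_in_chunks(storage, chunks, final_path):
    # storage is assumed to be any BaseStorageV2 implementation.
    upload_uuid, metadata = storage.initiate_chunked_upload()
    offset = 0
    try:
        for chunk_fp, chunk_len in chunks:
            # Each call reports the bytes written plus updated storage metadata,
            # which must be passed back in on the next call.
            written, metadata = storage.stream_upload_chunk(upload_uuid, offset, chunk_len,
                                                            chunk_fp, metadata)
            offset += written
    except InvalidChunkException:
        storage.cancel_chunked_upload(upload_uuid, metadata)
        raise
    storage.complete_chunked_upload(upload_uuid, final_path, metadata)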

storage/cloud.py

@@ -3,18 +3,25 @@ import os
import logging
import boto.s3.connection
import boto.s3.multipart
import boto.gs.connection
import boto.s3.key
import boto.gs.key
from io import BufferedIOBase
from uuid import uuid4
from storage.basestorage import BaseStorage
from storage.basestorage import BaseStorageV2, InvalidChunkException
logger = logging.getLogger(__name__)
_MULTIPART_UPLOAD_ID_KEY = 'upload_id'
_LAST_PART_KEY = 'last_part_num'
_LAST_CHUNK_ENCOUNTERED = 'last_chunk_encountered'
class StreamReadKeyAsFile(BufferedIOBase):
def __init__(self, key):
self._key = key
@@ -37,9 +44,13 @@ class StreamReadKeyAsFile(BufferedIOBase):
self._key.close(fast=True)
class _CloudStorage(BaseStorage):
class _CloudStorage(BaseStorageV2):
def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path,
access_key, secret_key, bucket_name):
super(_CloudStorage, self).__init__()
self.upload_chunk_size = 5 * 1024 * 1024
self._initialized = False
self._bucket_name = bucket_name
self._access_key = access_key
@@ -135,12 +146,9 @@ class _CloudStorage(BaseStorage):
raise IOError('No such key: \'{0}\''.format(path))
return StreamReadKeyAsFile(key)
def stream_write(self, path, fp, content_type=None, content_encoding=None):
def __initiate_multipart_upload(self, path, content_type, content_encoding):
# Minimum size of upload part size on S3 is 5MB
self._initialize_cloud_conn()
buffer_size = 5 * 1024 * 1024
if self.buffer_size > buffer_size:
buffer_size = self.buffer_size
path = self._init_path(path)
metadata = {}
@@ -150,16 +158,20 @@ class _CloudStorage(BaseStorage):
if content_encoding is not None:
metadata['Content-Encoding'] = content_encoding
mp = self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
def stream_write(self, path, fp, content_type=None, content_encoding=None):
mp = self.__initiate_multipart_upload(path, content_type, content_encoding)
num_part = 1
while True:
try:
buf = fp.read(buffer_size)
if not buf:
buf = StringIO.StringIO()
bytes_written = self.stream_write_to_fp(fp, buf, self.upload_chunk_size)
if bytes_written == 0:
break
io = StringIO.StringIO(buf)
mp.upload_part_from_file(io, num_part)
mp.upload_part_from_file(buf, num_part)
num_part += 1
io.close()
except IOError:
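The rewritten stream_write no longer reads raw buffers directly; it uses stream_write_to_fp to fill an in-memory buffer of upload_chunk_size (5 MB) per part, since S3 rejects multipart parts smaller than 5 MB except for the final one. A generic sketch of that buffering pattern, separate from the code above:

import io

PART_SIZE = 5 * 1024 * 1024   # mirrors upload_chunk_size

def iter_parts(in_fp, part_size=PART_SIZE):
    # Yield successive in-memory buffers of at most part_size bytes,
    # each small enough for one upload_part_from_file call.
    while True:
        data = in_fp.read(part_size)
        if not data:
            return
        yield io.BytesIO(data)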
@@ -217,6 +229,57 @@ class _CloudStorage(BaseStorage):
return k.etag[1:-1][:7]
def _rel_upload_path(self, uuid):
return 'uploads/{0}'.format(uuid)
def initiate_chunked_upload(self):
self._initialize_cloud_conn()
random_uuid = str(uuid4())
path = self._init_path(self._rel_upload_path(random_uuid))
mpu = self.__initiate_multipart_upload(path, content_type=None, content_encoding=None)
metadata = {
_MULTIPART_UPLOAD_ID_KEY: mpu.id,
_LAST_PART_KEY: 0,
_LAST_CHUNK_ENCOUNTERED: False,
}
return mpu.id, metadata
def _get_multipart_upload_key(self, uuid, storage_metadata):
mpu = boto.s3.multipart.MultiPartUpload(self._cloud_bucket)
mpu.id = storage_metadata[_MULTIPART_UPLOAD_ID_KEY]
mpu.key_name = self._init_path(self._rel_upload_path(uuid))
return mpu
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata):
self._initialize_cloud_conn()
mpu = self._get_multipart_upload_key(uuid, storage_metadata)
last_part_num = storage_metadata[_LAST_PART_KEY]
if storage_metadata[_LAST_CHUNK_ENCOUNTERED] and length != 0:
msg = 'Length must be at least the upload chunk size: %s' % self.upload_chunk_size
raise InvalidChunkException(msg)
part_num = last_part_num + 1
mpu.upload_part_from_file(in_fp, part_num, length)
new_metadata = {
_MULTIPART_UPLOAD_ID_KEY: mpu.id,
_LAST_PART_KEY: part_num,
_LAST_CHUNK_ENCOUNTERED: True,
}
return length, new_metadata
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
mpu = self._get_multipart_upload_key(uuid, storage_metadata)
mpu.complete_upload()
def cancel_chunked_upload(self, uuid, storage_metadata):
mpu = self._get_multipart_upload_key(uuid, storage_metadata)
mpu.cancel_upload()
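The storage_metadata the S3 engine hands back from initiate_chunked_upload is a plain, JSON-serializable dict, so a caller can persist it (for example in a database row tracking the upload) and return it untouched on later calls. With the keys defined at the top of this file it looks roughly like this (the upload id value is made up):

storage_metadata = {
    'upload_id': 'EXAMPLE-multipart-upload-id',  # _MULTIPART_UPLOAD_ID_KEY, assigned by S3
    'last_part_num': 0,                          # _LAST_PART_KEY
    'last_chunk_encountered': False,             # _LAST_CHUNK_ENCOUNTERED
}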
class S3Storage(_CloudStorage):
def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket):

storage/fakestorage.py

@@ -1,8 +1,14 @@
from storage.basestorage import BaseStorage
import cStringIO as StringIO
import hashlib
_FAKE_STORAGE_MAP = {}
from collections import defaultdict
from uuid import uuid4
class FakeStorage(BaseStorage):
from storage.basestorage import BaseStorageV2
_FAKE_STORAGE_MAP = defaultdict(StringIO.StringIO)
class FakeStorage(BaseStorageV2):
def _init_path(self, path=None, create=False):
return path
@@ -10,16 +16,26 @@ class FakeStorage(BaseStorage):
if not path in _FAKE_STORAGE_MAP:
raise IOError('Fake file %s not found' % path)
return _FAKE_STORAGE_MAP.get(path)
_FAKE_STORAGE_MAP.get(path).seek(0)
return _FAKE_STORAGE_MAP.get(path).read()
def put_content(self, path, content):
_FAKE_STORAGE_MAP[path] = content
_FAKE_STORAGE_MAP.pop(path, None)
_FAKE_STORAGE_MAP[path].write(content)
def stream_read(self, path):
yield _FAKE_STORAGE_MAP[path]
io_obj = _FAKE_STORAGE_MAP[path]
io_obj.seek(0)
while True:
buf = io_obj.read(self.buffer_size)
if not buf:
break
yield buf
def stream_write(self, path, fp, content_type=None, content_encoding=None):
_FAKE_STORAGE_MAP[path] = fp.read()
out_fp = _FAKE_STORAGE_MAP[path]
out_fp.seek(0)
self.stream_write_to_fp(fp, out_fp)
def remove(self, path):
_FAKE_STORAGE_MAP.pop(path, None)
@@ -28,4 +44,21 @@ class FakeStorage(BaseStorage):
return path in _FAKE_STORAGE_MAP
def get_checksum(self, path):
return path
return hashlib.sha256(_FAKE_STORAGE_MAP[path].read()).hexdigest()[:7]
def initiate_chunked_upload(self):
new_uuid = str(uuid4())
_FAKE_STORAGE_MAP[new_uuid].seek(0)
return new_uuid, {}
def stream_upload_chunk(self, uuid, offset, length, in_fp, _):
upload_storage = _FAKE_STORAGE_MAP[uuid]
upload_storage.seek(offset)
return self.stream_write_to_fp(in_fp, upload_storage, length), {}
def complete_chunked_upload(self, uuid, final_path, _):
_FAKE_STORAGE_MAP[final_path] = _FAKE_STORAGE_MAP[uuid]
_FAKE_STORAGE_MAP.pop(uuid, None)
def cancel_chunked_upload(self, uuid, _):
_FAKE_STORAGE_MAP.pop(uuid, None)
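A hypothetical smoke test of the in-memory engine's chunked-upload path (content and paths are made up; FakeStorage exists for tests):

import cStringIO as StringIO

storage = FakeStorage()
upload_uuid, metadata = storage.initiate_chunked_upload()
storage.stream_upload_chunk(upload_uuid, 0, 6, StringIO.StringIO('hello '), metadata)
storage.stream_upload_chunk(upload_uuid, 6, 5, StringIO.StringIO('world'), metadata)
storage.complete_chunked_upload(upload_uuid, 'greeting', metadata)
assert storage.get_content('greeting') == 'hello world'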

storage/local.py

@@ -14,8 +14,8 @@ logger = logging.getLogger(__name__)
class LocalStorage(BaseStorageV2):
def __init__(self, storage_path):
super(LocalStorage, self).__init__()
self._root_path = storage_path
def _init_path(self, path=None, create=False):
@@ -54,28 +54,7 @@ class LocalStorage(BaseStorageV2):
# Size is mandatory
path = self._init_path(path, create=True)
with open(path, mode='wb') as out_fp:
self._stream_write_to_fp(fp, out_fp)
def _stream_write_to_fp(self, in_fp, out_fp, num_bytes=-1):
""" Copy the specified number of bytes from the input file stream to the output stream. If
num_bytes < 0 copy until the stream ends.
"""
bytes_copied = 0
while bytes_copied < num_bytes or num_bytes == -1:
size_to_read = min(num_bytes - bytes_copied, self.buffer_size)
if size_to_read < 0:
size_to_read = self.buffer_size
try:
buf = in_fp.read(size_to_read)
if not buf:
break
out_fp.write(buf)
bytes_copied += len(buf)
except IOError:
break
return bytes_copied
self.stream_write_to_fp(fp, out_fp)
def list_directory(self, path=None):
path = self._init_path(path)
@@ -124,14 +103,14 @@ class LocalStorage(BaseStorageV2):
with open(self._init_path(self._rel_upload_path(new_uuid), create=True), 'w'):
pass
return new_uuid
return new_uuid, {}
def stream_upload_chunk(self, uuid, offset, length, in_fp):
def stream_upload_chunk(self, uuid, offset, length, in_fp, _):
with open(self._init_path(self._rel_upload_path(uuid)), 'r+b') as upload_storage:
upload_storage.seek(offset)
return self._stream_write_to_fp(in_fp, upload_storage, length)
return self.stream_write_to_fp(in_fp, upload_storage, length), {}
def complete_chunked_upload(self, uuid, final_path):
def complete_chunked_upload(self, uuid, final_path, _):
content_path = self._rel_upload_path(uuid)
final_path_abs = self._init_path(final_path, create=True)
if not self.exists(final_path_abs):
@@ -140,7 +119,7 @@ class LocalStorage(BaseStorageV2):
else:
logger.debug('Content already exists at path: %s', final_path_abs)
def cancel_chunked_upload(self, uuid):
def cancel_chunked_upload(self, uuid, _):
content_path = self._init_path(self._rel_upload_path(uuid))
os.remove(content_path)

storage/swift.py

@@ -13,6 +13,8 @@ logger = logging.getLogger(__name__)
class SwiftStorage(BaseStorage):
def __init__(self, swift_container, storage_path, auth_url, swift_user,
swift_password, auth_version=None, os_options=None, ca_cert_path=None):
super(SwiftStorage, self).__init__()
self._swift_container = swift_container
self._storage_path = storage_path