Add automatic storage replication
Adds a worker to automatically replicate data between storages and update the database accordingly
This commit is contained in:
parent c693afca6a
commit 724b1607d7

18 changed files with 259 additions and 35 deletions
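The replication worker itself lives in files not shown in this excerpt. As a rough, hypothetical sketch of the flow the commit message describes — `get_pending_replications` and `mark_replicated` are invented stand-ins for the database side — the worker would drain a queue of replication tasks and drive the new DistributedStorage.copy_between API:

  # Hypothetical sketch only: `storage` is the DistributedStorage extension;
  # get_pending_replications/mark_replicated stand in for database helpers
  # that are not part of this excerpt.
  import logging
  import time

  logger = logging.getLogger(__name__)

  def replication_worker(storage, get_pending_replications, mark_replicated,
                         poll_seconds=10):
    while True:
      for task in get_pending_replications():
        try:
          # Mirror the blob into the location where it is missing.
          storage.copy_between(task.path, task.source_location,
                               task.destination_location)
        except IOError:
          logger.exception('Failed to replicate %s', task.path)
          continue

        # Record the new location so reads can be served from either storage.
        mark_replicated(task.path, task.destination_location)
      time.sleep(poll_seconds)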
@@ -39,7 +39,8 @@ class Storage(object):
     if not preference:
       preference = storages.keys()
 
-    d_storage = DistributedStorage(storages, preference)
+    default_locations = app.config.get('DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS') or []
+    d_storage = DistributedStorage(storages, preference, default_locations)
 
     # register extension with app
     app.extensions = getattr(app, 'extensions', {})
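The new DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS key names the locations newly written data should be replicated to. A hypothetical config stanza — only the DEFAULT_LOCATIONS key comes from this diff; the companion keys and location names are assumptions for illustration:

  # Hypothetical config; only DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS appears
  # in this diff, the other keys and the location names are illustrative.
  DISTRIBUTED_STORAGE_CONFIG = {
    'local_us': ['LocalStorage', {'storage_path': '/datastorage/us'}],
    'local_eu': ['LocalStorage', {'storage_path': '/datastorage/eu'}],
  }
  DISTRIBUTED_STORAGE_PREFERENCE = ['local_us']
  DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS = ['local_us']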
@@ -98,6 +98,9 @@ class BaseStorage(StoragePaths):
   def get_checksum(self, path):
     raise NotImplementedError
 
+  def copy_to(self, destination, path):
+    raise NotImplementedError
+
 
 class DigestInvalidException(RuntimeError):
   pass
@@ -119,6 +122,3 @@ class BaseStorageV2(BaseStorage):
     """ Complete the chunked upload and store the final results in the path indicated.
     """
     raise NotImplementedError
-
-
-
@@ -222,6 +222,28 @@ class _CloudStorage(BaseStorage):
     return k.etag[1:-1][:7]
 
+  def copy_to(self, destination, path):
+    # First try to copy directly via boto, but only if the storages are the
+    # same type, with the same access information.
+    if (self.__class__ == destination.__class__ and
+        self._access_key == destination._access_key and
+        self._secret_key == destination._secret_key):
+      logger.debug('Copying file from %s to %s via a direct boto copy', self._cloud_bucket,
+                   destination._cloud_bucket)
+
+      source_path = self._init_path(path)
+      source_key = self._key_class(self._cloud_bucket, source_path)
+
+      dest_path = destination._init_path(path)
+      source_key.copy(destination._cloud_bucket, dest_path)
+      return
+
+    # Fallback to a slower, default copy.
+    logger.debug('Copying file from %s to %s via a streamed copy', self._cloud_bucket,
+                 destination)
+    with self.stream_read_file(path) as fp:
+      destination.stream_write(path, fp)
+
 
 class S3Storage(_CloudStorage):
   def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket):
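The fast path above relies on boto's server-side Key.copy, so the object's bytes never stream through the registry process; only when the destination is a different storage type or uses different credentials does it fall back to the slower read/write stream. A standalone sketch of the same boto pattern (bucket and key names are placeholders, not from this commit):

  # Standalone illustration of the server-side copy pattern used above;
  # bucket and key names are placeholders.
  import boto

  conn = boto.connect_s3('ACCESS_KEY', 'SECRET_KEY')
  source_bucket = conn.get_bucket('source-bucket')
  key = source_bucket.get_key('some/object/path')

  # Key.copy issues an S3 COPY request, moving the object server-side
  # rather than downloading and re-uploading it.
  key.copy('destination-bucket', 'some/object/path')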
@@ -252,7 +274,6 @@ class S3Storage(_CloudStorage):
         </CORSRule>
       </CORSConfiguration>""")
 
-
 class GoogleCloudStorage(_CloudStorage):
   def __init__(self, storage_path, access_key, secret_key, bucket_name):
     upload_params = {}
@@ -26,9 +26,15 @@ def _location_aware(unbound_func):
 
 
 class DistributedStorage(StoragePaths):
-  def __init__(self, storages, preferred_locations=[]):
+  def __init__(self, storages, preferred_locations=[], default_locations=[]):
     self._storages = dict(storages)
     self.preferred_locations = list(preferred_locations)
+    self.default_locations = list(default_locations)
+
+  @property
+  def locations(self):
+    """ Returns the names of the locations supported. """
+    return list(self._storages.keys())
 
   get_direct_download_url = _location_aware(BaseStorage.get_direct_download_url)
   get_direct_upload_url = _location_aware(BaseStorage.get_direct_upload_url)
@@ -42,6 +48,14 @@ class DistributedStorage(StoragePaths):
   remove = _location_aware(BaseStorage.remove)
+  get_checksum = _location_aware(BaseStorage.get_checksum)
   get_supports_resumable_downloads = _location_aware(BaseStorage.get_supports_resumable_downloads)
 
   initiate_chunked_upload = _location_aware(BaseStorageV2.initiate_chunked_upload)
   stream_upload_chunk = _location_aware(BaseStorageV2.stream_upload_chunk)
   complete_chunked_upload = _location_aware(BaseStorageV2.complete_chunked_upload)
 
+  def copy_between(self, path, source_location, destination_location):
+    """ Copies a file between the source location and the destination location. """
+    source_storage = self._storages[source_location]
+    destination_storage = self._storages[destination_location]
+    source_storage.copy_to(destination_storage, path)
+
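With the constructor change and copy_between in place, a caller can mirror a path between any two registered locations. A hypothetical usage sketch — the location names and LocalStorage paths are invented here:

  # Hypothetical usage; location names and paths are invented.
  storage = DistributedStorage(
      {'local_us': LocalStorage('/datastorage/us'),
       'local_eu': LocalStorage('/datastorage/eu')},
      preferred_locations=['local_us'],
      default_locations=['local_us'])

  # Copy an existing file from one location to the other.
  storage.copy_between('some/blob/path', 'local_us', 'local_eu')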
@@ -1,4 +1,5 @@
 from storage.basestorage import BaseStorage
+from cStringIO import StringIO
 
 _FAKE_STORAGE_MAP = {}
 
@@ -18,6 +19,9 @@ class FakeStorage(BaseStorage):
   def stream_read(self, path):
     yield _FAKE_STORAGE_MAP[path]
 
+  def stream_read_file(self, path):
+    return StringIO(_FAKE_STORAGE_MAP[path])
+
   def stream_write(self, path, fp, content_type=None, content_encoding=None):
     _FAKE_STORAGE_MAP[path] = fp.read()
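Adding stream_read_file lets FakeStorage participate in the generic streamed copy path during tests. An illustrative round-trip, assuming FakeStorage takes no constructor arguments as the diff suggests:

  from cStringIO import StringIO

  # Illustrative only: stream_write stores the payload in the module-level
  # _FAKE_STORAGE_MAP, and the new stream_read_file hands it back as a
  # file-like object.
  fake = FakeStorage()
  fake.stream_write('some/path', StringIO('hello world'))
  assert fake.stream_read_file('some/path').read() == 'hello world'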
@@ -112,11 +112,9 @@ class LocalStorage(BaseStorageV2):
       sha_hash.update(buf)
     return sha_hash.hexdigest()[:7]
 
-
   def _rel_upload_path(self, uuid):
     return 'uploads/{0}'.format(uuid)
 
-
   def initiate_chunked_upload(self):
     new_uuid = str(uuid4())
@@ -162,3 +160,7 @@ class LocalStorage(BaseStorageV2):
       raise Exception('Storage path %s is not under a mounted volume.\n\n'
                       'Registry data must be stored under a mounted volume '
                       'to prevent data loss' % self._root_path)
+
+  def copy_to(self, destination, path):
+    with self.stream_read_file(path) as fp:
+      destination.stream_write(path, fp)