Add a chunk cleanup queue for async GC of empty chunks
Instead of having the Swift storage engine try to delete the empty chunk(s) synchronously, we queue them and have a worker come along after 30s to delete the empty chunks. This has a few key benefits: it is async (it doesn't slow down the push code path), it helps deal with Swift's eventual consistency (fewer retries are necessary), and it is generic enough for other storage engines to use if/when they need it as well.
Parent: 59cb6bd216
Commit: 5f99448adc
12 changed files with 191 additions and 59 deletions
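
At a high level, this wires up a simple producer/consumer pattern: when the Swift engine finishes a chunked upload and finds segments that are still empty, it enqueues a small job (location, upload UUID, segment path) that only becomes visible after a delay, and a dedicated worker later picks the job up and deletes the path. The sketch below illustrates that flow in isolation; DelayedQueue and remove_path are simplified, hypothetical stand-ins for the WorkQueue and storage APIs touched in the diffs below, not the actual implementations.

import time
from collections import deque


class DelayedQueue(object):
  """ Toy stand-in for the cleanup work queue: items only become visible after a delay. """
  def __init__(self):
    self._items = deque()

  def put(self, job, available_after=0):
    # Remember when the job becomes eligible for processing.
    self._items.append((time.time() + available_after, job))

  def get(self):
    if self._items and self._items[0][0] <= time.time():
      return self._items.popleft()[1]
    return None  # Nothing is ready yet.


def remove_path(location, path):
  # Simplified stand-in for storage.remove([location], path).
  print('Deleting %s from location %s' % (path, location))


# Producer side: what the Swift engine does when it finds an empty segment.
queue = DelayedQueue()
queue.put({'location': 'nyc', 'uuid': 'some-upload-uuid', 'path': 'segments/abc/1'},
          available_after=30)

# Consumer side: what the cleanup worker does on each poll. For the first 30 seconds
# get() returns None; once the delay has passed, the job is handed out and removed.
job = queue.get()
if job is not None:
  remove_path(job['location'], job['path'])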

app.py (10 changed lines)

@@ -172,13 +172,17 @@ app.url_map.converters['apirepopath'] = APIRepositoryPathConverter
 Principal(app, use_sessions=False)
 
+tf = app.config['DB_TRANSACTION_FACTORY']
+
+chunk_cleanup_queue = WorkQueue(app.config['CHUNK_CLEANUP_QUEUE_NAME'], tf)
+
 avatar = Avatar(app)
 login_manager = LoginManager(app)
 mail = Mail(app)
 prometheus = PrometheusPlugin(app)
 metric_queue = MetricQueue(prometheus)
 instance_keys = InstanceKeys(app)
-storage = Storage(app, metric_queue, instance_keys)
+storage = Storage(app, metric_queue, chunk_cleanup_queue, instance_keys)
 userfiles = Userfiles(app, storage)
 log_archive = LogArchive(app, storage)
 analytics = Analytics(app)
@@ -198,8 +202,6 @@ license_validator.start()
 start_cloudwatch_sender(metric_queue, app)
 
-tf = app.config['DB_TRANSACTION_FACTORY']
-
 github_login = GithubOAuthConfig(app.config, 'GITHUB_LOGIN_CONFIG')
 github_trigger = GithubOAuthConfig(app.config, 'GITHUB_TRIGGER_CONFIG')
 gitlab_trigger = GitLabOAuthConfig(app.config, 'GITLAB_TRIGGER_CONFIG')
@@ -217,7 +219,7 @@ secscan_notification_queue = WorkQueue(app.config['SECSCAN_NOTIFICATION_QUEUE_NAME'],
                                        has_namespace=False)
 
 all_queues = [image_replication_queue, dockerfile_build_queue, notification_queue,
-              secscan_notification_queue]
+              secscan_notification_queue, chunk_cleanup_queue]
 
 secscan_api = SecurityScannerAPI(app, app.config, storage)

conf/init/service/chunkcleanupworker/log/run (new executable file, 7 lines)

@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# Ensure dependencies start before the logger
+sv check syslog-ng > /dev/null || exit 1
+
+# Start the logger
+exec logger -i -t chunkcleanupworker

conf/init/service/chunkcleanupworker/run (new executable file, 8 lines)

@@ -0,0 +1,8 @@
+#! /bin/bash
+
+echo 'Starting chunk cleanup worker'
+
+cd /
+venv/bin/python -m workers.chunkcleanupworker 2>&1
+
+echo 'Chunk cleanup worker exited'

@@ -137,6 +137,7 @@ class DefaultConfig(object):
   DOCKERFILE_BUILD_QUEUE_NAME = 'dockerfilebuild'
   REPLICATION_QUEUE_NAME = 'imagestoragereplication'
   SECSCAN_NOTIFICATION_QUEUE_NAME = 'security_notification'
+  CHUNK_CLEANUP_QUEUE_NAME = 'chunk_cleanup'
 
   # Super user config. Note: This MUST BE an empty list for the default config.
   SUPER_USERS = []

@@ -15,27 +15,36 @@ STORAGE_DRIVER_CLASSES = {
   'SwiftStorage': SwiftStorage,
 }
 
-def get_storage_driver(metric_queue, storage_params):
+def get_storage_driver(location, metric_queue, chunk_cleanup_queue, storage_params):
   """ Returns a storage driver class for the given storage configuration
       (a pair of string name and a dict of parameters). """
   driver = storage_params[0]
   parameters = storage_params[1]
   driver_class = STORAGE_DRIVER_CLASSES.get(driver, FakeStorage)
-  return driver_class(metric_queue, **parameters)
+  context = StorageContext(location, metric_queue, chunk_cleanup_queue)
+  return driver_class(context, **parameters)
+
+
+class StorageContext(object):
+  def __init__(self, location, metric_queue, chunk_cleanup_queue):
+    self.location = location
+    self.metric_queue = metric_queue
+    self.chunk_cleanup_queue = chunk_cleanup_queue
+
 
 class Storage(object):
-  def __init__(self, app=None, metric_queue=None, instance_keys=None):
+  def __init__(self, app=None, metric_queue=None, chunk_cleanup_queue=None, instance_keys=None):
     self.app = app
     if app is not None:
-      self.state = self.init_app(app, metric_queue, instance_keys)
+      self.state = self.init_app(app, metric_queue, chunk_cleanup_queue, instance_keys)
     else:
       self.state = None
 
-  def init_app(self, app, metric_queue, instance_keys):
+  def init_app(self, app, metric_queue, chunk_cleanup_queue, instance_keys):
     storages = {}
     for location, storage_params in app.config.get('DISTRIBUTED_STORAGE_CONFIG').items():
-      storages[location] = get_storage_driver(metric_queue, storage_params)
+      storages[location] = get_storage_driver(location, metric_queue, chunk_cleanup_queue,
+                                              storage_params)
 
     preference = app.config.get('DISTRIBUTED_STORAGE_PREFERENCE', None)
     if not preference:

@@ -49,7 +49,7 @@ class StreamReadKeyAsFile(BufferedIOBase):
 
 
 class _CloudStorage(BaseStorageV2):
-  def __init__(self, metric_queue, connection_class, key_class, connect_kwargs, upload_params,
+  def __init__(self, context, connection_class, key_class, connect_kwargs, upload_params,
                storage_path, bucket_name, access_key=None, secret_key=None):
     super(_CloudStorage, self).__init__()
 
@@ -67,7 +67,7 @@ class _CloudStorage(BaseStorageV2):
     self._connect_kwargs = connect_kwargs
     self._cloud_conn = None
     self._cloud_bucket = None
-    self._metric_queue = metric_queue
+    self._context = context
 
   def _initialize_cloud_conn(self):
     if not self._initialized:
@@ -166,9 +166,9 @@ class _CloudStorage(BaseStorageV2):
     if content_encoding is not None:
       metadata['Content-Encoding'] = content_encoding
 
-    if self._metric_queue is not None:
-      self._metric_queue.put_deprecated('MultipartUploadStart', 1)
-      self._metric_queue.multipart_upload_start.Inc()
+    if self._context.metric_queue is not None:
+      self._context.metric_queue.put_deprecated('MultipartUploadStart', 1)
+      self._context.metric_queue.multipart_upload_start.Inc()
 
     return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
                                                         **self._upload_params)
@@ -207,9 +207,9 @@ class _CloudStorage(BaseStorageV2):
         logger.warn('Error when writing to stream in stream_write_internal at path %s: %s', path, e)
         write_error = e
 
-        if self._metric_queue is not None:
-          self._metric_queue.put_deprecated('MultipartUploadFailure', 1)
-          self._metric_queue.multipart_upload_end.Inc(labelvalues=['failure'])
+        if self._context.metric_queue is not None:
+          self._context.metric_queue.put_deprecated('MultipartUploadFailure', 1)
+          self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['failure'])
 
         if cancel_on_error:
           mp.cancel_upload()
@@ -218,9 +218,9 @@ class _CloudStorage(BaseStorageV2):
         break
 
     if total_bytes_written > 0:
-      if self._metric_queue is not None:
-        self._metric_queue.put_deprecated('MultipartUploadSuccess', 1)
-        self._metric_queue.multipart_upload_end.Inc(labelvalues=['success'])
+      if self._context.metric_queue is not None:
+        self._context.metric_queue.put_deprecated('MultipartUploadSuccess', 1)
+        self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['success'])
 
       self._perform_action_with_retry(mp.complete_upload)
 
@@ -436,8 +436,8 @@ class _CloudStorage(BaseStorageV2):
 
 
 class S3Storage(_CloudStorage):
-  def __init__(self, metric_queue, storage_path, s3_bucket, s3_access_key=None, s3_secret_key=None,
-               host=None):
+  def __init__(self, context, storage_path, s3_bucket, s3_access_key=None,
+               s3_secret_key=None, host=None):
     upload_params = {
       'encrypt_key': True,
     }
@@ -447,7 +447,7 @@ class S3Storage(_CloudStorage):
        raise ValueError('host name must not start with http:// or https://')
 
      connect_kwargs['host'] = host
-    super(S3Storage, self).__init__(metric_queue, boto.s3.connection.S3Connection, boto.s3.key.Key,
+    super(S3Storage, self).__init__(context, boto.s3.connection.S3Connection, boto.s3.key.Key,
                                     connect_kwargs, upload_params, storage_path, s3_bucket,
                                     access_key=s3_access_key or None,
                                     secret_key=s3_secret_key or None)
@@ -474,10 +474,10 @@ class S3Storage(_CloudStorage):
        </CORSConfiguration>""")
 
 class GoogleCloudStorage(_CloudStorage):
-  def __init__(self, metric_queue, storage_path, access_key, secret_key, bucket_name):
+  def __init__(self, context, storage_path, access_key, secret_key, bucket_name):
     upload_params = {}
     connect_kwargs = {}
-    super(GoogleCloudStorage, self).__init__(metric_queue, boto.gs.connection.GSConnection,
+    super(GoogleCloudStorage, self).__init__(context, boto.gs.connection.GSConnection,
                                              boto.gs.key.Key, connect_kwargs, upload_params,
                                              storage_path, bucket_name, access_key, secret_key)
 
@@ -534,7 +534,7 @@ class GoogleCloudStorage(_CloudStorage):
 
 
 class RadosGWStorage(_CloudStorage):
-  def __init__(self, metric_queue, hostname, is_secure, storage_path, access_key, secret_key,
+  def __init__(self, context, hostname, is_secure, storage_path, access_key, secret_key,
                bucket_name):
     upload_params = {}
     connect_kwargs = {
@@ -543,7 +543,7 @@ class RadosGWStorage(_CloudStorage):
       'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
     }
 
-    super(RadosGWStorage, self).__init__(metric_queue, boto.s3.connection.S3Connection,
+    super(RadosGWStorage, self).__init__(context, boto.s3.connection.S3Connection,
                                          boto.s3.key.Key, connect_kwargs, upload_params,
                                          storage_path, bucket_name, access_key, secret_key)
 

@@ -9,7 +9,7 @@ from storage.basestorage import BaseStorageV2
 _FAKE_STORAGE_MAP = defaultdict(StringIO.StringIO)
 
 class FakeStorage(BaseStorageV2):
-  def __init__(self, metric_queue):
+  def __init__(self, context):
     super(FakeStorage, self).__init__()
 
   def _init_path(self, path=None, create=False):

@@ -14,7 +14,7 @@ logger = logging.getLogger(__name__)
 
 
 class LocalStorage(BaseStorageV2):
-  def __init__(self, metric_queue, storage_path):
+  def __init__(self, context, storage_path):
     super(LocalStorage, self).__init__()
     self._root_path = storage_path
 

@@ -14,7 +14,7 @@ from swiftclient.client import Connection, ClientException
 from urlparse import urlparse
 from random import SystemRandom
 from hashlib import sha1
-from time import time, sleep
+from time import time
 from collections import namedtuple
 
 from util.registry import filelike
@@ -26,17 +26,20 @@ logger = logging.getLogger(__name__)
 
 _PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length'])
 _SEGMENTS_KEY = 'segments'
+_EMPTY_SEGMENTS_KEY = 'emptysegments'
 _SEGMENT_DIRECTORY = 'segments'
 _MAXIMUM_SEGMENT_SIZE = 200000000 # ~200 MB
 _DEFAULT_SWIFT_CONNECT_TIMEOUT = 5 # seconds
+_CHUNK_CLEANUP_DELAY = 30 # seconds
 
 class SwiftStorage(BaseStorage):
-  def __init__(self, metric_queue, swift_container, storage_path, auth_url, swift_user,
-               swift_password, auth_version=None, os_options=None, ca_cert_path=None,
-               temp_url_key=None, simple_path_concat=False, connect_timeout=None,
-               retry_count=None, retry_on_ratelimit=True):
+  def __init__(self, context, swift_container, storage_path, auth_url, swift_user, swift_password,
+               auth_version=None, os_options=None, ca_cert_path=None, temp_url_key=None,
+               simple_path_concat=False, connect_timeout=None, retry_count=None,
+               retry_on_ratelimit=True):
     super(SwiftStorage, self).__init__()
     self._swift_container = swift_container
+    self._context = context
 
     self._storage_path = storage_path.lstrip('/')
     self._simple_path_concat = simple_path_concat
@@ -205,7 +208,8 @@ class SwiftStorage(BaseStorage):
     path = self._normalize_path(path)
     try:
       self._get_connection().delete_object(self._swift_container, path)
-    except Exception:
+    except Exception as ex:
+      logger.warning('Could not delete path %s: %s', path, str(ex))
       raise IOError('Cannot delete path: %s' % path)
 
   def _random_checksum(self, count):
@@ -220,14 +224,15 @@ class SwiftStorage(BaseStorage):
     return headers.get('etag', '')[1:-1][:7] or self._random_checksum(7)
 
   @staticmethod
-  def _segment_list_from_metadata(storage_metadata):
-    return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[_SEGMENTS_KEY]]
+  def _segment_list_from_metadata(storage_metadata, key=_SEGMENTS_KEY):
+    return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[key]]
 
   def initiate_chunked_upload(self):
     random_uuid = str(uuid4())
 
     metadata = {
       _SEGMENTS_KEY: [],
+      _EMPTY_SEGMENTS_KEY: [],
     }
 
     return random_uuid, metadata
@@ -292,18 +297,8 @@ class SwiftStorage(BaseStorage):
       updated_metadata[_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
                                                                  bytes_written))
     else:
-      # Try to delete the empty segment, as it is not needed. This will occasionally fail
-      # due to Swift's eventual consistency, so we retry a few times and then just leave it be.
-      for remaining_retries in range(2, -1, -1):
-        try:
-          self.remove(segment_path)
-        except IOError:
-          if remaining_retries:
-            sleep(0.25)
-            continue
-
-          # Otherwise, ignore it.
-        break
+      updated_metadata[_EMPTY_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
+                                                                       bytes_written))
 
     return bytes_written, updated_metadata
 
@@ -311,6 +306,26 @@ class SwiftStorage(BaseStorage):
     """ Complete the chunked upload and store the final results in the path indicated.
        Returns nothing.
    """
+    # Check all potentially empty segments against the segments that were uploaded; if the path
+    # is still empty, then we queue the segment to be deleted.
+    if self._context.chunk_cleanup_queue is not None:
+      nonempty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
+                                                                   key=_SEGMENTS_KEY)
+      potentially_empty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
+                                                                            key=_EMPTY_SEGMENTS_KEY)
+
+      nonempty_paths = set([segment.path for segment in nonempty_segments])
+      for segment in potentially_empty_segments:
+        if segment.path in nonempty_paths:
+          continue
+
+        # Queue the chunk to be deleted, as it is empty and therefore unused.
+        self._context.chunk_cleanup_queue.put(['segment/%s/%s' % (self._context.location, uuid)], {
+          'location': self._context.location,
+          'uuid': uuid,
+          'path': segment.path,
+        }, available_after=_CHUNK_CLEANUP_DELAY)
+
     # Finally, we write an empty file at the proper location with a X-Object-Manifest
     # header pointing to the prefix for the segments.
     segments_prefix_path = self._normalize_path('%s/%s' % (_SEGMENT_DIRECTORY, uuid))
@@ -323,5 +338,5 @@ class SwiftStorage(BaseStorage):
        Returns nothing.
    """
     # Delete all the uploaded segments.
-    for segment in SwiftStorage._segment_list_from_metadata(storage_metadata):
+    for segment in SwiftStorage._segment_list_from_metadata(storage_metadata, key=_SEGMENTS_KEY):
       self.remove(segment.path)

@@ -3,7 +3,7 @@ import moto
 import boto
 import os
 
-from storage import S3Storage
+from storage import S3Storage, StorageContext
 from storage.cloud import _CloudStorage, _PartUploadMetadata
 from storage.cloud import _CHUNKS_KEY
 from StringIO import StringIO
@@ -13,6 +13,7 @@ _TEST_BUCKET = 'some_bucket'
 _TEST_USER = 'someuser'
 _TEST_PASSWORD = 'somepassword'
 _TEST_PATH = 'some/cool/path'
+_TEST_CONTEXT = StorageContext('nyc', None, None)
 
 class TestCloudStorage(unittest.TestCase):
   def setUp(self):
@@ -21,7 +22,7 @@ class TestCloudStorage(unittest.TestCase):
 
     # Create a test bucket and put some test content.
     boto.connect_s3().create_bucket(_TEST_BUCKET)
-    self.engine = S3Storage(None, 'some/path', _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
+    self.engine = S3Storage(_TEST_CONTEXT, 'some/path', _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
     self.engine.put_content(_TEST_PATH, _TEST_CONTENT)
 
   def tearDown(self):
@@ -51,7 +52,8 @@ class TestCloudStorage(unittest.TestCase):
 
   def test_copy_samecreds(self):
     # Copy the content to another engine.
-    another_engine = S3Storage(None, 'another/path', _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
+    another_engine = S3Storage(_TEST_CONTEXT, 'another/path', _TEST_BUCKET, _TEST_USER,
+                               _TEST_PASSWORD)
     self.engine.copy_to(another_engine, _TEST_PATH)
 
     # Verify it can be retrieved.
@@ -59,7 +61,8 @@ class TestCloudStorage(unittest.TestCase):
 
   def test_copy_differentcreds(self):
     # Copy the content to another engine.
-    another_engine = S3Storage(None, 'another/path', 'another_bucket', 'blech', 'password')
+    another_engine = S3Storage(_TEST_CONTEXT, 'another/path', 'another_bucket', 'blech',
+                               'password')
     boto.connect_s3().create_bucket('another_bucket')
 
     self.engine.copy_to(another_engine, _TEST_PATH)

@@ -2,10 +2,11 @@ import io
 import unittest
 
 from collections import defaultdict
-
-from storage.swift import SwiftStorage
 from mock import MagicMock
 
+from storage import StorageContext
+from storage.swift import SwiftStorage
+
 
 class MockSwiftStorage(SwiftStorage):
   def __init__(self, *args, **kwargs):
@@ -18,7 +19,7 @@ class MockSwiftStorage(SwiftStorage):
 
 class MockSwiftTests(unittest.TestCase):
   base_args = {
-    'metric_queue': None,
+    'context': StorageContext('nyc', None, None),
     'swift_container': 'container-name',
     'storage_path': '/basepath',
     'auth_url': 'https://auth.com',
@@ -93,9 +94,26 @@ class FakeSwift(object):
     self.containers[container].pop(path, None)
 
 
+class FakeQueue(object):
+  def __init__(self):
+    self.items = []
+
+  def get(self):
+    if not self.items:
+      return None
+
+    return self.items.pop()
+
+  def put(self, names, item, available_after=0):
+    self.items.append({
+      'names': names,
+      'item': item,
+      'available_after': available_after,
+    })
+
 class FakeSwiftTests(unittest.TestCase):
   base_args = {
-    'metric_queue': None,
+    'context': StorageContext('nyc', None, None),
     'swift_container': 'container-name',
     'storage_path': '/basepath',
     'auth_url': 'https://auth.com',
@@ -170,6 +188,36 @@ class FakeSwiftTests(unittest.TestCase):
     for segment in SwiftStorage._segment_list_from_metadata(metadata):
       self.assertFalse(swift.exists(segment.path))
 
+  def test_empty_chunks_queued_for_deletion(self):
+    chunk_cleanup_queue = FakeQueue()
+    args = dict(self.base_args)
+    args['context'] = StorageContext('nyc', None, chunk_cleanup_queue)
+
+    swift = FakeSwiftStorage(**args)
+    uuid, metadata = swift.initiate_chunked_upload()
+
+    chunks = ['this', '', 'is', 'some', '', 'chunked', 'data', '']
+    offset = 0
+    for chunk in chunks:
+      length = len(chunk)
+      if length == 0:
+        length = 1
+
+      bytes_written, metadata, error = swift.stream_upload_chunk(uuid, offset, length,
+                                                                 io.BytesIO(chunk), metadata)
+      self.assertIsNone(error)
+      self.assertEquals(bytes_written, len(chunk))
+      offset += len(chunk)
+
+    swift.complete_chunked_upload(uuid, 'somepath', metadata)
+    self.assertEquals(''.join(chunks), swift.get_content('somepath'))
+
+    # Check the chunk deletion queue and ensure we have the last chunk queued.
+    found = chunk_cleanup_queue.get()
+    self.assertIsNotNone(found)
+
+    found2 = chunk_cleanup_queue.get()
+    self.assertIsNone(found2)
+
 if __name__ == '__main__':
   unittest.main()

workers/chunkcleanupworker.py (new file, 39 lines)

@@ -0,0 +1,39 @@
+import logging
+import time
+
+from app import app, storage, chunk_cleanup_queue
+from workers.queueworker import QueueWorker, JobException
+
+logger = logging.getLogger(__name__)
+
+
+POLL_PERIOD_SECONDS = 10
+
+
+class ChunkCleanupWorker(QueueWorker):
+  """ Worker which cleans up chunks enqueued by the storage engine(s). This is typically used to
+      cleanup empty chunks which are no longer needed.
+  """
+  def process_queue_item(self, job_details):
+    logger.debug('Got chunk cleanup queue item: %s', job_details)
+    storage_location = job_details['location']
+    storage_path = job_details['path']
+
+    try:
+      storage.remove([storage_location], storage_path)
+    except IOError:
+      raise JobException()
+
+
+if __name__ == "__main__":
+  logging.config.fileConfig('conf/logging.conf', disable_existing_loggers=False)
+
+  engines = set([config[0] for config in app.config.get('DISTRIBUTED_STORAGE_CONFIG', {}).values()])
+  if 'SwiftStorage' not in engines:
+    logger.debug('Swift storage not detected; sleeping')
+    while True:
+      time.sleep(10000)
+
+  logger.debug('Starting chunk cleanup worker')
+  worker = ChunkCleanupWorker(chunk_cleanup_queue, poll_period_seconds=POLL_PERIOD_SECONDS)
+  worker.start()