Add a chunk cleanup queue for async GC of empty chunks
Instead of having the Swift storage engine try to delete the empty chunk(s) synchronously, we simply queue them and have a worker come along after 30s to delete them. This has a few key benefits: it is async (doesn't slow down the push code), it helps deal with Swift's eventual consistency (fewer retries necessary), and it is generic, so other storage engines can reuse it if/when they need it as well.
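
A minimal sketch of the queue-then-reap pattern the message describes (the queue, worker, and payload field names here are illustrative assumptions, not the commit's actual API):

    import json
    import time
    from queue import Queue

    CLEANUP_DELAY_SECONDS = 30

    chunk_cleanup_queue = Queue()

    def queue_chunk_cleanup(location, chunk_path):
      # Called by the storage engine in place of a synchronous delete.
      chunk_cleanup_queue.put(json.dumps({
        'location': location,
        'path': chunk_path,
        'not_before': time.time() + CLEANUP_DELAY_SECONDS,
      }))

    def chunk_cleanup_worker(storage):
      # Drains the queue, waiting out the remaining delay before each
      # delete. The delay gives an eventually consistent backend such
      # as Swift time to settle, so far fewer deletes need retrying.
      # storage.remove(...) is a stand-in for the engine's delete call.
      while True:
        job = json.loads(chunk_cleanup_queue.get())
        remaining = job['not_before'] - time.time()
        if remaining > 0:
          time.sleep(remaining)
        storage.remove(job['location'], job['path'])

A worker process or thread started at startup would run chunk_cleanup_worker; if a delete still fails, the job can simply be re-queued rather than blocking the push path.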
parent 59cb6bd216
commit 5f99448adc

12 changed files with 191 additions and 59 deletions
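
The bulk of the diff below threads a single context object through the cloud storage engine constructors in place of the bare metric_queue argument, so new dependencies such as the chunk cleanup queue can be passed in without changing every constructor signature again. A rough sketch of what such a context might carry (class and field names are assumptions inferred from the diff's self._context.metric_queue usage; only metric_queue is visibly read in these hunks):

    class StorageContext(object):
      def __init__(self, metric_queue=None, chunk_cleanup_queue=None):
        self.metric_queue = metric_queue                # metrics sink, may be None
        self.chunk_cleanup_queue = chunk_cleanup_queue  # async chunk GC queue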
@@ -49,7 +49,7 @@ class StreamReadKeyAsFile(BufferedIOBase):


 class _CloudStorage(BaseStorageV2):
-  def __init__(self, metric_queue, connection_class, key_class, connect_kwargs, upload_params,
+  def __init__(self, context, connection_class, key_class, connect_kwargs, upload_params,
                storage_path, bucket_name, access_key=None, secret_key=None):
     super(_CloudStorage, self).__init__()

@@ -67,7 +67,7 @@ class _CloudStorage(BaseStorageV2):
     self._connect_kwargs = connect_kwargs
     self._cloud_conn = None
     self._cloud_bucket = None
-    self._metric_queue = metric_queue
+    self._context = context

   def _initialize_cloud_conn(self):
     if not self._initialized:

@@ -166,9 +166,9 @@ class _CloudStorage(BaseStorageV2):
     if content_encoding is not None:
       metadata['Content-Encoding'] = content_encoding

-    if self._metric_queue is not None:
-      self._metric_queue.put_deprecated('MultipartUploadStart', 1)
-      self._metric_queue.multipart_upload_start.Inc()
+    if self._context.metric_queue is not None:
+      self._context.metric_queue.put_deprecated('MultipartUploadStart', 1)
+      self._context.metric_queue.multipart_upload_start.Inc()

     return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
                                                         **self._upload_params)

@@ -207,9 +207,9 @@ class _CloudStorage(BaseStorageV2):
         logger.warn('Error when writing to stream in stream_write_internal at path %s: %s', path, e)
         write_error = e

-        if self._metric_queue is not None:
-          self._metric_queue.put_deprecated('MultipartUploadFailure', 1)
-          self._metric_queue.multipart_upload_end.Inc(labelvalues=['failure'])
+        if self._context.metric_queue is not None:
+          self._context.metric_queue.put_deprecated('MultipartUploadFailure', 1)
+          self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['failure'])

         if cancel_on_error:
           mp.cancel_upload()

@@ -218,9 +218,9 @@ class _CloudStorage(BaseStorageV2):
           break

     if total_bytes_written > 0:
-      if self._metric_queue is not None:
-        self._metric_queue.put_deprecated('MultipartUploadSuccess', 1)
-        self._metric_queue.multipart_upload_end.Inc(labelvalues=['success'])
+      if self._context.metric_queue is not None:
+        self._context.metric_queue.put_deprecated('MultipartUploadSuccess', 1)
+        self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['success'])

       self._perform_action_with_retry(mp.complete_upload)

@@ -436,8 +436,8 @@ class _CloudStorage(BaseStorageV2):


 class S3Storage(_CloudStorage):
-  def __init__(self, metric_queue, storage_path, s3_bucket, s3_access_key=None, s3_secret_key=None,
-               host=None):
+  def __init__(self, context, storage_path, s3_bucket, s3_access_key=None,
+               s3_secret_key=None, host=None):
     upload_params = {
       'encrypt_key': True,
     }

@@ -447,7 +447,7 @@ class S3Storage(_CloudStorage):
         raise ValueError('host name must not start with http:// or https://')

       connect_kwargs['host'] = host
-    super(S3Storage, self).__init__(metric_queue, boto.s3.connection.S3Connection, boto.s3.key.Key,
+    super(S3Storage, self).__init__(context, boto.s3.connection.S3Connection, boto.s3.key.Key,
                                     connect_kwargs, upload_params, storage_path, s3_bucket,
                                     access_key=s3_access_key or None,
                                     secret_key=s3_secret_key or None)

@@ -474,10 +474,10 @@ class S3Storage(_CloudStorage):
 </CORSConfiguration>""")

 class GoogleCloudStorage(_CloudStorage):
-  def __init__(self, metric_queue, storage_path, access_key, secret_key, bucket_name):
+  def __init__(self, context, storage_path, access_key, secret_key, bucket_name):
     upload_params = {}
     connect_kwargs = {}
-    super(GoogleCloudStorage, self).__init__(metric_queue, boto.gs.connection.GSConnection,
+    super(GoogleCloudStorage, self).__init__(context, boto.gs.connection.GSConnection,
                                              boto.gs.key.Key, connect_kwargs, upload_params,
                                              storage_path, bucket_name, access_key, secret_key)

@@ -534,7 +534,7 @@ class GoogleCloudStorage(_CloudStorage):


 class RadosGWStorage(_CloudStorage):
-  def __init__(self, metric_queue, hostname, is_secure, storage_path, access_key, secret_key,
+  def __init__(self, context, hostname, is_secure, storage_path, access_key, secret_key,
                bucket_name):
     upload_params = {}
     connect_kwargs = {

@@ -543,7 +543,7 @@ class RadosGWStorage(_CloudStorage):
       'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
     }

-    super(RadosGWStorage, self).__init__(metric_queue, boto.s3.connection.S3Connection,
+    super(RadosGWStorage, self).__init__(context, boto.s3.connection.S3Connection,
                                          boto.s3.key.Key, connect_kwargs, upload_params,
                                          storage_path, bucket_name, access_key, secret_key)