Add a chunk cleanup queue for async GC of empty chunks

Instead of having the Swift storage engine try to delete the empty chunk(s) synchronously, we simply queue them and have a worker come along after 30s to delete the empty chunks. This has a few key benefits: it is async (doesn't slow down the push code), helps deal with Swift's eventual consistency (less retries necessary) and is generic for other storage engines if/when they need this as well
This commit is contained in:
Joseph Schorr 2016-11-10 13:54:04 -05:00
parent 59cb6bd216
commit 5f99448adc
12 changed files with 191 additions and 59 deletions

View file

@ -49,7 +49,7 @@ class StreamReadKeyAsFile(BufferedIOBase):
class _CloudStorage(BaseStorageV2):
def __init__(self, metric_queue, connection_class, key_class, connect_kwargs, upload_params,
def __init__(self, context, connection_class, key_class, connect_kwargs, upload_params,
storage_path, bucket_name, access_key=None, secret_key=None):
super(_CloudStorage, self).__init__()
@ -67,7 +67,7 @@ class _CloudStorage(BaseStorageV2):
self._connect_kwargs = connect_kwargs
self._cloud_conn = None
self._cloud_bucket = None
self._metric_queue = metric_queue
self._context = context
def _initialize_cloud_conn(self):
if not self._initialized:
@ -166,9 +166,9 @@ class _CloudStorage(BaseStorageV2):
if content_encoding is not None:
metadata['Content-Encoding'] = content_encoding
if self._metric_queue is not None:
self._metric_queue.put_deprecated('MultipartUploadStart', 1)
self._metric_queue.multipart_upload_start.Inc()
if self._context.metric_queue is not None:
self._context.metric_queue.put_deprecated('MultipartUploadStart', 1)
self._context.metric_queue.multipart_upload_start.Inc()
return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
@ -207,9 +207,9 @@ class _CloudStorage(BaseStorageV2):
logger.warn('Error when writing to stream in stream_write_internal at path %s: %s', path, e)
write_error = e
if self._metric_queue is not None:
self._metric_queue.put_deprecated('MultipartUploadFailure', 1)
self._metric_queue.multipart_upload_end.Inc(labelvalues=['failure'])
if self._context.metric_queue is not None:
self._context.metric_queue.put_deprecated('MultipartUploadFailure', 1)
self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['failure'])
if cancel_on_error:
mp.cancel_upload()
@ -218,9 +218,9 @@ class _CloudStorage(BaseStorageV2):
break
if total_bytes_written > 0:
if self._metric_queue is not None:
self._metric_queue.put_deprecated('MultipartUploadSuccess', 1)
self._metric_queue.multipart_upload_end.Inc(labelvalues=['success'])
if self._context.metric_queue is not None:
self._context.metric_queue.put_deprecated('MultipartUploadSuccess', 1)
self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['success'])
self._perform_action_with_retry(mp.complete_upload)
@ -436,8 +436,8 @@ class _CloudStorage(BaseStorageV2):
class S3Storage(_CloudStorage):
def __init__(self, metric_queue, storage_path, s3_bucket, s3_access_key=None, s3_secret_key=None,
host=None):
def __init__(self, context, storage_path, s3_bucket, s3_access_key=None,
s3_secret_key=None, host=None):
upload_params = {
'encrypt_key': True,
}
@ -447,7 +447,7 @@ class S3Storage(_CloudStorage):
raise ValueError('host name must not start with http:// or https://')
connect_kwargs['host'] = host
super(S3Storage, self).__init__(metric_queue, boto.s3.connection.S3Connection, boto.s3.key.Key,
super(S3Storage, self).__init__(context, boto.s3.connection.S3Connection, boto.s3.key.Key,
connect_kwargs, upload_params, storage_path, s3_bucket,
access_key=s3_access_key or None,
secret_key=s3_secret_key or None)
@ -474,10 +474,10 @@ class S3Storage(_CloudStorage):
</CORSConfiguration>""")
class GoogleCloudStorage(_CloudStorage):
def __init__(self, metric_queue, storage_path, access_key, secret_key, bucket_name):
def __init__(self, context, storage_path, access_key, secret_key, bucket_name):
upload_params = {}
connect_kwargs = {}
super(GoogleCloudStorage, self).__init__(metric_queue, boto.gs.connection.GSConnection,
super(GoogleCloudStorage, self).__init__(context, boto.gs.connection.GSConnection,
boto.gs.key.Key, connect_kwargs, upload_params,
storage_path, bucket_name, access_key, secret_key)
@ -534,7 +534,7 @@ class GoogleCloudStorage(_CloudStorage):
class RadosGWStorage(_CloudStorage):
def __init__(self, metric_queue, hostname, is_secure, storage_path, access_key, secret_key,
def __init__(self, context, hostname, is_secure, storage_path, access_key, secret_key,
bucket_name):
upload_params = {}
connect_kwargs = {
@ -543,7 +543,7 @@ class RadosGWStorage(_CloudStorage):
'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
}
super(RadosGWStorage, self).__init__(metric_queue, boto.s3.connection.S3Connection,
super(RadosGWStorage, self).__init__(context, boto.s3.connection.S3Connection,
boto.s3.key.Key, connect_kwargs, upload_params,
storage_path, bucket_name, access_key, secret_key)