Add a chunk cleanup queue for async GC of empty chunks

Instead of having the Swift storage engine try to delete the empty chunk(s) synchronously, we simply queue them and have a worker come along after 30s to delete the empty chunks. This has a few key benefits: it is async (doesn't slow down the push code), helps deal with Swift's eventual consistency (less retries necessary) and is generic for other storage engines if/when they need this as well
This commit is contained in:
Joseph Schorr 2016-11-10 13:54:04 -05:00
parent 59cb6bd216
commit 5f99448adc
12 changed files with 191 additions and 59 deletions

View file

@ -14,7 +14,7 @@ from swiftclient.client import Connection, ClientException
from urlparse import urlparse
from random import SystemRandom
from hashlib import sha1
from time import time, sleep
from time import time
from collections import namedtuple
from util.registry import filelike
@ -26,17 +26,20 @@ logger = logging.getLogger(__name__)
_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length'])
_SEGMENTS_KEY = 'segments'
_EMPTY_SEGMENTS_KEY = 'emptysegments'
_SEGMENT_DIRECTORY = 'segments'
_MAXIMUM_SEGMENT_SIZE = 200000000 # ~200 MB
_DEFAULT_SWIFT_CONNECT_TIMEOUT = 5 # seconds
_CHUNK_CLEANUP_DELAY = 30 # seconds
class SwiftStorage(BaseStorage):
def __init__(self, metric_queue, swift_container, storage_path, auth_url, swift_user,
swift_password, auth_version=None, os_options=None, ca_cert_path=None,
temp_url_key=None, simple_path_concat=False, connect_timeout=None,
retry_count=None, retry_on_ratelimit=True):
def __init__(self, context, swift_container, storage_path, auth_url, swift_user, swift_password,
auth_version=None, os_options=None, ca_cert_path=None, temp_url_key=None,
simple_path_concat=False, connect_timeout=None, retry_count=None,
retry_on_ratelimit=True):
super(SwiftStorage, self).__init__()
self._swift_container = swift_container
self._context = context
self._storage_path = storage_path.lstrip('/')
self._simple_path_concat = simple_path_concat
@ -205,7 +208,8 @@ class SwiftStorage(BaseStorage):
path = self._normalize_path(path)
try:
self._get_connection().delete_object(self._swift_container, path)
except Exception:
except Exception as ex:
logger.warning('Could not delete path %s: %s', path, str(ex))
raise IOError('Cannot delete path: %s' % path)
def _random_checksum(self, count):
@ -220,14 +224,15 @@ class SwiftStorage(BaseStorage):
return headers.get('etag', '')[1:-1][:7] or self._random_checksum(7)
@staticmethod
def _segment_list_from_metadata(storage_metadata):
return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[_SEGMENTS_KEY]]
def _segment_list_from_metadata(storage_metadata, key=_SEGMENTS_KEY):
return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[key]]
def initiate_chunked_upload(self):
random_uuid = str(uuid4())
metadata = {
_SEGMENTS_KEY: [],
_EMPTY_SEGMENTS_KEY: [],
}
return random_uuid, metadata
@ -292,18 +297,8 @@ class SwiftStorage(BaseStorage):
updated_metadata[_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
bytes_written))
else:
# Try to delete the empty segment, as it is not needed. This will occasionally fail
# due to Swift's eventual consistency, so we retry a few times and then just leave it be.
for remaining_retries in range(2, -1, -1):
try:
self.remove(segment_path)
except IOError:
if remaining_retries:
sleep(0.25)
continue
# Otherwise, ignore it.
break
updated_metadata[_EMPTY_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
bytes_written))
return bytes_written, updated_metadata
@ -311,6 +306,26 @@ class SwiftStorage(BaseStorage):
""" Complete the chunked upload and store the final results in the path indicated.
Returns nothing.
"""
# Check all potentially empty segments against the segments that were uploaded; if the path
# is still empty, then we queue the segment to be deleted.
if self._context.chunk_cleanup_queue is not None:
nonempty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
key=_SEGMENTS_KEY)
potentially_empty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
key=_EMPTY_SEGMENTS_KEY)
nonempty_paths = set([segment.path for segment in nonempty_segments])
for segment in potentially_empty_segments:
if segment.path in nonempty_paths:
continue
# Queue the chunk to be deleted, as it is empty and therefore unused.
self._context.chunk_cleanup_queue.put(['segment/%s/%s' % (self._context.location, uuid)], {
'location': self._context.location,
'uuid': uuid,
'path': segment.path,
}, available_after=_CHUNK_CLEANUP_DELAY)
# Finally, we write an empty file at the proper location with a X-Object-Manifest
# header pointing to the prefix for the segments.
segments_prefix_path = self._normalize_path('%s/%s' % (_SEGMENT_DIRECTORY, uuid))
@ -323,5 +338,5 @@ class SwiftStorage(BaseStorage):
Returns nothing.
"""
# Delete all the uploaded segments.
for segment in SwiftStorage._segment_list_from_metadata(storage_metadata):
for segment in SwiftStorage._segment_list_from_metadata(storage_metadata, key=_SEGMENTS_KEY):
self.remove(segment.path)