Add a chunk cleanup queue for async GC of empty chunks
Instead of having the Swift storage engine try to delete the empty chunk(s) synchronously, we simply queue them and have a worker come along after 30s to delete the empty chunks. This has a few key benefits: it is async (it doesn't slow down the push code), it helps deal with Swift's eventual consistency (fewer retries are necessary), and it is generic enough for other storage engines to use if/when they need this as well.
This commit is contained in:
parent
59cb6bd216
commit
5f99448adc
12 changed files with 191 additions and 59 deletions
|
@ -14,7 +14,7 @@ from swiftclient.client import Connection, ClientException
|
|||
from urlparse import urlparse
|
||||
from random import SystemRandom
|
||||
from hashlib import sha1
|
||||
from time import time, sleep
|
||||
from time import time
|
||||
from collections import namedtuple
|
||||
|
||||
from util.registry import filelike
|
||||
|
@ -26,17 +26,20 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length'])
|
||||
_SEGMENTS_KEY = 'segments'
|
||||
_EMPTY_SEGMENTS_KEY = 'emptysegments'
|
||||
_SEGMENT_DIRECTORY = 'segments'
|
||||
_MAXIMUM_SEGMENT_SIZE = 200000000 # ~200 MB
|
||||
_DEFAULT_SWIFT_CONNECT_TIMEOUT = 5 # seconds
|
||||
_CHUNK_CLEANUP_DELAY = 30 # seconds
|
||||
|
||||
class SwiftStorage(BaseStorage):
|
||||
def __init__(self, metric_queue, swift_container, storage_path, auth_url, swift_user,
|
||||
swift_password, auth_version=None, os_options=None, ca_cert_path=None,
|
||||
temp_url_key=None, simple_path_concat=False, connect_timeout=None,
|
||||
retry_count=None, retry_on_ratelimit=True):
|
||||
def __init__(self, context, swift_container, storage_path, auth_url, swift_user, swift_password,
|
||||
auth_version=None, os_options=None, ca_cert_path=None, temp_url_key=None,
|
||||
simple_path_concat=False, connect_timeout=None, retry_count=None,
|
||||
retry_on_ratelimit=True):
|
||||
super(SwiftStorage, self).__init__()
|
||||
self._swift_container = swift_container
|
||||
self._context = context
|
||||
|
||||
self._storage_path = storage_path.lstrip('/')
|
||||
self._simple_path_concat = simple_path_concat
|
||||
|
@ -205,7 +208,8 @@ class SwiftStorage(BaseStorage):
|
|||
path = self._normalize_path(path)
|
||||
try:
|
||||
self._get_connection().delete_object(self._swift_container, path)
|
||||
except Exception:
|
||||
except Exception as ex:
|
||||
logger.warning('Could not delete path %s: %s', path, str(ex))
|
||||
raise IOError('Cannot delete path: %s' % path)
|
||||
|
||||
def _random_checksum(self, count):
|
||||
|
@ -220,14 +224,15 @@ class SwiftStorage(BaseStorage):
|
|||
return headers.get('etag', '')[1:-1][:7] or self._random_checksum(7)
|
||||
|
||||
@staticmethod
|
||||
def _segment_list_from_metadata(storage_metadata):
|
||||
return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[_SEGMENTS_KEY]]
|
||||
def _segment_list_from_metadata(storage_metadata, key=_SEGMENTS_KEY):
|
||||
return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[key]]
|
||||
|
||||
def initiate_chunked_upload(self):
|
||||
random_uuid = str(uuid4())
|
||||
|
||||
metadata = {
|
||||
_SEGMENTS_KEY: [],
|
||||
_EMPTY_SEGMENTS_KEY: [],
|
||||
}
|
||||
|
||||
return random_uuid, metadata
|
||||
|
@ -292,18 +297,8 @@ class SwiftStorage(BaseStorage):
|
|||
updated_metadata[_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
|
||||
bytes_written))
|
||||
else:
|
||||
# Try to delete the empty segment, as it is not needed. This will occasionally fail
|
||||
# due to Swift's eventual consistency, so we retry a few times and then just leave it be.
|
||||
for remaining_retries in range(2, -1, -1):
|
||||
try:
|
||||
self.remove(segment_path)
|
||||
except IOError:
|
||||
if remaining_retries:
|
||||
sleep(0.25)
|
||||
continue
|
||||
|
||||
# Otherwise, ignore it.
|
||||
break
|
||||
updated_metadata[_EMPTY_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
|
||||
bytes_written))
|
||||
|
||||
return bytes_written, updated_metadata
|
||||
|
||||
|
@ -311,6 +306,26 @@ class SwiftStorage(BaseStorage):
|
|||
""" Complete the chunked upload and store the final results in the path indicated.
|
||||
Returns nothing.
|
||||
"""
|
||||
# Check all potentially empty segments against the segments that were uploaded; if the path
|
||||
# is still empty, then we queue the segment to be deleted.
|
||||
if self._context.chunk_cleanup_queue is not None:
|
||||
nonempty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
|
||||
key=_SEGMENTS_KEY)
|
||||
potentially_empty_segments = SwiftStorage._segment_list_from_metadata(storage_metadata,
|
||||
key=_EMPTY_SEGMENTS_KEY)
|
||||
|
||||
nonempty_paths = set([segment.path for segment in nonempty_segments])
|
||||
for segment in potentially_empty_segments:
|
||||
if segment.path in nonempty_paths:
|
||||
continue
|
||||
|
||||
# Queue the chunk to be deleted, as it is empty and therefore unused.
|
||||
self._context.chunk_cleanup_queue.put(['segment/%s/%s' % (self._context.location, uuid)], {
|
||||
'location': self._context.location,
|
||||
'uuid': uuid,
|
||||
'path': segment.path,
|
||||
}, available_after=_CHUNK_CLEANUP_DELAY)
|
||||
|
||||
# Finally, we write an empty file at the proper location with a X-Object-Manifest
|
||||
# header pointing to the prefix for the segments.
|
||||
segments_prefix_path = self._normalize_path('%s/%s' % (_SEGMENT_DIRECTORY, uuid))
|
||||
|
@ -323,5 +338,5 @@ class SwiftStorage(BaseStorage):
|
|||
Returns nothing.
|
||||
"""
|
||||
# Delete all the uploaded segments.
|
||||
for segment in SwiftStorage._segment_list_from_metadata(storage_metadata):
|
||||
for segment in SwiftStorage._segment_list_from_metadata(storage_metadata, key=_SEGMENTS_KEY):
|
||||
self.remove(segment.path)
|
||||
|
|
Reference in a new issue