Optimize GC query for looking up deletable storages
This commit is contained in:
parent
640012103c
commit
9e4f8cac03
2 changed files with 171 additions and 110 deletions
|
@ -46,6 +46,31 @@ def add_storage_placement(storage, location_name):
|
|||
pass
|
||||
|
||||
|
||||
def _orphaned_storage_query(candidate_ids):
|
||||
""" Returns the subset of the candidate ImageStorage IDs representing storages that are no
|
||||
longer referenced by images.
|
||||
"""
|
||||
# Issue a union query to find all storages that are still referenced by a candidate storage. This
|
||||
# is much faster than the group_by and having call we used to use here.
|
||||
nonorphaned_queries = []
|
||||
for counter, candidate_id in enumerate(candidate_ids):
|
||||
query_alias = 'q{0}'.format(counter)
|
||||
storage_subq = (ImageStorage
|
||||
.select(ImageStorage.id)
|
||||
.join(Image)
|
||||
.where(ImageStorage.id == candidate_id)
|
||||
.limit(1)
|
||||
.alias(query_alias))
|
||||
|
||||
nonorphaned_queries.append(ImageStorage
|
||||
.select(SQL('*'))
|
||||
.from_(storage_subq))
|
||||
|
||||
# Build the set of storages that are missing. These storages are orphaned.
|
||||
nonorphaned_storage_ids = {storage.id for storage in _reduce_as_tree(nonorphaned_queries)}
|
||||
return list(candidate_ids - nonorphaned_storage_ids)
|
||||
|
||||
|
||||
def garbage_collect_storage(storage_id_whitelist):
|
||||
if len(storage_id_whitelist) == 0:
|
||||
return
|
||||
|
@ -55,27 +80,21 @@ def garbage_collect_storage(storage_id_whitelist):
|
|||
get_layer_path(placement.storage))
|
||||
for placement in placements_query}
|
||||
|
||||
def orphaned_storage_query(select_base_query, candidates, group_by):
    """ Restricts select_base_query to the candidate storages that have no images.

        The base query is switched to ImageStorage, left-outer-joined to Image, limited to the
        given candidate IDs, grouped by the group_by columns and filtered down to the groups
        whose Image count is zero.
    """
    with_images = select_base_query.switch(ImageStorage).join(Image, JOIN_LEFT_OUTER)
    only_candidates = with_images.where(ImageStorage.id << list(candidates))
    per_group = only_candidates.group_by(*group_by)

    # With the outer join, a storage that no image references counts zero Image ids.
    has_no_images = fn.Count(Image.id) == 0
    return per_group.having(has_no_images)
|
||||
|
||||
# Note: Both of these deletes must occur in the same transaction (unfortunately) because a
|
||||
# storage without any placement is invalid, and a placement cannot exist without a storage.
|
||||
# TODO(jake): We might want to allow for null storages on placements, which would allow us to
|
||||
# delete the storages, then delete the placements in a non-transaction.
|
||||
logger.debug('Garbage collecting storages from candidates: %s', storage_id_whitelist)
|
||||
with db_transaction():
|
||||
# Track all of the data that should be removed from blob storage
|
||||
placements_to_remove = list(orphaned_storage_query(ImageStoragePlacement
|
||||
.select(ImageStoragePlacement,
|
||||
ImageStorage)
|
||||
.join(ImageStorage),
|
||||
storage_id_whitelist,
|
||||
(ImageStorage.id, ImageStoragePlacement.id)))
|
||||
orphaned_storage_ids = _orphaned_storage_query(storage_id_whitelist)
|
||||
if len(orphaned_storage_ids) == 0:
|
||||
# Nothing to GC.
|
||||
return
|
||||
|
||||
placements_to_remove = list(ImageStoragePlacement
|
||||
.select()
|
||||
.join(ImageStorage)
|
||||
.where(ImageStorage.id << orphaned_storage_ids))
|
||||
|
||||
paths_to_remove = placements_query_to_paths_set(placements_to_remove)
|
||||
|
||||
|
@ -89,28 +108,23 @@ def garbage_collect_storage(storage_id_whitelist):
|
|||
logger.debug('Removed %s image storage placements', placements_removed)
|
||||
|
||||
# Remove all orphaned storages
|
||||
# The comma after ImageStorage.id is VERY important, it makes it a tuple, which is a sequence
|
||||
orphaned_storages = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id),
|
||||
storage_id_whitelist,
|
||||
(ImageStorage.id,)).alias('osq'))
|
||||
if len(orphaned_storages) > 0:
|
||||
torrents_removed = (TorrentInfo
|
||||
.delete()
|
||||
.where(TorrentInfo.storage << orphaned_storages)
|
||||
.execute())
|
||||
logger.debug('Removed %s torrent info records', torrents_removed)
|
||||
torrents_removed = (TorrentInfo
|
||||
.delete()
|
||||
.where(TorrentInfo.storage << orphaned_storage_ids)
|
||||
.execute())
|
||||
logger.debug('Removed %s torrent info records', torrents_removed)
|
||||
|
||||
signatures_removed = (ImageStorageSignature
|
||||
.delete()
|
||||
.where(ImageStorageSignature.storage << orphaned_storages)
|
||||
.execute())
|
||||
logger.debug('Removed %s image storage signatures', signatures_removed)
|
||||
|
||||
storages_removed = (ImageStorage
|
||||
signatures_removed = (ImageStorageSignature
|
||||
.delete()
|
||||
.where(ImageStorage.id << orphaned_storages)
|
||||
.where(ImageStorageSignature.storage << orphaned_storage_ids)
|
||||
.execute())
|
||||
logger.debug('Removed %s image storage records', storages_removed)
|
||||
logger.debug('Removed %s image storage signatures', signatures_removed)
|
||||
|
||||
storages_removed = (ImageStorage
|
||||
.delete()
|
||||
.where(ImageStorage.id << orphaned_storage_ids)
|
||||
.execute())
|
||||
logger.debug('Removed %s image storage records', storages_removed)
|
||||
|
||||
# We are going to make the conscious decision to not delete image storage blobs inside
|
||||
# transactions.
|
||||
|
|
Reference in a new issue