Add a whitelist of candidate storages which will speed up the orphan queries and limit the damage of GC run amok.

This commit is contained in:
Jake Moshenko 2014-10-17 15:26:51 -04:00
parent baca3f79ed
commit c093e5a326

View file

@ -1389,6 +1389,8 @@ def garbage_collect_repository(namespace_name, repository_name):
all_images = {int(img.id): img for img in all_repo_images}
to_remove = set(all_images.keys()).difference(referenced_anscestors)
storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove}
logger.info('Cleaning up unreferenced images: %s', to_remove)
Image.delete().where(Image.id << list(to_remove)).execute()
@ -1398,21 +1400,32 @@ def garbage_collect_repository(namespace_name, repository_name):
return {(placement.location.name, config.store.image_path(placement.storage.uuid))
for placement in placements_query}
def orphaned_storage_query(select_base_query):
def orphaned_storage_query(select_base_query, candidates):
return (select_base_query
.switch(ImageStorage)
.join(Image, JOIN_LEFT_OUTER)
.switch(ImageStorage)
.join(DerivedImageStorage, JOIN_LEFT_OUTER, on=(ImageStorage.id ==
DerivedImageStorage.derivative))
.join(DerivedImageStorage, JOIN_LEFT_OUTER,
on=(ImageStorage.id == DerivedImageStorage.derivative))
.where(ImageStorage.id << list(candidates))
.group_by(ImageStorage)
.having((fn.Count(Image.id) == 0) & (fn.Count(DerivedImageStorage.id) == 0)))
paths_to_remove = set()
with config.app_config['DB_TRANSACTION_FACTORY'](db):
# Find out which derived storages will be removed, and add them to the whitelist
orphaned_from_candidates = orphaned_storage_query(ImageStorage.select(), storage_id_whitelist)
derived_to_remove = (ImageStorage
.select(ImageStorage.id)
.join(DerivedImageStorage,
on=(ImageStorage.id == DerivedImageStorage.derivative))
.where(DerivedImageStorage.source << orphaned_from_candidates.clone()))
storage_id_whitelist.update({derived.id for derived in derived_to_remove})
# Remove the derived image storages whose sources are orphaned storages
DerivedImageStorage.delete().where(DerivedImageStorage.source <<
orphaned_storage_query(ImageStorage.select())).execute()
(DerivedImageStorage
.delete()
.where(DerivedImageStorage.source << orphaned_from_candidates.clone())
.execute())
# Track all of the data that should be removed from blob storage
placements_to_remove = orphaned_storage_query(ImageStoragePlacement
@ -1421,8 +1434,9 @@ def garbage_collect_repository(namespace_name, repository_name):
ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage))
paths_to_remove.update(placements_query_to_paths_set(placements_to_remove.clone()))
.join(ImageStorage),
storage_id_whitelist)
paths_to_remove = placements_query_to_paths_set(placements_to_remove.clone())
# Remove the placements for orphaned storages
placements_subquery = placements_to_remove.clone().select(ImageStoragePlacement.id)
@ -1434,7 +1448,8 @@ def garbage_collect_repository(namespace_name, repository_name):
# Remove all of the orphaned storages
(ImageStorage
.delete()
.where(ImageStorage.id << orphaned_storage_query(ImageStorage.select(ImageStorage.id)))
.where(ImageStorage.id << orphaned_storage_query(ImageStorage.select(ImageStorage.id),
storage_id_whitelist))
.execute())
# Delete the actual blob storage