Add a whitelist of candidate storages, which will speed up the orphan queries and limit the damage if GC runs amok.
parent baca3f79ed
commit c093e5a326

1 changed file with 24 additions and 9 deletions
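In outline, the whitelist is seeded with the storage IDs attached to the images about to be deleted, and every later orphan query is restricted to that set. A minimal sketch of that narrowing, using plain Python sets in place of the Image/ImageStorage tables (the function and argument names below are illustrative, not from the commit):

    def find_orphaned_storages(candidate_storage_ids, still_referenced_storage_ids):
        """Toy model of the whitelisted orphan check: only candidates (storages
        attached to the images being garbage collected) can ever come back, so a
        buggy query can at worst delete those rows, not the whole storage table."""
        return set(candidate_storage_ids) - set(still_referenced_storage_ids)

    # Storages 10 and 11 belonged to deleted images; 11 is still referenced
    # elsewhere, so only 10 is reported as orphaned.
    assert find_orphaned_storages({10, 11}, {11, 12}) == {10}

Because nothing outside the candidate set can ever be returned, the blast radius of a faulty orphan query is bounded by the candidates themselves rather than every unreferenced storage in the database.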
@@ -1389,6 +1389,8 @@ def garbage_collect_repository(namespace_name, repository_name):
     all_images = {int(img.id): img for img in all_repo_images}
     to_remove = set(all_images.keys()).difference(referenced_anscestors)
 
+    storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove}
+
     logger.info('Cleaning up unreferenced images: %s', to_remove)
     Image.delete().where(Image.id << list(to_remove)).execute()
 
@@ -1398,21 +1400,32 @@ def garbage_collect_repository(namespace_name, repository_name):
     return {(placement.location.name, config.store.image_path(placement.storage.uuid))
             for placement in placements_query}
 
-  def orphaned_storage_query(select_base_query):
+  def orphaned_storage_query(select_base_query, candidates):
     return (select_base_query
             .switch(ImageStorage)
             .join(Image, JOIN_LEFT_OUTER)
             .switch(ImageStorage)
-            .join(DerivedImageStorage, JOIN_LEFT_OUTER, on=(ImageStorage.id ==
-                                                            DerivedImageStorage.derivative))
+            .join(DerivedImageStorage, JOIN_LEFT_OUTER,
+                  on=(ImageStorage.id == DerivedImageStorage.derivative))
+            .where(ImageStorage.id << list(candidates))
             .group_by(ImageStorage)
             .having((fn.Count(Image.id) == 0) & (fn.Count(DerivedImageStorage.id) == 0)))
 
   paths_to_remove = set()
   with config.app_config['DB_TRANSACTION_FACTORY'](db):
+    # Find out which derived storages will be removed, and add them to the whitelist
+    orphaned_from_candidates = orphaned_storage_query(ImageStorage.select(), storage_id_whitelist)
+    derived_to_remove = (ImageStorage
+                         .select(ImageStorage.id)
+                         .join(DerivedImageStorage,
+                               on=(ImageStorage.id == DerivedImageStorage.derivative))
+                         .where(DerivedImageStorage.source << orphaned_from_candidates.clone()))
+    storage_id_whitelist.update({derived.id for derived in derived_to_remove})
     # Remove the dervived image storages with sources of orphaned storages
-    DerivedImageStorage.delete().where(DerivedImageStorage.source <<
-                                       orphaned_storage_query(ImageStorage.select())).execute()
+    (DerivedImageStorage
+     .delete()
+     .where(DerivedImageStorage.source << orphaned_from_candidates.clone())
+     .execute())
 
     # Track all of the data that should be removed from blob storage
     placements_to_remove = orphaned_storage_query(ImageStoragePlacement
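For context on the hunk above: `<<` is peewee's IN operator, so the new `.where(ImageStorage.id << list(candidates))` clause limits the grouped orphan check to the whitelisted rows instead of scanning every ImageStorage. A standalone illustration of that operator, with a hypothetical `Storage` model and an in-memory SQLite database (assumes peewee is installed; none of these names come from the commit):

    from peewee import SqliteDatabase, Model, IntegerField

    db = SqliteDatabase(':memory:')

    class Storage(Model):
        # Hypothetical stand-in for ImageStorage, just to show the IN-style filter.
        ref_count = IntegerField(default=0)

        class Meta:
            database = db

    db.connect()
    db.create_tables([Storage])
    rows = [Storage.create(ref_count=n) for n in (0, 0, 3)]

    candidates = [rows[0].id, rows[2].id]
    # `<<` builds an IN clause: only rows whose id is in `candidates` are considered.
    orphans = Storage.select().where((Storage.id << candidates) & (Storage.ref_count == 0))
    print([s.id for s in orphans])  # only rows[0].id qualifies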
@@ -1421,8 +1434,9 @@ def garbage_collect_repository(namespace_name, repository_name):
                                                   ImageStorageLocation)
                                                   .join(ImageStorageLocation)
                                                   .switch(ImageStoragePlacement)
-                                                  .join(ImageStorage))
-    paths_to_remove.update(placements_query_to_paths_set(placements_to_remove.clone()))
+                                                  .join(ImageStorage),
+                                                  storage_id_whitelist)
+    paths_to_remove = placements_query_to_paths_set(placements_to_remove.clone())
 
     # Remove the placements for orphaned storages
     placements_subquery = placements_to_remove.clone().select(ImageStoragePlacement.id)
@@ -1434,7 +1448,8 @@ def garbage_collect_repository(namespace_name, repository_name):
     # Remove the all orphaned storages
     (ImageStorage
      .delete()
-     .where(ImageStorage.id << orphaned_storage_query(ImageStorage.select(ImageStorage.id)))
+     .where(ImageStorage.id << orphaned_storage_query(ImageStorage.select(ImageStorage.id),
+                                                      storage_id_whitelist))
      .execute())
 
     # Delete the actual blob storage
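The transaction body also widens the whitelist before deleting anything: any DerivedImageStorage whose source turns out to be orphaned is added as a candidate itself, so the later placement and blob cleanup cover it too. A toy sketch of that expansion step, again with plain sets and illustrative names rather than the commit's peewee queries:

    def expand_whitelist_with_derived(storage_id_whitelist, orphaned_source_ids, derived_pairs):
        """derived_pairs: iterable of (source_storage_id, derivative_storage_id).
        Any derivative whose source storage turned out to be orphaned is folded
        into the whitelist so the later deletes and blob cleanup cover it too."""
        whitelist = set(storage_id_whitelist)
        whitelist.update(derivative for source, derivative in derived_pairs
                         if source in orphaned_source_ids)
        return whitelist

    # Storage 10 is an orphaned candidate and storage 20 was derived from it,
    # so 20 joins the whitelist; 21 derives from 12, which is not orphaned.
    assert expand_whitelist_with_derived({10, 11}, {10}, [(10, 20), (12, 21)]) == {10, 11, 20}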