diff --git a/data/model/legacy.py b/data/model/legacy.py index 175ac0a02..d4d983c9e 100644 --- a/data/model/legacy.py +++ b/data/model/legacy.py @@ -1389,6 +1389,8 @@ def garbage_collect_repository(namespace_name, repository_name): all_images = {int(img.id): img for img in all_repo_images} to_remove = set(all_images.keys()).difference(referenced_anscestors) + storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove} + logger.info('Cleaning up unreferenced images: %s', to_remove) Image.delete().where(Image.id << list(to_remove)).execute() @@ -1398,21 +1400,32 @@ def garbage_collect_repository(namespace_name, repository_name): return {(placement.location.name, config.store.image_path(placement.storage.uuid)) for placement in placements_query} - def orphaned_storage_query(select_base_query): + def orphaned_storage_query(select_base_query, candidates): return (select_base_query .switch(ImageStorage) .join(Image, JOIN_LEFT_OUTER) .switch(ImageStorage) - .join(DerivedImageStorage, JOIN_LEFT_OUTER, on=(ImageStorage.id == - DerivedImageStorage.derivative)) + .join(DerivedImageStorage, JOIN_LEFT_OUTER, + on=(ImageStorage.id == DerivedImageStorage.derivative)) + .where(ImageStorage.id << list(candidates)) .group_by(ImageStorage) .having((fn.Count(Image.id) == 0) & (fn.Count(DerivedImageStorage.id) == 0))) - paths_to_remove = set() with config.app_config['DB_TRANSACTION_FACTORY'](db): + # Find out which derived storages will be removed, and add them to the whitelist + orphaned_from_candidates = orphaned_storage_query(ImageStorage.select(), storage_id_whitelist) + derived_to_remove = (ImageStorage + .select(ImageStorage.id) + .join(DerivedImageStorage, + on=(ImageStorage.id == DerivedImageStorage.derivative)) + .where(DerivedImageStorage.source << orphaned_from_candidates.clone())) + storage_id_whitelist.update({derived.id for derived in derived_to_remove}) + # Remove the dervived image storages with sources of orphaned storages - DerivedImageStorage.delete().where(DerivedImageStorage.source << - orphaned_storage_query(ImageStorage.select())).execute() + (DerivedImageStorage + .delete() + .where(DerivedImageStorage.source << orphaned_from_candidates.clone()) + .execute()) # Track all of the data that should be removed from blob storage placements_to_remove = orphaned_storage_query(ImageStoragePlacement @@ -1421,8 +1434,9 @@ def garbage_collect_repository(namespace_name, repository_name): ImageStorageLocation) .join(ImageStorageLocation) .switch(ImageStoragePlacement) - .join(ImageStorage)) - paths_to_remove.update(placements_query_to_paths_set(placements_to_remove.clone())) + .join(ImageStorage), + storage_id_whitelist) + paths_to_remove = placements_query_to_paths_set(placements_to_remove.clone()) # Remove the placements for orphaned storages placements_subquery = placements_to_remove.clone().select(ImageStoragePlacement.id) @@ -1434,7 +1448,8 @@ def garbage_collect_repository(namespace_name, repository_name): # Remove the all orphaned storages (ImageStorage .delete() - .where(ImageStorage.id << orphaned_storage_query(ImageStorage.select(ImageStorage.id))) + .where(ImageStorage.id << orphaned_storage_query(ImageStorage.select(ImageStorage.id), + storage_id_whitelist)) .execute()) # Delete the actual blob storage