diff --git a/data/model/legacy.py b/data/model/legacy.py index 878a501aa..239b86131 100644 --- a/data/model/legacy.py +++ b/data/model/legacy.py @@ -1391,46 +1391,54 @@ def garbage_collect_repository(namespace_name, repository_name): logger.info('Cleaning up unreferenced images: %s', to_remove) - uuids_to_check_for_gc = set() - for image_id_to_remove in to_remove: - image_to_remove = all_images[image_id_to_remove] + uuids_to_check_for_gc = {all_images[id_to_remove].storage.uuid for id_to_remove in to_remove} + Image.delete().where(Image.id << list(to_remove)).execute() - logger.debug('Adding image storage to the gc list: %s', - image_to_remove.storage.uuid) - uuids_to_check_for_gc.add(image_to_remove.storage.uuid) + logger.debug('Checking image storages for being orphaned: %s', uuids_to_check_for_gc) - image_to_remove.delete_instance() + # We are going to make the concious decision to not delete image storage inside the transaction + # This may end up producing garbage in s3, trading off for higher availability in the database + def placements_query_to_paths_set(placements_query): + return {(placement.location.name, config.store.image_path(placement.storage.uuid)) + for placement in placements_query} - def remove_storages(query): - for storage in query: - logger.debug('Garbage collecting image storage: %s', storage.uuid) + def remove_storages(placements_query): + storages_to_remove = {placement.storage.uuid: placement.storage + for placement in placements_query} + for storage in storages_to_remove.values(): + # Deletes all placements as well, but leaves derived storages + storage.delete_instance(recursive=True) - image_path = config.store.image_path(storage.uuid) - for placement in storage.imagestorageplacement_set: - location_name = placement.location.name - placement.delete_instance() - config.store.remove({location_name}, image_path) + paths_to_remove = set() + with config.app_config['DB_TRANSACTION_FACTORY'](db): + placements_to_remove = (ImageStoragePlacement + .select(ImageStoragePlacement, ImageStorage, ImageStorageLocation) + .join(ImageStorage) + .join(Image, JOIN_LEFT_OUTER) + .switch(ImageStoragePlacement) + .join(ImageStorageLocation) + .group_by(ImageStorage) + .where(ImageStorage.uuid << list(uuids_to_check_for_gc)) + .having(fn.Count(Image.id) == 0)) + paths_to_remove.update(placements_query_to_paths_set(placements_to_remove)) + remove_storages(placements_to_remove) - storage.delete_instance(recursive=True) + with config.app_config['DB_TRANSACTION_FACTORY'](db): + derived_to_remove = (ImageStoragePlacement + .select(ImageStoragePlacement, ImageStorage, ImageStorageLocation) + .join(ImageStorage) + .join(DerivedImageStorage, on=(ImageStorage.id == + DerivedImageStorage.derivative)) + .switch(ImageStoragePlacement) + .join(ImageStorageLocation) + .where(DerivedImageStorage.source >> None)) + paths_to_remove.update(placements_query_to_paths_set(derived_to_remove)) + remove_storages(derived_to_remove) - if uuids_to_check_for_gc: - storage_to_remove = (ImageStorage - .select() - .join(Image, JOIN_LEFT_OUTER) - .group_by(ImageStorage) - .where(ImageStorage.uuid << list(uuids_to_check_for_gc)) - .having(fn.Count(Image.id) == 0)) - - remove_storages(storage_to_remove) - - # Now remove any derived image storages whose sources have been removed - derived_storages_to_remove = (ImageStorage - .select() - .join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative)) - .where(DerivedImageStorage.source >> None)) - remove_storages(derived_storages_to_remove) - - return len(to_remove) + # Delete the actual blob storage + for location_name, image_path in paths_to_remove: + logger.debug('Removing %s from %s', image_path, location_name) + config.store.remove({location_name}, image_path) def get_tag_image(namespace_name, repository_name, tag_name):