diff --git a/data/model/repository.py b/data/model/repository.py index 6602bc081..2cd2b6976 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -185,11 +185,19 @@ def garbage_collect_repo(repo, extra_candidate_set=None): all_storage_id_whitelist = set() all_unreferenced_candidates = set() + # Remove any images directly referenced by tags, to prune the working set. + direct_referenced = (RepositoryTag + .select(RepositoryTag.image) + .where(RepositoryTag.repository == repo.id, + RepositoryTag.image << list(candidate_orphan_image_set))) + candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced]) + # Iteratively try to remove images from the database. The only images we can remove are those # that are not referenced by tags AND not the parents of other images. We continue removing images # until no changes are found. iteration = 0 - while candidate_orphan_image_set: + making_progress = True + while candidate_orphan_image_set and making_progress: iteration = iteration + 1 logger.debug('Starting iteration #%s for GC of repository %s with candidates: %s', iteration, repo.id, candidate_orphan_image_set) @@ -221,6 +229,7 @@ def garbage_collect_repo(repo, extra_candidate_set=None): ~(Image.id << referenced_candidates))) image_ids_to_remove = [candidate.id for candidate in unreferenced_candidates] + making_progress = bool(len(image_ids_to_remove)) if len(image_ids_to_remove) == 0: # No more candidates to remove. break @@ -254,12 +263,12 @@ def garbage_collect_repo(repo, extra_candidate_set=None): logger.info('Could not GC images %s; will try again soon', image_ids_to_remove) return False - # Add the images to the removed set and remove them from the candidate set. - all_images_removed.update(image_ids_to_remove) - all_storage_id_whitelist.update(storage_id_whitelist) - all_unreferenced_candidates.update(unreferenced_candidates) + # Add the images to the removed set and remove them from the candidate set. + all_images_removed.update(image_ids_to_remove) + all_storage_id_whitelist.update(storage_id_whitelist) + all_unreferenced_candidates.update(unreferenced_candidates) - candidate_orphan_image_set.difference_update(image_ids_to_remove) + candidate_orphan_image_set.difference_update(image_ids_to_remove) # If any images were removed, GC any orphaned storages. if len(all_images_removed) > 0: