Remove directly referenced images from the candidate set before starting GC iteration
This makes the lookup query that runs inside the transaction smaller when many images are referenced directly by tags. The direct-reference check must still be performed within the transaction, but this should reduce the search space somewhat.
This commit is contained in:
parent
45c7008078
commit
cdd7cb9321
1 changed file with 15 additions and 6 deletions
|
@ -185,11 +185,19 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
|||
all_storage_id_whitelist = set()
|
||||
all_unreferenced_candidates = set()
|
||||
|
||||
# Remove any images directly referenced by tags, to prune the working set.
|
||||
direct_referenced = (RepositoryTag
|
||||
.select(RepositoryTag.image)
|
||||
.where(RepositoryTag.repository == repo.id,
|
||||
RepositoryTag.image << list(candidate_orphan_image_set)))
|
||||
candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced])
|
||||
|
||||
# Iteratively try to remove images from the database. The only images we can remove are those
|
||||
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
||||
# until no changes are found.
|
||||
iteration = 0
|
||||
while candidate_orphan_image_set:
|
||||
making_progress = True
|
||||
while candidate_orphan_image_set and making_progress:
|
||||
iteration = iteration + 1
|
||||
logger.debug('Starting iteration #%s for GC of repository %s with candidates: %s', iteration,
|
||||
repo.id, candidate_orphan_image_set)
|
||||
|
@ -221,6 +229,7 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
|||
~(Image.id << referenced_candidates)))
|
||||
|
||||
image_ids_to_remove = [candidate.id for candidate in unreferenced_candidates]
|
||||
making_progress = bool(len(image_ids_to_remove))
|
||||
if len(image_ids_to_remove) == 0:
|
||||
# No more candidates to remove.
|
||||
break
|
||||
|
@ -254,12 +263,12 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
|||
logger.info('Could not GC images %s; will try again soon', image_ids_to_remove)
|
||||
return False
|
||||
|
||||
# Add the images to the removed set and remove them from the candidate set.
|
||||
all_images_removed.update(image_ids_to_remove)
|
||||
all_storage_id_whitelist.update(storage_id_whitelist)
|
||||
all_unreferenced_candidates.update(unreferenced_candidates)
|
||||
# Add the images to the removed set and remove them from the candidate set.
|
||||
all_images_removed.update(image_ids_to_remove)
|
||||
all_storage_id_whitelist.update(storage_id_whitelist)
|
||||
all_unreferenced_candidates.update(unreferenced_candidates)
|
||||
|
||||
candidate_orphan_image_set.difference_update(image_ids_to_remove)
|
||||
candidate_orphan_image_set.difference_update(image_ids_to_remove)
|
||||
|
||||
# If any images were removed, GC any orphaned storages.
|
||||
if len(all_images_removed) > 0:
|
||||
|
|
Reference in a new issue