Remove directly referenced images from the candidate set before starting GC iteration
Makes the lookup query that runs inside the transaction smaller when many images are referenced directly by tags. We must still perform the direct-reference check within the transaction, but pruning these images up front should reduce the size of the search space somewhat.
This commit is contained in:
parent
45c7008078
commit
cdd7cb9321
1 changed file with 15 additions and 6 deletions
|
@ -185,11 +185,19 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||||
all_storage_id_whitelist = set()
|
all_storage_id_whitelist = set()
|
||||||
all_unreferenced_candidates = set()
|
all_unreferenced_candidates = set()
|
||||||
|
|
||||||
|
# Remove any images directly referenced by tags, to prune the working set.
|
||||||
|
direct_referenced = (RepositoryTag
|
||||||
|
.select(RepositoryTag.image)
|
||||||
|
.where(RepositoryTag.repository == repo.id,
|
||||||
|
RepositoryTag.image << list(candidate_orphan_image_set)))
|
||||||
|
candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced])
|
||||||
|
|
||||||
# Iteratively try to remove images from the database. The only images we can remove are those
|
# Iteratively try to remove images from the database. The only images we can remove are those
|
||||||
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
||||||
# until no changes are found.
|
# until no changes are found.
|
||||||
iteration = 0
|
iteration = 0
|
||||||
while candidate_orphan_image_set:
|
making_progress = True
|
||||||
|
while candidate_orphan_image_set and making_progress:
|
||||||
iteration = iteration + 1
|
iteration = iteration + 1
|
||||||
logger.debug('Starting iteration #%s for GC of repository %s with candidates: %s', iteration,
|
logger.debug('Starting iteration #%s for GC of repository %s with candidates: %s', iteration,
|
||||||
repo.id, candidate_orphan_image_set)
|
repo.id, candidate_orphan_image_set)
|
||||||
|
@ -221,6 +229,7 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||||
~(Image.id << referenced_candidates)))
|
~(Image.id << referenced_candidates)))
|
||||||
|
|
||||||
image_ids_to_remove = [candidate.id for candidate in unreferenced_candidates]
|
image_ids_to_remove = [candidate.id for candidate in unreferenced_candidates]
|
||||||
|
making_progress = bool(len(image_ids_to_remove))
|
||||||
if len(image_ids_to_remove) == 0:
|
if len(image_ids_to_remove) == 0:
|
||||||
# No more candidates to remove.
|
# No more candidates to remove.
|
||||||
break
|
break
|
||||||
|
|
Reference in a new issue