Merge pull request #3288 from quay/faster-gc
Change garbage collection queries to be far smaller by GCing per tag and per image
This commit is contained in:
commit
3e63b08731
2 changed files with 31 additions and 33 deletions
|
@ -93,18 +93,8 @@ def purge_repository(namespace_name, repository_name):
|
||||||
ApprTag.delete().where(ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)).execute()
|
ApprTag.delete().where(ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)).execute()
|
||||||
ApprTag.delete().where(ApprTag.repository == repo).execute()
|
ApprTag.delete().where(ApprTag.repository == repo).execute()
|
||||||
|
|
||||||
# Delete all tags to allow gc to reclaim storage
|
|
||||||
previously_referenced = tag.purge_all_tags(repo)
|
|
||||||
unreferenced_image_q = Image.select(Image.id).where(Image.repository == repo)
|
|
||||||
|
|
||||||
if len(previously_referenced) > 0:
|
|
||||||
unreferenced_image_q = (unreferenced_image_q.where(~(Image.id << list(previously_referenced))))
|
|
||||||
|
|
||||||
unreferenced_candidates = set(img[0] for img in unreferenced_image_q.tuples())
|
|
||||||
|
|
||||||
# Gc to remove the images and storage
|
# Gc to remove the images and storage
|
||||||
all_repo_images = previously_referenced | unreferenced_candidates
|
successful_gc = garbage_collect_repo(repo, is_purge=True)
|
||||||
successful_gc = garbage_collect_repo(repo, all_repo_images, is_purge=True)
|
|
||||||
if not successful_gc:
|
if not successful_gc:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -175,18 +165,23 @@ def _all_images_for_gc(repo):
|
||||||
def _filter_to_unreferenced(repo, candidates_orphans):
|
def _filter_to_unreferenced(repo, candidates_orphans):
|
||||||
""" Filters the given candidate orphan images into those unreferenced by any tag or
|
""" Filters the given candidate orphan images into those unreferenced by any tag or
|
||||||
other image. """
|
other image. """
|
||||||
|
def _get_clause(field, candidates):
|
||||||
|
if len(candidates) == 1:
|
||||||
|
return field == candidates[0]
|
||||||
|
|
||||||
|
return field << candidates
|
||||||
|
|
||||||
# Any image directly referenced by a tag that still exists, cannot be GCed.
|
# Any image directly referenced by a tag that still exists, cannot be GCed.
|
||||||
direct_referenced = (RepositoryTag
|
direct_referenced = (RepositoryTag
|
||||||
.select(RepositoryTag.image)
|
.select(RepositoryTag.image)
|
||||||
.where(RepositoryTag.repository == repo.id,
|
.where(RepositoryTag.repository == repo.id,
|
||||||
RepositoryTag.image << candidates_orphans))
|
_get_clause(RepositoryTag.image, candidates_orphans)))
|
||||||
|
|
||||||
# Any image which is the parent of another image, cannot be GCed.
|
# Any image which is the parent of another image, cannot be GCed.
|
||||||
parent_referenced = (Image
|
parent_referenced = (Image
|
||||||
.select(Image.parent)
|
.select(Image.parent)
|
||||||
.where(Image.repository == repo.id,
|
.where(Image.repository == repo.id,
|
||||||
Image.parent << candidates_orphans))
|
_get_clause(Image.parent, candidates_orphans)))
|
||||||
|
|
||||||
referenced_candidates = (direct_referenced | parent_referenced)
|
referenced_candidates = (direct_referenced | parent_referenced)
|
||||||
|
|
||||||
|
@ -197,12 +192,12 @@ def _filter_to_unreferenced(repo, candidates_orphans):
|
||||||
.select(Image.id, Image.docker_image_id,
|
.select(Image.id, Image.docker_image_id,
|
||||||
ImageStorage.id, ImageStorage.uuid)
|
ImageStorage.id, ImageStorage.uuid)
|
||||||
.join(ImageStorage)
|
.join(ImageStorage)
|
||||||
.where(Image.id << candidates_orphans,
|
.where(_get_clause(Image.id, candidates_orphans),
|
||||||
~(Image.id << referenced_candidates)))
|
~(_get_clause(Image.id, referenced_candidates))))
|
||||||
return list(unreferenced_candidates)
|
return list(unreferenced_candidates)
|
||||||
|
|
||||||
|
|
||||||
def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False):
|
def garbage_collect_repo(repo, is_purge=False):
|
||||||
""" Garbage collect the specified repository object. This will remove all
|
""" Garbage collect the specified repository object. This will remove all
|
||||||
images, derived images, and other associated metadata, for images which
|
images, derived images, and other associated metadata, for images which
|
||||||
are no longer referenced by a tag or another image which is itself
|
are no longer referenced by a tag or another image which is itself
|
||||||
|
@ -212,26 +207,32 @@ def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False):
|
||||||
"""
|
"""
|
||||||
logger.debug('Garbage collecting repository %s', repo.id)
|
logger.debug('Garbage collecting repository %s', repo.id)
|
||||||
|
|
||||||
storage_id_whitelist = set()
|
if is_purge:
|
||||||
|
tag.purge_all_tags(repo)
|
||||||
|
images_for_tags_removed = {i.id for i in Image.select().where(Image.repository == repo)}
|
||||||
|
return _garbage_collect_from_image(repo, images_for_tags_removed, True)
|
||||||
|
|
||||||
candidate_orphan_image_set = tag.garbage_collect_tags(repo)
|
images_for_tags_removed = tag.garbage_collect_tags(repo)
|
||||||
if extra_candidate_set:
|
if not len(images_for_tags_removed):
|
||||||
candidate_orphan_image_set.update(extra_candidate_set)
|
logger.debug('No images for GC for repo: %s', repo.id)
|
||||||
|
|
||||||
if not len(candidate_orphan_image_set):
|
|
||||||
logger.debug('No candidate images for GC for repo: %s', repo.id)
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
for image in images_for_tags_removed:
|
||||||
|
candidate_list = [image.id] + list(reversed(image.ancestor_id_list()))
|
||||||
|
for candidate_id in candidate_list:
|
||||||
|
if not _garbage_collect_from_image(repo, {candidate_id}):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _garbage_collect_from_image(repo, candidate_orphan_image_set, is_purge=False):
|
||||||
|
storage_id_whitelist = set()
|
||||||
|
|
||||||
all_images_removed = set()
|
all_images_removed = set()
|
||||||
all_storage_id_whitelist = set()
|
all_storage_id_whitelist = set()
|
||||||
all_unreferenced_candidates = set()
|
all_unreferenced_candidates = set()
|
||||||
|
|
||||||
if not is_purge:
|
|
||||||
# Remove any images directly referenced by tags, to prune the working set.
|
|
||||||
direct_referenced = (RepositoryTag.select(RepositoryTag.image).where(
|
|
||||||
RepositoryTag.repository == repo.id, RepositoryTag.image << list(candidate_orphan_image_set)))
|
|
||||||
candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced])
|
|
||||||
|
|
||||||
# Iteratively try to remove images from the database. The only images we can remove are those
|
# Iteratively try to remove images from the database. The only images we can remove are those
|
||||||
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
||||||
# until no changes are found.
|
# until no changes are found.
|
||||||
|
|
|
@ -517,10 +517,7 @@ def _delete_tags(repo, query_modifier=None):
|
||||||
.execute())
|
.execute())
|
||||||
|
|
||||||
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
|
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
|
||||||
ancestors = reduce(lambda r, l: r | l,
|
return [tag.image for tag in tags_to_delete]
|
||||||
(set(tag.image.ancestor_id_list()) for tag in tags_to_delete))
|
|
||||||
direct_referenced = {tag.image.id for tag in tags_to_delete}
|
|
||||||
return ancestors | direct_referenced
|
|
||||||
|
|
||||||
|
|
||||||
def _get_repo_tag_image(tag_name, include_storage, modifier):
|
def _get_repo_tag_image(tag_name, include_storage, modifier):
|
||||||
|
|
Reference in a new issue