Change garbage collection queries to be far smaller by running GC per tag and per image
While this will require far more iterations and queries, each individual query will be quite small, which prevents us from locking up the database.
This commit is contained in:
parent
d0e1f464ff
commit
5124422332
2 changed files with 31 additions and 33 deletions
|
@ -93,18 +93,8 @@ def purge_repository(namespace_name, repository_name):
|
||||||
ApprTag.delete().where(ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)).execute()
|
ApprTag.delete().where(ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)).execute()
|
||||||
ApprTag.delete().where(ApprTag.repository == repo).execute()
|
ApprTag.delete().where(ApprTag.repository == repo).execute()
|
||||||
|
|
||||||
# Delete all tags to allow gc to reclaim storage
|
|
||||||
previously_referenced = tag.purge_all_tags(repo)
|
|
||||||
unreferenced_image_q = Image.select(Image.id).where(Image.repository == repo)
|
|
||||||
|
|
||||||
if len(previously_referenced) > 0:
|
|
||||||
unreferenced_image_q = (unreferenced_image_q.where(~(Image.id << list(previously_referenced))))
|
|
||||||
|
|
||||||
unreferenced_candidates = set(img[0] for img in unreferenced_image_q.tuples())
|
|
||||||
|
|
||||||
# Gc to remove the images and storage
|
# Gc to remove the images and storage
|
||||||
all_repo_images = previously_referenced | unreferenced_candidates
|
successful_gc = garbage_collect_repo(repo, is_purge=True)
|
||||||
successful_gc = garbage_collect_repo(repo, all_repo_images, is_purge=True)
|
|
||||||
if not successful_gc:
|
if not successful_gc:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -175,18 +165,23 @@ def _all_images_for_gc(repo):
|
||||||
def _filter_to_unreferenced(repo, candidates_orphans):
|
def _filter_to_unreferenced(repo, candidates_orphans):
|
||||||
""" Filters the given candidate orphan images into those unreferenced by any tag or
|
""" Filters the given candidate orphan images into those unreferenced by any tag or
|
||||||
other image. """
|
other image. """
|
||||||
|
def _get_clause(field, candidates):
|
||||||
|
if len(candidates) == 1:
|
||||||
|
return field == candidates[0]
|
||||||
|
|
||||||
|
return field << candidates
|
||||||
|
|
||||||
# Any image directly referenced by a tag that still exists, cannot be GCed.
|
# Any image directly referenced by a tag that still exists, cannot be GCed.
|
||||||
direct_referenced = (RepositoryTag
|
direct_referenced = (RepositoryTag
|
||||||
.select(RepositoryTag.image)
|
.select(RepositoryTag.image)
|
||||||
.where(RepositoryTag.repository == repo.id,
|
.where(RepositoryTag.repository == repo.id,
|
||||||
RepositoryTag.image << candidates_orphans))
|
_get_clause(RepositoryTag.image, candidates_orphans)))
|
||||||
|
|
||||||
# Any image which is the parent of another image, cannot be GCed.
|
# Any image which is the parent of another image, cannot be GCed.
|
||||||
parent_referenced = (Image
|
parent_referenced = (Image
|
||||||
.select(Image.parent)
|
.select(Image.parent)
|
||||||
.where(Image.repository == repo.id,
|
.where(Image.repository == repo.id,
|
||||||
Image.parent << candidates_orphans))
|
_get_clause(Image.parent, candidates_orphans)))
|
||||||
|
|
||||||
referenced_candidates = (direct_referenced | parent_referenced)
|
referenced_candidates = (direct_referenced | parent_referenced)
|
||||||
|
|
||||||
|
@ -197,12 +192,12 @@ def _filter_to_unreferenced(repo, candidates_orphans):
|
||||||
.select(Image.id, Image.docker_image_id,
|
.select(Image.id, Image.docker_image_id,
|
||||||
ImageStorage.id, ImageStorage.uuid)
|
ImageStorage.id, ImageStorage.uuid)
|
||||||
.join(ImageStorage)
|
.join(ImageStorage)
|
||||||
.where(Image.id << candidates_orphans,
|
.where(_get_clause(Image.id, candidates_orphans),
|
||||||
~(Image.id << referenced_candidates)))
|
~(_get_clause(Image.id, referenced_candidates))))
|
||||||
return list(unreferenced_candidates)
|
return list(unreferenced_candidates)
|
||||||
|
|
||||||
|
|
||||||
def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False):
|
def garbage_collect_repo(repo, is_purge=False):
|
||||||
""" Garbage collect the specified repository object. This will remove all
|
""" Garbage collect the specified repository object. This will remove all
|
||||||
images, derived images, and other associated metadata, for images which
|
images, derived images, and other associated metadata, for images which
|
||||||
are no longer referenced by a tag or another image which is itself
|
are no longer referenced by a tag or another image which is itself
|
||||||
|
@ -212,26 +207,32 @@ def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False):
|
||||||
"""
|
"""
|
||||||
logger.debug('Garbage collecting repository %s', repo.id)
|
logger.debug('Garbage collecting repository %s', repo.id)
|
||||||
|
|
||||||
storage_id_whitelist = set()
|
if is_purge:
|
||||||
|
tag.purge_all_tags(repo)
|
||||||
|
images_for_tags_removed = {i.id for i in Image.select().where(Image.repository == repo)}
|
||||||
|
return _garbage_collect_from_image(repo, images_for_tags_removed, True)
|
||||||
|
|
||||||
candidate_orphan_image_set = tag.garbage_collect_tags(repo)
|
images_for_tags_removed = tag.garbage_collect_tags(repo)
|
||||||
if extra_candidate_set:
|
if not len(images_for_tags_removed):
|
||||||
candidate_orphan_image_set.update(extra_candidate_set)
|
logger.debug('No images for GC for repo: %s', repo.id)
|
||||||
|
|
||||||
if not len(candidate_orphan_image_set):
|
|
||||||
logger.debug('No candidate images for GC for repo: %s', repo.id)
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
for image in images_for_tags_removed:
|
||||||
|
candidate_list = [image.id] + list(reversed(image.ancestor_id_list()))
|
||||||
|
for candidate_id in candidate_list:
|
||||||
|
if not _garbage_collect_from_image(repo, {candidate_id}):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _garbage_collect_from_image(repo, candidate_orphan_image_set, is_purge=False):
|
||||||
|
storage_id_whitelist = set()
|
||||||
|
|
||||||
all_images_removed = set()
|
all_images_removed = set()
|
||||||
all_storage_id_whitelist = set()
|
all_storage_id_whitelist = set()
|
||||||
all_unreferenced_candidates = set()
|
all_unreferenced_candidates = set()
|
||||||
|
|
||||||
if not is_purge:
|
|
||||||
# Remove any images directly referenced by tags, to prune the working set.
|
|
||||||
direct_referenced = (RepositoryTag.select(RepositoryTag.image).where(
|
|
||||||
RepositoryTag.repository == repo.id, RepositoryTag.image << list(candidate_orphan_image_set)))
|
|
||||||
candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced])
|
|
||||||
|
|
||||||
# Iteratively try to remove images from the database. The only images we can remove are those
|
# Iteratively try to remove images from the database. The only images we can remove are those
|
||||||
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
||||||
# until no changes are found.
|
# until no changes are found.
|
||||||
|
|
|
@ -508,10 +508,7 @@ def _delete_tags(repo, query_modifier=None):
|
||||||
.execute())
|
.execute())
|
||||||
|
|
||||||
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
|
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
|
||||||
ancestors = reduce(lambda r, l: r | l,
|
return [tag.image for tag in tags_to_delete]
|
||||||
(set(tag.image.ancestor_id_list()) for tag in tags_to_delete))
|
|
||||||
direct_referenced = {tag.image.id for tag in tags_to_delete}
|
|
||||||
return ancestors | direct_referenced
|
|
||||||
|
|
||||||
|
|
||||||
def _get_repo_tag_image(tag_name, include_storage, modifier):
|
def _get_repo_tag_image(tag_name, include_storage, modifier):
|
||||||
|
|
Reference in a new issue