Merge pull request #2791 from coreos-inc/purge-repo-optimization
Optimize purging of a repository by skipping the unreferenced check
This commit is contained in:
commit
6ce06942f0
1 changed files with 56 additions and 29 deletions
|
@ -92,8 +92,7 @@ def purge_repository(namespace_name, repository_name):
|
||||||
|
|
||||||
# Gc to remove the images and storage
|
# Gc to remove the images and storage
|
||||||
all_repo_images = previously_referenced | unreferenced_candidates
|
all_repo_images = previously_referenced | unreferenced_candidates
|
||||||
successful_gc = garbage_collect_repo(repo, all_repo_images)
|
successful_gc = garbage_collect_repo(repo, all_repo_images, is_purge=True)
|
||||||
|
|
||||||
if not successful_gc:
|
if not successful_gc:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -151,7 +150,47 @@ def find_repository_with_garbage(limit_to_gc_policy_s):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def garbage_collect_repo(repo, extra_candidate_set=None):
|
def _all_images_for_gc(repo):
|
||||||
|
""" Returns all the images found in the given repository, for the purposes of GC. """
|
||||||
|
images = (Image
|
||||||
|
.select(Image.id, Image.docker_image_id,
|
||||||
|
ImageStorage.id, ImageStorage.uuid)
|
||||||
|
.join(ImageStorage)
|
||||||
|
.where(Image.repository == repo))
|
||||||
|
return list(images)
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_to_unreferenced(repo, candidates_orphans):
|
||||||
|
""" Filters the given candidate orphan images into those unreferenced by any tag or
|
||||||
|
other image. """
|
||||||
|
|
||||||
|
# Any image directly referenced by a tag that still exists, cannot be GCed.
|
||||||
|
direct_referenced = (RepositoryTag
|
||||||
|
.select(RepositoryTag.image)
|
||||||
|
.where(RepositoryTag.repository == repo.id,
|
||||||
|
RepositoryTag.image << candidates_orphans))
|
||||||
|
|
||||||
|
# Any image which is the parent of another image, cannot be GCed.
|
||||||
|
parent_referenced = (Image
|
||||||
|
.select(Image.parent)
|
||||||
|
.where(Image.repository == repo.id,
|
||||||
|
Image.parent << candidates_orphans))
|
||||||
|
|
||||||
|
referenced_candidates = (direct_referenced | parent_referenced)
|
||||||
|
|
||||||
|
# We desire a few pieces of information from the database from the following
|
||||||
|
# query: all of the image ids which are associated with this repository,
|
||||||
|
# and the storages which are associated with those images.
|
||||||
|
unreferenced_candidates = (Image
|
||||||
|
.select(Image.id, Image.docker_image_id,
|
||||||
|
ImageStorage.id, ImageStorage.uuid)
|
||||||
|
.join(ImageStorage)
|
||||||
|
.where(Image.id << candidates_orphans,
|
||||||
|
~(Image.id << referenced_candidates)))
|
||||||
|
return list(unreferenced_candidates)
|
||||||
|
|
||||||
|
|
||||||
|
def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False):
|
||||||
""" Garbage collect the specified repository object. This will remove all
|
""" Garbage collect the specified repository object. This will remove all
|
||||||
images, derived images, and other associated metadata, for images which
|
images, derived images, and other associated metadata, for images which
|
||||||
are no longer referenced by a tag or another image which is itself
|
are no longer referenced by a tag or another image which is itself
|
||||||
|
@ -162,8 +201,8 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||||
logger.debug('Garbage collecting repository %s', repo.id)
|
logger.debug('Garbage collecting repository %s', repo.id)
|
||||||
|
|
||||||
storage_id_whitelist = set()
|
storage_id_whitelist = set()
|
||||||
candidate_orphan_image_set = tag.garbage_collect_tags(repo)
|
|
||||||
|
|
||||||
|
candidate_orphan_image_set = tag.garbage_collect_tags(repo)
|
||||||
if extra_candidate_set:
|
if extra_candidate_set:
|
||||||
candidate_orphan_image_set.update(extra_candidate_set)
|
candidate_orphan_image_set.update(extra_candidate_set)
|
||||||
|
|
||||||
|
@ -175,10 +214,11 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||||
all_storage_id_whitelist = set()
|
all_storage_id_whitelist = set()
|
||||||
all_unreferenced_candidates = set()
|
all_unreferenced_candidates = set()
|
||||||
|
|
||||||
# Remove any images directly referenced by tags, to prune the working set.
|
if not is_purge:
|
||||||
direct_referenced = (RepositoryTag.select(RepositoryTag.image).where(
|
# Remove any images directly referenced by tags, to prune the working set.
|
||||||
RepositoryTag.repository == repo.id, RepositoryTag.image << list(candidate_orphan_image_set)))
|
direct_referenced = (RepositoryTag.select(RepositoryTag.image).where(
|
||||||
candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced])
|
RepositoryTag.repository == repo.id, RepositoryTag.image << list(candidate_orphan_image_set)))
|
||||||
|
candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced])
|
||||||
|
|
||||||
# Iteratively try to remove images from the database. The only images we can remove are those
|
# Iteratively try to remove images from the database. The only images we can remove are those
|
||||||
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
||||||
|
@ -192,32 +232,19 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||||
candidates_orphans = list(candidate_orphan_image_set)
|
candidates_orphans = list(candidate_orphan_image_set)
|
||||||
|
|
||||||
with db_transaction():
|
with db_transaction():
|
||||||
# Any image directly referenced by a tag that still exists, cannot be GCed.
|
# Find the images to delete.
|
||||||
direct_referenced = (RepositoryTag.select(RepositoryTag.image).where(
|
images_to_gc = (_all_images_for_gc(repo) if is_purge
|
||||||
RepositoryTag.repository == repo.id, RepositoryTag.image << candidates_orphans))
|
else _filter_to_unreferenced(repo, candidates_orphans))
|
||||||
|
|
||||||
# Any image which is the parent of another image, cannot be GCed.
|
# Make sure we are making progress.
|
||||||
parent_referenced = (Image.select(Image.parent).where(Image.repository == repo.id,
|
image_ids_to_remove = [candidate.id for candidate in images_to_gc]
|
||||||
Image.parent << candidates_orphans))
|
|
||||||
|
|
||||||
referenced_candidates = (direct_referenced | parent_referenced)
|
|
||||||
|
|
||||||
# We desire a few pieces of information from the database from the following
|
|
||||||
# query: all of the image ids which are associated with this repository,
|
|
||||||
# and the storages which are associated with those images.
|
|
||||||
unreferenced_candidates = (Image.select(Image.id, Image.docker_image_id, ImageStorage.id,
|
|
||||||
ImageStorage.uuid).join(ImageStorage)
|
|
||||||
.where(Image.id << candidates_orphans,
|
|
||||||
~(Image.id << referenced_candidates)))
|
|
||||||
|
|
||||||
image_ids_to_remove = [candidate.id for candidate in unreferenced_candidates]
|
|
||||||
making_progress = bool(len(image_ids_to_remove))
|
making_progress = bool(len(image_ids_to_remove))
|
||||||
if len(image_ids_to_remove) == 0:
|
if len(image_ids_to_remove) == 0:
|
||||||
# No more candidates to remove.
|
# No more images to remove.
|
||||||
break
|
break
|
||||||
|
|
||||||
logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove)
|
logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove)
|
||||||
storage_id_whitelist = set([candidate.storage_id for candidate in unreferenced_candidates])
|
storage_id_whitelist = set([candidate.storage_id for candidate in images_to_gc])
|
||||||
|
|
||||||
# Lookup any derived images for the images to remove.
|
# Lookup any derived images for the images to remove.
|
||||||
derived = DerivedStorageForImage.select().where(DerivedStorageForImage.source_image <<
|
derived = DerivedStorageForImage.select().where(DerivedStorageForImage.source_image <<
|
||||||
|
@ -246,7 +273,7 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||||
# Add the images to the removed set and remove them from the candidate set.
|
# Add the images to the removed set and remove them from the candidate set.
|
||||||
all_images_removed.update(image_ids_to_remove)
|
all_images_removed.update(image_ids_to_remove)
|
||||||
all_storage_id_whitelist.update(storage_id_whitelist)
|
all_storage_id_whitelist.update(storage_id_whitelist)
|
||||||
all_unreferenced_candidates.update(unreferenced_candidates)
|
all_unreferenced_candidates.update(images_to_gc)
|
||||||
|
|
||||||
candidate_orphan_image_set.difference_update(image_ids_to_remove)
|
candidate_orphan_image_set.difference_update(image_ids_to_remove)
|
||||||
|
|
||||||
|
|
Reference in a new issue