diff --git a/data/model/repository.py b/data/model/repository.py index 6bd833a5b..ea01c63a4 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -93,18 +93,8 @@ def purge_repository(namespace_name, repository_name): ApprTag.delete().where(ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)).execute() ApprTag.delete().where(ApprTag.repository == repo).execute() - # Delete all tags to allow gc to reclaim storage - previously_referenced = tag.purge_all_tags(repo) - unreferenced_image_q = Image.select(Image.id).where(Image.repository == repo) - - if len(previously_referenced) > 0: - unreferenced_image_q = (unreferenced_image_q.where(~(Image.id << list(previously_referenced)))) - - unreferenced_candidates = set(img[0] for img in unreferenced_image_q.tuples()) - # Gc to remove the images and storage - all_repo_images = previously_referenced | unreferenced_candidates - successful_gc = garbage_collect_repo(repo, all_repo_images, is_purge=True) + successful_gc = garbage_collect_repo(repo, is_purge=True) if not successful_gc: return False @@ -175,18 +165,23 @@ def _all_images_for_gc(repo): def _filter_to_unreferenced(repo, candidates_orphans): """ Filters the given candidate orphan images into those unreferenced by any tag or other image. """ + def _get_clause(field, candidates): + if len(candidates) == 1: + return field == candidates[0] + + return field << candidates # Any image directly referenced by a tag that still exists, cannot be GCed. direct_referenced = (RepositoryTag .select(RepositoryTag.image) .where(RepositoryTag.repository == repo.id, - RepositoryTag.image << candidates_orphans)) + _get_clause(RepositoryTag.image, candidates_orphans))) # Any image which is the parent of another image, cannot be GCed. parent_referenced = (Image .select(Image.parent) .where(Image.repository == repo.id, - Image.parent << candidates_orphans)) + _get_clause(Image.parent, candidates_orphans))) referenced_candidates = (direct_referenced | parent_referenced) @@ -197,12 +192,12 @@ def _filter_to_unreferenced(repo, candidates_orphans): .select(Image.id, Image.docker_image_id, ImageStorage.id, ImageStorage.uuid) .join(ImageStorage) - .where(Image.id << candidates_orphans, - ~(Image.id << referenced_candidates))) + .where(_get_clause(Image.id, candidates_orphans), + ~(_get_clause(Image.id, referenced_candidates)))) return list(unreferenced_candidates) -def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False): +def garbage_collect_repo(repo, is_purge=False): """ Garbage collect the specified repository object. This will remove all images, derived images, and other associated metadata, for images which are no longer referenced by a tag or another image which is itself @@ -212,26 +207,32 @@ def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False): """ logger.debug('Garbage collecting repository %s', repo.id) - storage_id_whitelist = set() + if is_purge: + tag.purge_all_tags(repo) + images_for_tags_removed = {i.id for i in Image.select().where(Image.repository == repo)} + return _garbage_collect_from_image(repo, images_for_tags_removed, True) - candidate_orphan_image_set = tag.garbage_collect_tags(repo) - if extra_candidate_set: - candidate_orphan_image_set.update(extra_candidate_set) - - if not len(candidate_orphan_image_set): - logger.debug('No candidate images for GC for repo: %s', repo.id) + images_for_tags_removed = tag.garbage_collect_tags(repo) + if not len(images_for_tags_removed): + logger.debug('No images for GC for repo: %s', repo.id) return True + for image in images_for_tags_removed: + candidate_list = [image.id] + list(reversed(image.ancestor_id_list())) + for candidate_id in candidate_list: + if not _garbage_collect_from_image(repo, {candidate_id}): + return False + + return True + + +def _garbage_collect_from_image(repo, candidate_orphan_image_set, is_purge=False): + storage_id_whitelist = set() + all_images_removed = set() all_storage_id_whitelist = set() all_unreferenced_candidates = set() - if not is_purge: - # Remove any images directly referenced by tags, to prune the working set. - direct_referenced = (RepositoryTag.select(RepositoryTag.image).where( - RepositoryTag.repository == repo.id, RepositoryTag.image << list(candidate_orphan_image_set))) - candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced]) - # Iteratively try to remove images from the database. The only images we can remove are those # that are not referenced by tags AND not the parents of other images. We continue removing images # until no changes are found. diff --git a/data/model/tag.py b/data/model/tag.py index 3cd14f298..6998beec1 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -508,10 +508,7 @@ def _delete_tags(repo, query_modifier=None): .execute()) logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests) - ancestors = reduce(lambda r, l: r | l, - (set(tag.image.ancestor_id_list()) for tag in tags_to_delete)) - direct_referenced = {tag.image.id for tag in tags_to_delete} - return ancestors | direct_referenced + return [tag.image for tag in tags_to_delete] def _get_repo_tag_image(tag_name, include_storage, modifier):