Merge pull request #2791 from coreos-inc/purge-repo-optimization
Optimize purging of a repository by skipping the unreferenced check
This commit is contained in:
		
						commit
						6ce06942f0
					
				
					 1 changed file with 56 additions and 29 deletions
				
			
		|  | @ -92,8 +92,7 @@ def purge_repository(namespace_name, repository_name): | ||||||
| 
 | 
 | ||||||
|   # Gc to remove the images and storage |   # Gc to remove the images and storage | ||||||
|   all_repo_images = previously_referenced | unreferenced_candidates |   all_repo_images = previously_referenced | unreferenced_candidates | ||||||
|   successful_gc = garbage_collect_repo(repo, all_repo_images) |   successful_gc = garbage_collect_repo(repo, all_repo_images, is_purge=True) | ||||||
| 
 |  | ||||||
|   if not successful_gc: |   if not successful_gc: | ||||||
|     return False |     return False | ||||||
| 
 | 
 | ||||||
|  | @ -151,7 +150,47 @@ def find_repository_with_garbage(limit_to_gc_policy_s): | ||||||
|     return None |     return None | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def garbage_collect_repo(repo, extra_candidate_set=None): | def _all_images_for_gc(repo): | ||||||
|  |   """ Returns all the images found in the given repository, for the purposes of GC. """ | ||||||
|  |   images = (Image | ||||||
|  |             .select(Image.id, Image.docker_image_id, | ||||||
|  |                     ImageStorage.id, ImageStorage.uuid) | ||||||
|  |             .join(ImageStorage) | ||||||
|  |             .where(Image.repository == repo)) | ||||||
|  |   return list(images) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _filter_to_unreferenced(repo, candidates_orphans): | ||||||
|  |   """ Filters the given candidate orphan images into those unreferenced by any tag or | ||||||
|  |       other image. """ | ||||||
|  | 
 | ||||||
|  |   # Any image directly referenced by a tag that still exists, cannot be GCed. | ||||||
|  |   direct_referenced = (RepositoryTag | ||||||
|  |                        .select(RepositoryTag.image) | ||||||
|  |                        .where(RepositoryTag.repository == repo.id, | ||||||
|  |                               RepositoryTag.image << candidates_orphans)) | ||||||
|  | 
 | ||||||
|  |   # Any image which is the parent of another image, cannot be GCed. | ||||||
|  |   parent_referenced = (Image | ||||||
|  |                        .select(Image.parent) | ||||||
|  |                        .where(Image.repository == repo.id, | ||||||
|  |                               Image.parent << candidates_orphans)) | ||||||
|  | 
 | ||||||
|  |   referenced_candidates = (direct_referenced | parent_referenced) | ||||||
|  | 
 | ||||||
|  |   # We desire a few pieces of information from the database from the following | ||||||
|  |   # query: all of the image ids which are associated with this repository, | ||||||
|  |   # and the storages which are associated with those images. | ||||||
|  |   unreferenced_candidates = (Image | ||||||
|  |                              .select(Image.id, Image.docker_image_id, | ||||||
|  |                                      ImageStorage.id, ImageStorage.uuid) | ||||||
|  |                              .join(ImageStorage) | ||||||
|  |                              .where(Image.id << candidates_orphans, | ||||||
|  |                                     ~(Image.id << referenced_candidates))) | ||||||
|  |   return list(unreferenced_candidates) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False): | ||||||
|   """ Garbage collect the specified repository object. This will remove all |   """ Garbage collect the specified repository object. This will remove all | ||||||
|       images, derived images, and other associated metadata, for images which |       images, derived images, and other associated metadata, for images which | ||||||
|       are no longer referenced by a tag or another image which is itself |       are no longer referenced by a tag or another image which is itself | ||||||
|  | @ -162,8 +201,8 @@ def garbage_collect_repo(repo, extra_candidate_set=None): | ||||||
|   logger.debug('Garbage collecting repository %s', repo.id) |   logger.debug('Garbage collecting repository %s', repo.id) | ||||||
| 
 | 
 | ||||||
|   storage_id_whitelist = set() |   storage_id_whitelist = set() | ||||||
|   candidate_orphan_image_set = tag.garbage_collect_tags(repo) |  | ||||||
| 
 | 
 | ||||||
|  |   candidate_orphan_image_set = tag.garbage_collect_tags(repo) | ||||||
|   if extra_candidate_set: |   if extra_candidate_set: | ||||||
|     candidate_orphan_image_set.update(extra_candidate_set) |     candidate_orphan_image_set.update(extra_candidate_set) | ||||||
| 
 | 
 | ||||||
|  | @ -175,10 +214,11 @@ def garbage_collect_repo(repo, extra_candidate_set=None): | ||||||
|   all_storage_id_whitelist = set() |   all_storage_id_whitelist = set() | ||||||
|   all_unreferenced_candidates = set() |   all_unreferenced_candidates = set() | ||||||
| 
 | 
 | ||||||
|   # Remove any images directly referenced by tags, to prune the working set. |   if not is_purge: | ||||||
|   direct_referenced = (RepositoryTag.select(RepositoryTag.image).where( |     # Remove any images directly referenced by tags, to prune the working set. | ||||||
|     RepositoryTag.repository == repo.id, RepositoryTag.image << list(candidate_orphan_image_set))) |     direct_referenced = (RepositoryTag.select(RepositoryTag.image).where( | ||||||
|   candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced]) |       RepositoryTag.repository == repo.id, RepositoryTag.image << list(candidate_orphan_image_set))) | ||||||
|  |     candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced]) | ||||||
| 
 | 
 | ||||||
|   # Iteratively try to remove images from the database. The only images we can remove are those |   # Iteratively try to remove images from the database. The only images we can remove are those | ||||||
|   # that are not referenced by tags AND not the parents of other images. We continue removing images |   # that are not referenced by tags AND not the parents of other images. We continue removing images | ||||||
|  | @ -192,32 +232,19 @@ def garbage_collect_repo(repo, extra_candidate_set=None): | ||||||
|     candidates_orphans = list(candidate_orphan_image_set) |     candidates_orphans = list(candidate_orphan_image_set) | ||||||
| 
 | 
 | ||||||
|     with db_transaction(): |     with db_transaction(): | ||||||
|       # Any image directly referenced by a tag that still exists, cannot be GCed. |       # Find the images to delete. | ||||||
|       direct_referenced = (RepositoryTag.select(RepositoryTag.image).where( |       images_to_gc = (_all_images_for_gc(repo) if is_purge | ||||||
|         RepositoryTag.repository == repo.id, RepositoryTag.image << candidates_orphans)) |                       else _filter_to_unreferenced(repo, candidates_orphans)) | ||||||
| 
 | 
 | ||||||
|       # Any image which is the parent of another image, cannot be GCed. |       # Make sure we are making progress. | ||||||
|       parent_referenced = (Image.select(Image.parent).where(Image.repository == repo.id, |       image_ids_to_remove = [candidate.id for candidate in images_to_gc] | ||||||
|                                                             Image.parent << candidates_orphans)) |  | ||||||
| 
 |  | ||||||
|       referenced_candidates = (direct_referenced | parent_referenced) |  | ||||||
| 
 |  | ||||||
|       # We desire a few pieces of information from the database from the following |  | ||||||
|       # query: all of the image ids which are associated with this repository, |  | ||||||
|       # and the storages which are associated with those images. |  | ||||||
|       unreferenced_candidates = (Image.select(Image.id, Image.docker_image_id, ImageStorage.id, |  | ||||||
|                                               ImageStorage.uuid).join(ImageStorage) |  | ||||||
|                                  .where(Image.id << candidates_orphans, |  | ||||||
|                                         ~(Image.id << referenced_candidates))) |  | ||||||
| 
 |  | ||||||
|       image_ids_to_remove = [candidate.id for candidate in unreferenced_candidates] |  | ||||||
|       making_progress = bool(len(image_ids_to_remove)) |       making_progress = bool(len(image_ids_to_remove)) | ||||||
|       if len(image_ids_to_remove) == 0: |       if len(image_ids_to_remove) == 0: | ||||||
|         # No more candidates to remove. |         # No more images to remove. | ||||||
|         break |         break | ||||||
| 
 | 
 | ||||||
|       logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove) |       logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove) | ||||||
|       storage_id_whitelist = set([candidate.storage_id for candidate in unreferenced_candidates]) |       storage_id_whitelist = set([candidate.storage_id for candidate in images_to_gc]) | ||||||
| 
 | 
 | ||||||
|       # Lookup any derived images for the images to remove. |       # Lookup any derived images for the images to remove. | ||||||
|       derived = DerivedStorageForImage.select().where(DerivedStorageForImage.source_image << |       derived = DerivedStorageForImage.select().where(DerivedStorageForImage.source_image << | ||||||
|  | @ -246,7 +273,7 @@ def garbage_collect_repo(repo, extra_candidate_set=None): | ||||||
|     # Add the images to the removed set and remove them from the candidate set. |     # Add the images to the removed set and remove them from the candidate set. | ||||||
|     all_images_removed.update(image_ids_to_remove) |     all_images_removed.update(image_ids_to_remove) | ||||||
|     all_storage_id_whitelist.update(storage_id_whitelist) |     all_storage_id_whitelist.update(storage_id_whitelist) | ||||||
|     all_unreferenced_candidates.update(unreferenced_candidates) |     all_unreferenced_candidates.update(images_to_gc) | ||||||
| 
 | 
 | ||||||
|     candidate_orphan_image_set.difference_update(image_ids_to_remove) |     candidate_orphan_image_set.difference_update(image_ids_to_remove) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Reference in a new issue