Try not to throw any sets of data away when computing images to garbage collect.
This commit is contained in:
parent
c7e464ddf2
commit
e133ea0962
1 changed file with 20 additions and 11 deletions
|
@@ -119,21 +119,30 @@ def garbage_collect_repo(repo):
|
|||
.join(RepositoryTag)
|
||||
.where(Image.repository == repo))
|
||||
|
||||
referenced_ancestors = set()
|
||||
for tagged_image in tagged_images:
|
||||
# The ancestor list is in the format '/1/2/3/', extract just the ids
|
||||
ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1]
|
||||
ancestor_list = [int(img_id_str) for img_id_str in ancestor_id_strings]
|
||||
referenced_ancestors = referenced_ancestors.union(set(ancestor_list))
|
||||
referenced_ancestors.add(tagged_image.id)
|
||||
def gen_referenced_ancestors():
|
||||
for tagged_image in tagged_images:
|
||||
# The ancestor list is in the format '/1/2/3/', extract just the ids
|
||||
ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1]
|
||||
for img_id_str in ancestor_id_strings:
|
||||
yield int(img_id_str)
|
||||
yield tagged_image.id
|
||||
|
||||
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo)
|
||||
all_images = {int(img.id): img for img in all_repo_images}
|
||||
to_remove = set(all_images.keys()).difference(referenced_ancestors)
|
||||
referenced_ancestors = set(gen_referenced_ancestors())
|
||||
|
||||
# We desire two pieces of information from the database from the following
|
||||
# query: all of the image ids which are associated with this repository,
|
||||
# and the storages which are associated with those images. In order to
|
||||
# fetch just this information, and bypass all of the peewee model parsing
|
||||
# code, which is overkill for just two fields, we use a tuple query, and
|
||||
# feed that directly to the dictionary tuple constructor which takes an
|
||||
# iterable of tuples containing [(k, v), (k, v), ...]
|
||||
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo).tuples()
|
||||
images_to_storages = dict(all_repo_images)
|
||||
to_remove = set(images_to_storages.keys()).difference(referenced_ancestors)
|
||||
|
||||
if len(to_remove) > 0:
|
||||
logger.info('Cleaning up unreferenced images: %s', to_remove)
|
||||
storage_id_whitelist = {all_images[to_remove_id].storage_id for to_remove_id in to_remove}
|
||||
storage_id_whitelist = {images_to_storages[to_remove_id] for to_remove_id in to_remove}
|
||||
Image.delete().where(Image.id << list(to_remove)).execute()
|
||||
|
||||
if len(to_remove) > 0:
|
||||
|
|
Reference in a new issue