Try not to throw any sets of data away when computing images to garbage collect.
This commit is contained in:
parent
c7e464ddf2
commit
e133ea0962
1 changed file with 20 additions and 11 deletions
|
@@ -119,21 +119,30 @@ def garbage_collect_repo(repo):
|
||||||
.join(RepositoryTag)
|
.join(RepositoryTag)
|
||||||
.where(Image.repository == repo))
|
.where(Image.repository == repo))
|
||||||
|
|
||||||
referenced_ancestors = set()
|
def gen_referenced_ancestors():
|
||||||
for tagged_image in tagged_images:
|
for tagged_image in tagged_images:
|
||||||
# The ancestor list is in the format '/1/2/3/', extract just the ids
|
# The ancestor list is in the format '/1/2/3/', extract just the ids
|
||||||
ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1]
|
ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1]
|
||||||
ancestor_list = [int(img_id_str) for img_id_str in ancestor_id_strings]
|
for img_id_str in ancestor_id_strings:
|
||||||
referenced_ancestors = referenced_ancestors.union(set(ancestor_list))
|
yield int(img_id_str)
|
||||||
referenced_ancestors.add(tagged_image.id)
|
yield tagged_image.id
|
||||||
|
|
||||||
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo)
|
referenced_ancestors = set(gen_referenced_ancestors())
|
||||||
all_images = {int(img.id): img for img in all_repo_images}
|
|
||||||
to_remove = set(all_images.keys()).difference(referenced_ancestors)
|
# We desire two pieces of information from the database from the following
|
||||||
|
# query: all of the image ids which are associated with this repository,
|
||||||
|
# and the storages which are associated with those images. In order to
|
||||||
|
# fetch just this information, and bypass all of the peewee model parsing
|
||||||
|
# code, which is overkill for just two fields, we use a tuple query, and
|
||||||
|
# feed that directly to the dictionary tuple constructor which takes an
|
||||||
|
# iterable of tuples containing [(k, v), (k, v), ...]
|
||||||
|
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo).tuples()
|
||||||
|
images_to_storages = dict(all_repo_images)
|
||||||
|
to_remove = set(images_to_storages.keys()).difference(referenced_ancestors)
|
||||||
|
|
||||||
if len(to_remove) > 0:
|
if len(to_remove) > 0:
|
||||||
logger.info('Cleaning up unreferenced images: %s', to_remove)
|
logger.info('Cleaning up unreferenced images: %s', to_remove)
|
||||||
storage_id_whitelist = {all_images[to_remove_id].storage_id for to_remove_id in to_remove}
|
storage_id_whitelist = {images_to_storages[to_remove_id] for to_remove_id in to_remove}
|
||||||
Image.delete().where(Image.id << list(to_remove)).execute()
|
Image.delete().where(Image.id << list(to_remove)).execute()
|
||||||
|
|
||||||
if len(to_remove) > 0:
|
if len(to_remove) > 0:
|
||||||
|
|
Reference in a new issue