Merge pull request #295 from coreos-inc/lessmem

Try not to throw any sets of data away when computing images to garbage collect.
This commit is contained in:
Jimmy Zelinskie 2015-07-31 16:27:39 -04:00
commit 74caca3316

View file

@@ -119,21 +119,30 @@ def garbage_collect_repo(repo):
.join(RepositoryTag) .join(RepositoryTag)
.where(Image.repository == repo)) .where(Image.repository == repo))
referenced_ancestors = set() def gen_referenced_ancestors():
for tagged_image in tagged_images: for tagged_image in tagged_images:
# The ancestor list is in the format '/1/2/3/', extract just the ids # The ancestor list is in the format '/1/2/3/', extract just the ids
ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1] ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1]
ancestor_list = [int(img_id_str) for img_id_str in ancestor_id_strings] for img_id_str in ancestor_id_strings:
referenced_ancestors = referenced_ancestors.union(set(ancestor_list)) yield int(img_id_str)
referenced_ancestors.add(tagged_image.id) yield tagged_image.id
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo) referenced_ancestors = set(gen_referenced_ancestors())
all_images = {int(img.id): img for img in all_repo_images}
to_remove = set(all_images.keys()).difference(referenced_ancestors) # We desire two pieces of information from the database from the following
# query: all of the image ids which are associated with this repository,
# and the storages which are associated with those images. In order to
# fetch just this information, and bypass all of the peewee model parsing
# code, which is overkill for just two fields, we use a tuple query, and
# feed that directly to the dict constructor, which accepts an
# iterable of (key, value) tuples: [(k, v), (k, v), ...]
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo).tuples()
images_to_storages = dict(all_repo_images)
to_remove = set(images_to_storages.keys()).difference(referenced_ancestors)
if len(to_remove) > 0: if len(to_remove) > 0:
logger.info('Cleaning up unreferenced images: %s', to_remove) logger.info('Cleaning up unreferenced images: %s', to_remove)
storage_id_whitelist = {all_images[to_remove_id].storage_id for to_remove_id in to_remove} storage_id_whitelist = {images_to_storages[to_remove_id] for to_remove_id in to_remove}
Image.delete().where(Image.id << list(to_remove)).execute() Image.delete().where(Image.id << list(to_remove)).execute()
if len(to_remove) > 0: if len(to_remove) > 0: