Reduce database bandwidth by tracking gc candidate images.

This commit is contained in:
Jake Moshenko 2016-08-26 14:48:39 -04:00
parent 0815f6b6c4
commit 584a5a7ddd
5 changed files with 161 additions and 107 deletions

View file

@@ -138,54 +138,86 @@ def delete_tag(namespace_name, repository_name, tag_name):
def garbage_collect_tags(repo):
    """ Remove all of the tags that have gone past their garbage collection
        expiration window, and return a set of image ids which *may* have been
        orphaned.

        The expiration filter is supplied to _delete_tags as a query modifier;
        _delete_tags performs the lookup and the deletion.
    """
    def add_expiration_data(base_query):
        """ Narrow base_query to tags whose lifetime has ended and whose end
            timestamp is at or before the namespace's expiration cutoff.
        """
        # removed_tag_expiration_s is referenced on the Namespace model class
        # (not on an instance), so this is presumably evaluated as part of the
        # query against the joined namespace row -- confirm against the model.
        expired_clause = get_epoch_timestamp() - Namespace.removed_tag_expiration_s
        return (base_query
                .switch(RepositoryTag)
                .join(Repository)
                .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                # `>> None` is peewee's IS NULL; negated, it keeps only tags that
                # actually have an end timestamp.
                .where(~(RepositoryTag.lifetime_end_ts >> None),
                       RepositoryTag.lifetime_end_ts <= expired_clause))

    return _delete_tags(repo, add_expiration_data)
def purge_all_tags(repo):
    """ Remove all tags from the repository, and return a set of all of the
        image ids which are now orphaned.
    """
    # No query modifier is passed, so every tag in the repository is selected.
    return _delete_tags(repo)
def _delete_tags(repo, query_modifier=None):
    """ Garbage collect the tags for a repository and return a set of the image
        ids which may now be orphaned.

        repo: the repository whose tags are considered for deletion.
        query_modifier: optional callable that receives the base tag query and
          returns a (typically narrowed) query. When None, every tag in the
          repository is deleted.

        Returns an empty set when no tags match. Otherwise returns the ids of
        the images the deleted tags pointed at, together with the ids of all of
        those images' ancestors.
    """
    tags_to_delete_q = (RepositoryTag
                        .select(RepositoryTag.id, Image.ancestors, Image.id)
                        .join(Image)
                        .where(RepositoryTag.repository == repo))

    if query_modifier is not None:
        tags_to_delete_q = query_modifier(tags_to_delete_q)

    # Materialize once: the rows are needed both for the deletes below and for
    # computing the candidate image ids afterwards.
    tags_to_delete = list(tags_to_delete_q)
    if not tags_to_delete:
        return set()

    with db_transaction():
        manifests_to_delete = list(TagManifest
                                   .select(TagManifest.id)
                                   .join(RepositoryTag)
                                   .where(RepositoryTag.id << tags_to_delete))

        num_deleted_manifests = 0
        if manifests_to_delete:
            # Find the set of IDs for all the labels to delete.
            manifest_labels_query = (TagManifestLabel
                                     .select()
                                     .where(TagManifestLabel.repository == repo,
                                            TagManifestLabel.annotated << manifests_to_delete))

            label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
            if label_ids:
                # Delete all the mapping entries.
                (TagManifestLabel
                 .delete()
                 .where(TagManifestLabel.repository == repo,
                        TagManifestLabel.annotated << manifests_to_delete)
                 .execute())

                # Delete all the matching labels.
                Label.delete().where(Label.id << label_ids).execute()

            # Delete the tag manifests themselves.
            num_deleted_manifests = (TagManifest
                                     .delete()
                                     .where(TagManifest.id << manifests_to_delete)
                                     .execute())

        num_deleted_tags = (RepositoryTag
                            .delete()
                            .where(RepositoryTag.id << tags_to_delete)
                            .execute())

    logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)

    # Collect the directly referenced images plus all of their ancestors. Plain
    # set accumulation avoids `reduce`, which is not a builtin on Python 3.
    candidate_image_ids = set()
    for tag in tags_to_delete:
        candidate_image_ids.add(tag.image.id)
        candidate_image_ids.update(tag.image.ancestor_id_list())

    return candidate_image_ids
def _get_repo_tag_image(tag_name, include_storage, modifier):