Switch tag deletion to use a single query
This commit is contained in:
parent
79101c1055
commit
acd86008c8
2 changed files with 60 additions and 39 deletions
|
@ -7,7 +7,8 @@ from data.model import (DataModelException, tag, db_transaction, storage, image,
|
||||||
_basequery)
|
_basequery)
|
||||||
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User,
|
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User,
|
||||||
Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount,
|
Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount,
|
||||||
Role, RepositoryAuthorizedEmail, db_for_update)
|
Role, RepositoryAuthorizedEmail, db_for_update, get_epoch_timestamp,
|
||||||
|
db_random_func)
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -57,40 +58,67 @@ def purge_repository(namespace_name, repository_name):
|
||||||
fetched.delete_instance(recursive=True, delete_nullable=False)
|
fetched.delete_instance(recursive=True, delete_nullable=False)
|
||||||
|
|
||||||
|
|
||||||
def garbage_collect_repository(namespace_name, repository_name):
|
def find_repository_with_garbage():
|
||||||
storage_id_whitelist = {}
|
epoch_timestamp = get_epoch_timestamp()
|
||||||
|
|
||||||
tag.garbage_collect_tags(namespace_name, repository_name)
|
try:
|
||||||
|
candidates = (RepositoryTag
|
||||||
with db_transaction():
|
.select(RepositoryTag.repository)
|
||||||
# TODO (jake): We could probably select this and all the images in a single query using
|
|
||||||
# a different kind of join.
|
|
||||||
|
|
||||||
# Get a list of all images used by tags in the repository
|
|
||||||
tag_query = (RepositoryTag
|
|
||||||
.select(RepositoryTag, Image, ImageStorage)
|
|
||||||
.join(Image)
|
|
||||||
.join(ImageStorage, JOIN_LEFT_OUTER)
|
|
||||||
.switch(RepositoryTag)
|
|
||||||
.join(Repository)
|
.join(Repository)
|
||||||
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
|
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
|
||||||
.where(Repository.name == repository_name, Namespace.username == namespace_name))
|
.where(~(RepositoryTag.lifetime_end_ts >> None),
|
||||||
|
(RepositoryTag.lifetime_end_ts <=
|
||||||
|
(epoch_timestamp - Namespace.removed_tag_expiration_s)))
|
||||||
|
.limit(500)
|
||||||
|
.alias('candidates'))
|
||||||
|
|
||||||
referenced_ancestors = set()
|
found = (RepositoryTag
|
||||||
for one_tag in tag_query:
|
.select(candidates.c.repository)
|
||||||
# The ancestor list is in the format '/1/2/3/', extract just the ids
|
.from_(candidates)
|
||||||
ancestor_id_strings = one_tag.image.ancestors.split('/')[1:-1]
|
.order_by(db_random_func())
|
||||||
ancestor_list = [int(img_id_str) for img_id_str in ancestor_id_strings]
|
.get())
|
||||||
referenced_ancestors = referenced_ancestors.union(set(ancestor_list))
|
if not found:
|
||||||
referenced_ancestors.add(one_tag.image.id)
|
return
|
||||||
|
|
||||||
all_repo_images = image.get_repository_images(namespace_name, repository_name)
|
return Repository.get(Repository.id == found)
|
||||||
|
|
||||||
|
except RepositoryTag.DoesNotExist:
|
||||||
|
return None
|
||||||
|
except Repository.DoesNotExist:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def garbage_collect_repository(namespace_name, repository_name):
|
||||||
|
repo = get_repository(namespace_name, repository_name)
|
||||||
|
garbage_collect_repo(repo)
|
||||||
|
|
||||||
|
|
||||||
|
def garbage_collect_repo(repo):
|
||||||
|
storage_id_whitelist = {}
|
||||||
|
tag.garbage_collect_tags(repo)
|
||||||
|
|
||||||
|
with db_transaction():
|
||||||
|
# Get a list of all images used by tags in the repository
|
||||||
|
tagged_images = (Image
|
||||||
|
.select(Image.id, Image.ancestors)
|
||||||
|
.join(RepositoryTag)
|
||||||
|
.where(Image.repository == repo))
|
||||||
|
|
||||||
|
referenced_anscestors = set()
|
||||||
|
for tagged_image in tagged_images:
|
||||||
|
# The anscestor list is in the format '/1/2/3/', extract just the ids
|
||||||
|
anscestor_id_strings = tagged_image.ancestors.split('/')[1:-1]
|
||||||
|
ancestor_list = [int(img_id_str) for img_id_str in anscestor_id_strings]
|
||||||
|
referenced_anscestors = referenced_anscestors.union(set(ancestor_list))
|
||||||
|
referenced_anscestors.add(tagged_image.id)
|
||||||
|
|
||||||
|
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo)
|
||||||
all_images = {int(img.id): img for img in all_repo_images}
|
all_images = {int(img.id): img for img in all_repo_images}
|
||||||
to_remove = set(all_images.keys()).difference(referenced_ancestors)
|
to_remove = set(all_images.keys()).difference(referenced_anscestors)
|
||||||
|
|
||||||
if len(to_remove) > 0:
|
if len(to_remove) > 0:
|
||||||
logger.info('Cleaning up unreferenced images: %s', to_remove)
|
logger.info('Cleaning up unreferenced images: %s', to_remove)
|
||||||
storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove}
|
storage_id_whitelist = {all_images[to_remove_id].storage_id for to_remove_id in to_remove}
|
||||||
Image.delete().where(Image.id << list(to_remove)).execute()
|
Image.delete().where(Image.id << list(to_remove)).execute()
|
||||||
|
|
||||||
if len(to_remove) > 0:
|
if len(to_remove) > 0:
|
||||||
|
|
|
@ -97,21 +97,14 @@ def delete_tag(namespace_name, repository_name, tag_name):
|
||||||
found.save()
|
found.save()
|
||||||
|
|
||||||
|
|
||||||
def garbage_collect_tags(namespace_name, repository_name):
|
def garbage_collect_tags(repo):
|
||||||
# We do this without using a join to prevent holding read locks on the repository table
|
|
||||||
repo = _basequery.get_existing_repository(namespace_name, repository_name)
|
|
||||||
expired_time = get_epoch_timestamp() - repo.namespace_user.removed_tag_expiration_s
|
expired_time = get_epoch_timestamp() - repo.namespace_user.removed_tag_expiration_s
|
||||||
|
|
||||||
tags_to_delete = list(RepositoryTag
|
(RepositoryTag
|
||||||
.select(RepositoryTag.id)
|
.delete()
|
||||||
.where(RepositoryTag.repository == repo,
|
.where(RepositoryTag.repository == repo,
|
||||||
~(RepositoryTag.lifetime_end_ts >> None),
|
~(RepositoryTag.lifetime_end_ts >> None),
|
||||||
(RepositoryTag.lifetime_end_ts <= expired_time))
|
(RepositoryTag.lifetime_end_ts <= expired_time))
|
||||||
.order_by(RepositoryTag.id))
|
|
||||||
if len(tags_to_delete) > 0:
|
|
||||||
(RepositoryTag
|
|
||||||
.delete()
|
|
||||||
.where(RepositoryTag.id << tags_to_delete)
|
|
||||||
.execute())
|
.execute())
|
||||||
|
|
||||||
|
|
||||||
|
|
Reference in a new issue