Make GC of repositories fully async for whitelisted namespaces
This change adds a worker that runs every 10 seconds and garbage-collects repositories that contain garbage. Fixes #144
This commit is contained in:
parent
acd86008c8
commit
70de107268
6 changed files with 111 additions and 10 deletions
|
@ -4,7 +4,7 @@ from peewee import JOIN_LEFT_OUTER, fn
|
|||
from datetime import timedelta, datetime
|
||||
|
||||
from data.model import (DataModelException, tag, db_transaction, storage, image, permission,
|
||||
_basequery)
|
||||
_basequery, config)
|
||||
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User,
|
||||
Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount,
|
||||
Role, RepositoryAuthorizedEmail, db_for_update, get_epoch_timestamp,
|
||||
|
@ -58,7 +58,11 @@ def purge_repository(namespace_name, repository_name):
|
|||
fetched.delete_instance(recursive=True, delete_nullable=False)
|
||||
|
||||
|
||||
def find_repository_with_garbage():
|
||||
def find_repository_with_garbage(filter_list=None):
|
||||
# TODO(jschorr): Remove the filter once we have turned the experiment on for everyone.
|
||||
if filter_list is not None and not filter_list:
|
||||
return None
|
||||
|
||||
epoch_timestamp = get_epoch_timestamp()
|
||||
|
||||
try:
|
||||
|
@ -72,16 +76,19 @@ def find_repository_with_garbage():
|
|||
.limit(500)
|
||||
.alias('candidates'))
|
||||
|
||||
if filter_list:
|
||||
candidates = candidates.where(Namespace.username << filter_list)
|
||||
|
||||
found = (RepositoryTag
|
||||
.select(candidates.c.repository)
|
||||
.select(candidates.c.repository_id)
|
||||
.from_(candidates)
|
||||
.order_by(db_random_func())
|
||||
.get())
|
||||
if not found:
|
||||
|
||||
if found is None:
|
||||
return
|
||||
|
||||
return Repository.get(Repository.id == found)
|
||||
|
||||
return Repository.get(Repository.id == found.repository_id)
|
||||
except RepositoryTag.DoesNotExist:
|
||||
return None
|
||||
except Repository.DoesNotExist:
|
||||
|
@ -89,11 +96,19 @@ def find_repository_with_garbage():
|
|||
|
||||
|
||||
def garbage_collect_repository(namespace_name, repository_name):
|
||||
# If the namespace is the async experiment, don't perform garbage collection here.
|
||||
# TODO(jschorr): Remove this check once we have turned the experiment on for everyone.
|
||||
if namespace_name in config.app_config.get('EXP_ASYNC_GARBAGE_COLLECTION', []):
|
||||
return
|
||||
|
||||
repo = get_repository(namespace_name, repository_name)
|
||||
garbage_collect_repo(repo)
|
||||
if repo is not None:
|
||||
garbage_collect_repo(repo)
|
||||
|
||||
|
||||
def garbage_collect_repo(repo):
|
||||
logger.debug('Garbage collecting repository %s', repo.id)
|
||||
|
||||
storage_id_whitelist = {}
|
||||
tag.garbage_collect_tags(repo)
|
||||
|
||||
|
|
Reference in a new issue