Get rid of remaining slow query for garbage collection.

This commit is contained in:
Jake Moshenko 2016-08-01 18:22:38 -04:00
parent b0bffe56ca
commit 05e2773fa7
3 changed files with 33 additions and 16 deletions

View file

@ -1,14 +1,16 @@
import logging
import random
from peewee import JOIN_LEFT_OUTER, fn
from datetime import timedelta, datetime
from peewee import JOIN_LEFT_OUTER, fn
from cachetools import ttl_cache
from data.model import (DataModelException, tag, db_transaction, storage, permission,
_basequery, config)
_basequery)
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User,
Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount,
Visibility, RepositoryPermission, RepositoryActionCount,
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
db_for_update, get_epoch_timestamp, db_random_func)
get_epoch_timestamp, db_random_func)
logger = logging.getLogger(__name__)
@ -71,8 +73,24 @@ def purge_repository(namespace_name, repository_name):
fetched.delete_instance(recursive=True, delete_nullable=False)
def find_repository_with_garbage():
epoch_timestamp = get_epoch_timestamp()
@ttl_cache(maxsize=1, ttl=600)
def _get_gc_expiration_policies():
policy_tuples_query = (Namespace
.select(Namespace.removed_tag_expiration_s)
.distinct()
.limit(100) # This sucks but it's the only way to limit memory
.tuples())
return [policy[0] for policy in policy_tuples_query]
def get_random_gc_policy():
""" Return a single random policy from the database to use when garbage collecting.
"""
return random.choice(_get_gc_expiration_policies())
def find_repository_with_garbage(limit_to_gc_policy_s):
expiration_timestamp = get_epoch_timestamp() - limit_to_gc_policy_s
try:
candidates = (RepositoryTag
@ -80,8 +98,8 @@ def find_repository_with_garbage():
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(~(RepositoryTag.lifetime_end_ts >> None),
(RepositoryTag.lifetime_end_ts <=
(epoch_timestamp - Namespace.removed_tag_expiration_s)))
(RepositoryTag.lifetime_end_ts <= expiration_timestamp),
(Namespace.removed_tag_expiration_s == limit_to_gc_policy_s))
.limit(500)
.distinct()
.alias('candidates'))