Get rid of remaining slow query for garbage collection.
This commit is contained in:
parent
b0bffe56ca
commit
05e2773fa7
3 changed files with 33 additions and 16 deletions
|
@ -1,14 +1,16 @@
|
|||
import logging
|
||||
import random
|
||||
|
||||
from peewee import JOIN_LEFT_OUTER, fn
|
||||
from datetime import timedelta, datetime
|
||||
from peewee import JOIN_LEFT_OUTER, fn
|
||||
from cachetools import ttl_cache
|
||||
|
||||
from data.model import (DataModelException, tag, db_transaction, storage, permission,
|
||||
_basequery, config)
|
||||
_basequery)
|
||||
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User,
|
||||
Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount,
|
||||
Visibility, RepositoryPermission, RepositoryActionCount,
|
||||
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
|
||||
db_for_update, get_epoch_timestamp, db_random_func)
|
||||
get_epoch_timestamp, db_random_func)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -71,8 +73,24 @@ def purge_repository(namespace_name, repository_name):
|
|||
fetched.delete_instance(recursive=True, delete_nullable=False)
|
||||
|
||||
|
||||
def find_repository_with_garbage():
|
||||
epoch_timestamp = get_epoch_timestamp()
|
||||
@ttl_cache(maxsize=1, ttl=600)
def _get_gc_expiration_policies():
  """ Return the distinct `removed_tag_expiration_s` values found across namespaces
      (presumably seconds, going by the `_s` suffix).

      The result is memoized by `ttl_cache` (maxsize=1, ttl=600), so repeated calls
      within the cache window reuse the same list instead of re-running the query.
  """
  distinct_policies = Namespace.select(Namespace.removed_tag_expiration_s).distinct()

  # Capping the number of rows is unfortunate, but it is the only way to limit memory.
  capped_rows = distinct_policies.limit(100).tuples()

  # Each row is a tuple holding just the selected column.
  return [row[0] for row in capped_rows]
|
||||
|
||||
|
||||
def get_random_gc_policy():
  """ Choose one expiration policy at random from those returned by
      `_get_gc_expiration_policies`, for use when garbage collecting.
  """
  known_policies = _get_gc_expiration_policies()
  return random.choice(known_policies)
|
||||
|
||||
|
||||
def find_repository_with_garbage(limit_to_gc_policy_s):
|
||||
expiration_timestamp = get_epoch_timestamp() - limit_to_gc_policy_s
|
||||
|
||||
try:
|
||||
candidates = (RepositoryTag
|
||||
|
@ -80,8 +98,8 @@ def find_repository_with_garbage():
|
|||
.join(Repository)
|
||||
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
|
||||
.where(~(RepositoryTag.lifetime_end_ts >> None),
|
||||
(RepositoryTag.lifetime_end_ts <=
|
||||
(epoch_timestamp - Namespace.removed_tag_expiration_s)))
|
||||
(RepositoryTag.lifetime_end_ts <= expiration_timestamp),
|
||||
(Namespace.removed_tag_expiration_s == limit_to_gc_policy_s))
|
||||
.limit(500)
|
||||
.distinct()
|
||||
.alias('candidates'))
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
import unittest
|
||||
import time
|
||||
|
||||
from peewee import fn, JOIN_LEFT_OUTER
|
||||
|
||||
from app import app, storage
|
||||
from initdb import setup_database_for_testing, finished_database_for_testing
|
||||
from data import model, database
|
||||
|
@ -166,13 +164,13 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
repository = self.createRepository(latest=['i1', 'i2', 'i3'])
|
||||
|
||||
# Ensure that no repositories are returned by the has garbage check.
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage())
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))
|
||||
|
||||
# Delete a tag.
|
||||
self.deleteTag(repository, 'latest', perform_gc=False)
|
||||
|
||||
# There should still not be any repositories with garbage, due to time machine.
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage())
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))
|
||||
|
||||
# Change the time machine expiration on the namespace.
|
||||
(database.User.update(removed_tag_expiration_s=0)
|
||||
|
@ -180,7 +178,7 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
.execute())
|
||||
|
||||
# Now we should find the repository for GC.
|
||||
repository = model.repository.find_repository_with_garbage()
|
||||
repository = model.repository.find_repository_with_garbage(0)
|
||||
self.assertIsNotNone(repository)
|
||||
self.assertEquals(REPO, repository.name)
|
||||
|
||||
|
@ -188,7 +186,7 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name)
|
||||
|
||||
# There should now be no repositories with garbage.
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage())
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage(0))
|
||||
|
||||
|
||||
def test_one_tag(self):
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import logging
|
||||
|
||||
from app import app
|
||||
from data.model.repository import find_repository_with_garbage, garbage_collect_repo
|
||||
from data.model.repository import (find_repository_with_garbage, garbage_collect_repo,
|
||||
get_random_gc_policy)
||||
from workers.worker import Worker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -14,7 +15,7 @@ class GarbageCollectionWorker(Worker):
|
|||
|
||||
def _garbage_collection_repos(self):
|
||||
""" Performs garbage collection on repositories. """
|
||||
repository = find_repository_with_garbage()
|
||||
repository = find_repository_with_garbage(get_random_gc_policy())
|
||||
if repository is None:
|
||||
logger.debug('No repository with garbage found')
|
||||
return
|
||||
|
|
Reference in a new issue