Get rid of remaining slow query for garbage collection.
This commit is contained in:
parent
b0bffe56ca
commit
05e2773fa7
3 changed files with 33 additions and 16 deletions
|
@ -1,14 +1,16 @@
|
||||||
import logging
|
import logging
|
||||||
|
import random
|
||||||
|
|
||||||
from peewee import JOIN_LEFT_OUTER, fn
|
|
||||||
from datetime import timedelta, datetime
|
from datetime import timedelta, datetime
|
||||||
|
from peewee import JOIN_LEFT_OUTER, fn
|
||||||
|
from cachetools import ttl_cache
|
||||||
|
|
||||||
from data.model import (DataModelException, tag, db_transaction, storage, permission,
|
from data.model import (DataModelException, tag, db_transaction, storage, permission,
|
||||||
_basequery, config)
|
_basequery)
|
||||||
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User,
|
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User,
|
||||||
Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount,
|
Visibility, RepositoryPermission, RepositoryActionCount,
|
||||||
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
|
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
|
||||||
db_for_update, get_epoch_timestamp, db_random_func)
|
get_epoch_timestamp, db_random_func)
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -71,8 +73,24 @@ def purge_repository(namespace_name, repository_name):
|
||||||
fetched.delete_instance(recursive=True, delete_nullable=False)
|
fetched.delete_instance(recursive=True, delete_nullable=False)
|
||||||
|
|
||||||
|
|
||||||
def find_repository_with_garbage():
|
@ttl_cache(maxsize=1, ttl=600)
|
||||||
epoch_timestamp = get_epoch_timestamp()
|
def _get_gc_expiration_policies():
|
||||||
|
policy_tuples_query = (Namespace
|
||||||
|
.select(Namespace.removed_tag_expiration_s)
|
||||||
|
.distinct()
|
||||||
|
.limit(100) # This sucks but it's the only way to limit memory
|
||||||
|
.tuples())
|
||||||
|
return [policy[0] for policy in policy_tuples_query]
|
||||||
|
|
||||||
|
|
||||||
|
def get_random_gc_policy():
|
||||||
|
""" Return a single random policy from the database to use when garbage collecting.
|
||||||
|
"""
|
||||||
|
return random.choice(_get_gc_expiration_policies())
|
||||||
|
|
||||||
|
|
||||||
|
def find_repository_with_garbage(limit_to_gc_policy_s):
|
||||||
|
expiration_timestamp = get_epoch_timestamp() - limit_to_gc_policy_s
|
||||||
|
|
||||||
try:
|
try:
|
||||||
candidates = (RepositoryTag
|
candidates = (RepositoryTag
|
||||||
|
@ -80,8 +98,8 @@ def find_repository_with_garbage():
|
||||||
.join(Repository)
|
.join(Repository)
|
||||||
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
|
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
|
||||||
.where(~(RepositoryTag.lifetime_end_ts >> None),
|
.where(~(RepositoryTag.lifetime_end_ts >> None),
|
||||||
(RepositoryTag.lifetime_end_ts <=
|
(RepositoryTag.lifetime_end_ts <= expiration_timestamp),
|
||||||
(epoch_timestamp - Namespace.removed_tag_expiration_s)))
|
(Namespace.removed_tag_expiration_s == limit_to_gc_policy_s))
|
||||||
.limit(500)
|
.limit(500)
|
||||||
.distinct()
|
.distinct()
|
||||||
.alias('candidates'))
|
.alias('candidates'))
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
import unittest
|
import unittest
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from peewee import fn, JOIN_LEFT_OUTER
|
|
||||||
|
|
||||||
from app import app, storage
|
from app import app, storage
|
||||||
from initdb import setup_database_for_testing, finished_database_for_testing
|
from initdb import setup_database_for_testing, finished_database_for_testing
|
||||||
from data import model, database
|
from data import model, database
|
||||||
|
@ -166,13 +164,13 @@ class TestGarbageCollection(unittest.TestCase):
|
||||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'])
|
repository = self.createRepository(latest=['i1', 'i2', 'i3'])
|
||||||
|
|
||||||
# Ensure that no repositories are returned by the has garbage check.
|
# Ensure that no repositories are returned by the has garbage check.
|
||||||
self.assertIsNone(model.repository.find_repository_with_garbage())
|
self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))
|
||||||
|
|
||||||
# Delete a tag.
|
# Delete a tag.
|
||||||
self.deleteTag(repository, 'latest', perform_gc=False)
|
self.deleteTag(repository, 'latest', perform_gc=False)
|
||||||
|
|
||||||
# There should still not be any repositories with garbage, due to time machine.
|
# There should still not be any repositories with garbage, due to time machine.
|
||||||
self.assertIsNone(model.repository.find_repository_with_garbage())
|
self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))
|
||||||
|
|
||||||
# Change the time machine expiration on the namespace.
|
# Change the time machine expiration on the namespace.
|
||||||
(database.User.update(removed_tag_expiration_s=0)
|
(database.User.update(removed_tag_expiration_s=0)
|
||||||
|
@ -180,7 +178,7 @@ class TestGarbageCollection(unittest.TestCase):
|
||||||
.execute())
|
.execute())
|
||||||
|
|
||||||
# Now we should find the repository for GC.
|
# Now we should find the repository for GC.
|
||||||
repository = model.repository.find_repository_with_garbage()
|
repository = model.repository.find_repository_with_garbage(0)
|
||||||
self.assertIsNotNone(repository)
|
self.assertIsNotNone(repository)
|
||||||
self.assertEquals(REPO, repository.name)
|
self.assertEquals(REPO, repository.name)
|
||||||
|
|
||||||
|
@ -188,7 +186,7 @@ class TestGarbageCollection(unittest.TestCase):
|
||||||
model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name)
|
model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name)
|
||||||
|
|
||||||
# There should now be no repositories with garbage.
|
# There should now be no repositories with garbage.
|
||||||
self.assertIsNone(model.repository.find_repository_with_garbage())
|
self.assertIsNone(model.repository.find_repository_with_garbage(0))
|
||||||
|
|
||||||
|
|
||||||
def test_one_tag(self):
|
def test_one_tag(self):
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from app import app
|
from app import app
|
||||||
from data.model.repository import find_repository_with_garbage, garbage_collect_repo
|
from data.model.repository import (find_repository_with_garbage, garbage_collect_repo,
|
||||||
|
get_random_gc_policy)
|
||||||
from workers.worker import Worker
|
from workers.worker import Worker
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -14,7 +15,7 @@ class GarbageCollectionWorker(Worker):
|
||||||
|
|
||||||
def _garbage_collection_repos(self):
|
def _garbage_collection_repos(self):
|
||||||
""" Performs garbage collection on repositories. """
|
""" Performs garbage collection on repositories. """
|
||||||
repository = find_repository_with_garbage()
|
repository = find_repository_with_garbage(get_random_gc_policy())
|
||||||
if repository is None:
|
if repository is None:
|
||||||
logger.debug('No repository with garbage found')
|
logger.debug('No repository with garbage found')
|
||||||
return
|
return
|
||||||
|
|
Reference in a new issue