From 4e5c8a9281d7aaa869f21303a63c4c5da7b1912b Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 20 Oct 2015 18:13:29 -0400 Subject: [PATCH] Reduce GC work time and make sure to use distinct query --- data/model/repository.py | 1 + workers/gcworker.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/data/model/repository.py b/data/model/repository.py index be94ccac2..fc6b2327e 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -74,6 +74,7 @@ def find_repository_with_garbage(): (RepositoryTag.lifetime_end_ts <= (epoch_timestamp - Namespace.removed_tag_expiration_s))) .limit(500) + .distinct() .alias('candidates')) found = (RepositoryTag diff --git a/workers/gcworker.py b/workers/gcworker.py index 559095ebc..1470a4e7d 100644 --- a/workers/gcworker.py +++ b/workers/gcworker.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) class GarbageCollectionWorker(Worker): def __init__(self): super(GarbageCollectionWorker, self).__init__() - self.add_operation(self._garbage_collection_repos, 10) + self.add_operation(self._garbage_collection_repos, 30) def _garbage_collection_repos(self): """ Performs garbage collection on repositories. """