Add a cleanup worker for the queue item table

Fixes #784
Joseph Schorr 2015-11-20 16:50:12 -05:00
parent ec492bb683
commit 5746b42c69
3 changed files with 49 additions and 0 deletions


@@ -0,0 +1,2 @@
#!/bin/sh
# Forward the worker's output to syslog, tagged 'queuecleanupworker'.
exec logger -i -t queuecleanupworker


@@ -0,0 +1,8 @@
#! /bin/bash

echo 'Starting queue cleanup worker'

cd /
venv/bin/python -m workers.queuecleanupworker 2>&1

echo 'Queue cleanup worker exited'


@@ -0,0 +1,39 @@
import logging

from app import app
from data.database import UseThenDisconnect, QueueItem
from workers.worker import Worker
from datetime import timedelta, datetime

logger = logging.getLogger(__name__)


DELETION_DATE_THRESHOLD = timedelta(days=7)
DELETION_COUNT_THRESHOLD = 50

BATCH_SIZE = 500
QUEUE_CLEANUP_FREQUENCY = app.config.get('QUEUE_CLEANUP_FREQUENCY', 60*60*24)


class QueueCleanupWorker(Worker):
  def __init__(self):
    super(QueueCleanupWorker, self).__init__()
    self.add_operation(self._cleanup_queue, QUEUE_CLEANUP_FREQUENCY)

  def _cleanup_queue(self):
    """ Performs garbage collection on the queueitem table. """
    with UseThenDisconnect(app.config):
      while True:
        # Find all queue items that expired before the threshold (typically a week ago)
        # and have no retries remaining, and delete them in batches.
        threshold_ago = datetime.now() - DELETION_DATE_THRESHOLD
        to_delete = list(QueueItem.select()
                                  .where(QueueItem.processing_expires <= threshold_ago,
                                         QueueItem.retries_remaining == 0)
                                  .limit(BATCH_SIZE))

        if len(to_delete) < DELETION_COUNT_THRESHOLD:
          return

        QueueItem.delete().where(QueueItem.id << to_delete).execute()


if __name__ == "__main__":
  worker = QueueCleanupWorker()
  worker.start()
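
For context on the deletion loop above: it selects a bounded batch of expired rows and deletes them by primary key, so a single run never issues an unbounded DELETE against the queueitem table. Below is a minimal, self-contained sketch of the same batched-delete pattern with peewee against an in-memory SQLite table; the Item model, its expires column, and the seed data are hypothetical stand-ins for illustration, not part of this commit.

from datetime import datetime, timedelta

from peewee import SqliteDatabase, Model, DateTimeField, IntegerField

db = SqliteDatabase(':memory:')

class Item(Model):
  """ Hypothetical stand-in for the real QueueItem model. """
  expires = DateTimeField()
  retries_remaining = IntegerField(default=0)

  class Meta:
    database = db

db.connect()
db.create_tables([Item])

# Seed rows: even-numbered items expired ten days ago, odd-numbered ones yesterday.
now = datetime.now()
for index in range(20):
  Item.create(expires=now - timedelta(days=10 if index % 2 == 0 else 1))

BATCH_SIZE = 5
threshold_ago = now - timedelta(days=7)

while True:
  # Select a bounded batch of stale, non-retryable rows...
  batch = list(Item.select()
                   .where(Item.expires <= threshold_ago,
                          Item.retries_remaining == 0)
                   .limit(BATCH_SIZE))
  if not batch:
    break

  # ...and delete them by primary key. peewee's << operator is an SQL IN, and it
  # coerces model instances to their primary keys.
  Item.delete().where(Item.id << batch).execute()

print(Item.select().count())  # the ten fresh rows remain

Unlike this sketch, which loops until a batch comes back empty, the worker returns as soon as a batch is smaller than DELETION_COUNT_THRESHOLD, leaving any remaining stragglers to the next scheduled run.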