From b05ebbf2c0e767e671425723f245f0191bd376f7 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 21 Mar 2017 16:58:36 -0400 Subject: [PATCH 1/2] Have storage replication wait up to 20 minutes before trying again Copying a file can be a long operation, so make this configurable and far above the default 5 minutes --- workers/storagereplication.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workers/storagereplication.py b/workers/storagereplication.py index be000884d..767daf203 100644 --- a/workers/storagereplication.py +++ b/workers/storagereplication.py @@ -10,6 +10,7 @@ from workers.queueworker import QueueWorker, WorkerUnhealthyException logger = logging.getLogger(__name__) POLL_PERIOD_SECONDS = 10 +RESERVATION_SECONDS = app.config.get('STORAGE_REPLICATION_PROCESSING_SECONDS', 60*20) class StorageReplicationWorker(QueueWorker): def process_queue_item(self, job_details): @@ -97,5 +98,6 @@ if __name__ == "__main__": logger.debug('Starting replication worker') worker = StorageReplicationWorker(image_replication_queue, - poll_period_seconds=POLL_PERIOD_SECONDS) + poll_period_seconds=POLL_PERIOD_SECONDS, + reservation_seconds=RESERVATION_SECONDS) worker.start() From 6ab5b8be45ed964014fe150589218a92cf812654 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 22 Mar 2017 11:30:49 -0400 Subject: [PATCH 2/2] Have storage replication backfill tool only backfill missing storages Prevents overload of the queue --- util/backfillreplication.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/util/backfillreplication.py b/util/backfillreplication.py index e63cd45fe..a60a6ef89 100644 --- a/util/backfillreplication.py +++ b/util/backfillreplication.py @@ -1,7 +1,9 @@ import logging import features +from app import storage, image_replication_queue from data.database import Image, ImageStorage, Repository, User +from data import model from util.registry.replication import queue_storage_replication def backfill_replication(): @@ -17,9 +19,16 @@ def backfill_replication(): if image.storage.uuid in encountered: continue - print "Enqueueing image storage %s to be replicated" % (image.storage.uuid) - encountered.add(image.storage.uuid) - queue_storage_replication(image.repository.namespace_user.username, image.storage) + namespace = image.repository.namespace_user.username + locations = model.user.get_region_locations(namespace) + locations_required = locations | set(storage.default_locations) + locations_missing = locations_required - set(image.storage.locations) + if locations_missing: + print "Enqueueing image storage %s to be replicated" % (image.storage.uuid) + encountered.add(image.storage.uuid) + + if not image_replication_queue.alive([image.storage.uuid]): + queue_storage_replication(image.repository.namespace_user.username, image.storage) if __name__ == '__main__':