"""Queue worker that copies image storage to every storage location it is required in."""

import logging
import logging.config
import time

import features

from app import app, storage, image_replication_queue
from data.database import CloseForLongOperation
from data import model
from workers.queueworker import QueueWorker, WorkerUnhealthyException
from util.log import logfile_path

logger = logging.getLogger(__name__)

POLL_PERIOD_SECONDS = 10

# Reservation passed to the queue worker; defaults to 20 minutes when not configured.
RESERVATION_SECONDS = app.config.get('STORAGE_REPLICATION_PROCESSING_SECONDS', 60*20)


class StorageReplicationWorker(QueueWorker):
    """Queue worker which replicates an image storage to its missing locations."""

    def process_queue_item(self, job_details):
        """Replicate the storage named by a single queue item.

        Args:
            job_details: mapping with the keys 'storage_id' (UUID of the image
                storage) and 'namespace_user_id' (ID of the owning namespace).

        Raises:
            WorkerUnhealthyException: if `replicate_storage` reports failure.
        """
        storage_uuid = job_details['storage_id']
        namespace_id = job_details['namespace_user_id']

        logger.debug('Starting replication of image storage %s under namespace %s',
                     storage_uuid, namespace_id)
        try:
            namespace = model.user.get_namespace_user_by_user_id(namespace_id)
        except model.user.InvalidUsernameException:
            # The namespace cannot be resolved: log it and return normally instead of
            # raising, so the failure is not reported to the caller as unhealthy.
            logger.exception('Exception when looking up namespace %s for replication of image storage %s',
                             namespace_id, storage_uuid)
            return

        succeeded = self.replicate_storage(namespace, storage_uuid)
        logger.debug('Replication finished of image storage %s under namespace %s: %s',
                     storage_uuid, namespace_id, succeeded)
        if not succeeded:
            raise WorkerUnhealthyException()

    def replicate_storage(self, namespace, storage_uuid):
        """Copy an image storage to every required location it is missing from.

        The required locations are the namespace's region locations plus the
        default storage locations. A placement record is added for each location
        as soon as its copy completes, so a failure part-way through leaves the
        placements of the earlier, successful copies in place.

        Args:
            namespace: the namespace user owning the storage; may be falsy.
            storage_uuid: UUID of the image storage to replicate.

        Returns:
            True if there was nothing to do (unknown namespace, unknown storage, or
            no missing locations) or if every copy completed; False if the source
            data could not be found after retrying or if a copy raised.
        """
        # Lookup the namespace and its associated regions.
        if not namespace:
            logger.debug('Unknown namespace when trying to replicate storage %s', storage_uuid)
            return True

        locations = model.user.get_region_locations(namespace)

        # Lookup the image storage.
        try:
            partial_storage = model.storage.get_storage_by_uuid(storage_uuid)
        except model.InvalidImageException:
            logger.debug('Unknown storage: %s', storage_uuid)
            return True

        # Check to see if the image is at all the required locations.
        locations_required = locations | set(storage.default_locations)
        locations_missing = locations_required - set(partial_storage.locations)

        logger.debug('For replication of storage %s under namespace %s: %s required; %s missing',
                     storage_uuid, namespace.username, locations_required, locations_missing)

        if not locations_missing:
            logger.debug('No missing locations for storage %s under namespace %s. Required: %s',
                         storage_uuid, namespace.username, locations_required)
            return True

        # For any missing storage locations, initiate a copy.
        # NOTE(review): this raises IndexError if the storage has no recorded location at
        # all -- confirm whether callers can ever enqueue such a storage.
        existing_location = list(partial_storage.locations)[0]
        path_to_copy = model.storage.get_layer_path(partial_storage)

        # Lookup the existing location. If not found, progressively sleep a few times to
        # handle the case of not fully consistent storage. The waits are 5, 10 and 20
        # seconds, and one last existence check follows the final wait.
        for retry in range(3):
            if storage.exists([existing_location], path_to_copy):
                break

            logger.debug('Cannot find image storage %s in existing location %s (try #%s)',
                         storage_uuid, existing_location, retry)
            time.sleep(pow(2, retry) * 5)

        if not storage.exists([existing_location], path_to_copy):
            logger.warning('Cannot find image storage %s in existing location %s; stopping replication',
                           storage_uuid, existing_location)
            return False

        # For each missing location, copy over the storage.
        for location in locations_missing:
            logger.debug('Starting copy of storage %s to location %s from %s',
                         partial_storage.uuid, location, existing_location)

            # Copy the binary data.
            try:
                with CloseForLongOperation(app.config):
                    storage.copy_between(path_to_copy, existing_location, location)
            except Exception:
                # Only ordinary errors are treated as a failed copy; KeyboardInterrupt and
                # SystemExit are left to propagate so the process can still be stopped.
                logger.exception('Exception when copying path %s of image storage %s to location %s',
                                 path_to_copy, partial_storage.uuid, location)
                return False

            # Create the storage location record for the storage now that the copy has
            # completed.
            model.storage.add_storage_placement(partial_storage, location)
            logger.debug('Finished copy of image storage %s to location %s from %s',
                         partial_storage.uuid, location, existing_location)

        logger.debug('Completed replication of image storage %s to locations %s from %s',
                     partial_storage.uuid, locations_missing, existing_location)
        return True


if __name__ == "__main__":
    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

    # The storage configuration is only inspected when replication is enabled.
    has_local_storage = False
    if features.STORAGE_REPLICATION:
        has_local_storage = any(
            storage_type == 'LocalStorage'
            for storage_type, _ in app.config.get('DISTRIBUTED_STORAGE_CONFIG', {}).values())

    if not features.STORAGE_REPLICATION or has_local_storage:
        if has_local_storage:
            logger.error("Storage replication can't be used with local storage")
        else:
            logger.debug('Full storage replication disabled; skipping')

        # Idle forever instead of exiting (presumably so that a process supervisor does
        # not keep restarting the worker -- TODO confirm).
        while True:
            time.sleep(10000)

    logger.debug('Starting replication worker')
    worker = StorageReplicationWorker(image_replication_queue,
                                      poll_period_seconds=POLL_PERIOD_SECONDS,
                                      reservation_seconds=RESERVATION_SECONDS)
    worker.start()