import logging

from peewee import JOIN_LEFT_OUTER

from data.database import (Image, ImageStorage, ImageStoragePlacement, ImageStorageLocation, db,
                           db_for_update)
from app import app, storage
from data import model


logger = logging.getLogger(__name__)


def backfill_v1_metadata(batch_size=100):
    """ Copies metadata from image storages to their images.

    Walks, in ascending id order, over every image whose ``v1_json_metadata``
    column is NULL and whose storage is not marked as uploading. For each one
    the JSON document at ``storage.image_json_path(<storage uuid>)`` is read
    and saved onto the image row. If the document cannot be read (IOError),
    None is stored and the failure is logged.

    Each image is attempted at most once per call: batches are paginated by
    image id rather than by re-running the "metadata is NULL" query from the
    start. Without this, images whose metadata could not be loaded would still
    match the query and be selected again forever. The trade-off is that an
    image with an id below the cursor that becomes eligible while the backfill
    is running is only picked up by a later run.

    Args:
      batch_size: maximum number of image ids fetched per batch query.

    Returns:
      None. Returns early (after logging an error) if the per-image query
      unexpectedly yields more than one image.
    """
    logger.debug('Image v1 metadata backfill: Began execution')

    # Id of the last image handed out by the batch query; None until the first
    # batch has been fetched.
    last_seen_id = None

    while True:
        # NOTE: `>> None` (IS NULL) and `== False` are peewee expressions that
        # are translated to SQL; they must not be rewritten as `is None` / `not`.
        conditions = [Image.v1_json_metadata >> None, ImageStorage.uploading == False]
        if last_seen_id is not None:
            conditions.append(Image.id > last_seen_id)

        batch_image_ids = list(Image
                               .select(Image.id)
                               .join(ImageStorage)
                               .where(*conditions)
                               .order_by(Image.id)
                               .limit(batch_size))

        if not batch_image_ids:
            logger.debug('Image v1 metadata backfill: Backfill completed')
            return

        logger.debug('Image v1 metadata backfill: Found %s records to update',
                     len(batch_image_ids))

        # The batch is ordered by id, so its last row carries the highest id.
        last_seen_id = batch_image_ids[-1].id

        for one_id in batch_image_ids:
            # One transaction per image, using the factory from the app config.
            with app.config['DB_TRANSACTION_FACTORY'](db):
                try:
                    logger.debug('Loading image: %s', one_id.id)

                    raw_query = (ImageStoragePlacement
                                 .select(ImageStoragePlacement, Image, ImageStorage,
                                         ImageStorageLocation)
                                 .join(ImageStorageLocation)
                                 .switch(ImageStoragePlacement)
                                 .join(ImageStorage, JOIN_LEFT_OUTER)
                                 .join(Image)
                                 .where(Image.id == one_id.id))

                    # NOTE(review): presumably adds row locking (SELECT ... FOR UPDATE)
                    # where the database supports it; confirm in data.database.
                    placement_query = db_for_update(raw_query)

                    repo_image_list = model.image.invert_placement_query_results(placement_query)
                    num_found = len(repo_image_list)
                    if num_found > 1:
                        logger.error('Found more images than we requested, something is wrong with the query')
                        return

                    if num_found == 0:
                        # The query is rooted at ImageStoragePlacement, so an image
                        # whose storage has no placement rows yields nothing. Skip
                        # it instead of failing with IndexError on [0] below.
                        logger.warning('No storage placements found for image: %s, skipping',
                                       one_id.id)
                        continue

                    repo_image = repo_image_list[0]
                    uuid = repo_image.storage.uuid
                    json_path = storage.image_json_path(uuid)

                    logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
                    try:
                        data = storage.get_content(repo_image.storage.locations, json_path)
                    except IOError:
                        data = None
                        logger.exception('failed to find v1 metadata, defaulting to None')

                    repo_image.v1_json_metadata = data
                    repo_image.save()
                except ImageStoragePlacement.DoesNotExist:
                    # Best effort: leave this image untouched and move on.
                    pass


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    # logging.getLogger('peewee').setLevel(logging.CRITICAL)
    backfill_v1_metadata()