import logging

from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
                    TextField)

from data.database import BaseModel, db, db_for_update
from app import app


logger = logging.getLogger(__name__)


class Repository(BaseModel):
    pass


# The models below are vendored copies of the tables this migration writes to,
# captured as they are defined at the time of the migration.
class ImageStorage(BaseModel):
    created = DateTimeField(null=True)
    comment = TextField(null=True)
    command = TextField(null=True)
    aggregate_size = BigIntegerField(null=True)
    uploading = BooleanField(default=True, null=True)


class Image(BaseModel):
    # Denormalized on purpose. Images are meant to be globally unique, but for
    # permission and security reasons they cannot be handled that way, so
    # instead of a many-to-many Repository <-> Image relation every image is
    # owned by exactly one repository.
    docker_image_id = CharField(index=True)
    repository = ForeignKeyField(Repository)

    # '/'-separated list of ancestor ids, e.g. /1/2/6/7/10/
    ancestors = CharField(index=True, default='/', max_length=64535, null=True)

    storage = ForeignKeyField(ImageStorage, index=True, null=True)

    created = DateTimeField(null=True)
    comment = TextField(null=True)
    command = TextField(null=True)
    aggregate_size = BigIntegerField(null=True)
    v1_json_metadata = TextField(null=True)


def backfill_image_fields():
    """ Populates image metadata from the storage row each image references.

    Works in batches of at most 100 images. An image is selected when its
    created, comment, command and aggregate_size are all NULL, while its
    storage is not flagged as uploading and has at least one of those four
    values set. Each selected image is re-read and updated inside its own
    transaction. The function returns once a batch comes back empty.
    """
    logger.debug('Image metadata backfill: Began execution')

    copied_fields = ('created', 'comment', 'command', 'aggregate_size')

    # True for storages that have nothing to copy; negated in the query below.
    storage_is_blank = ((ImageStorage.created >> None) &
                        (ImageStorage.comment >> None) &
                        (ImageStorage.command >> None) &
                        (ImageStorage.aggregate_size >> None))

    while True:
        pending_query = (Image
                         .select(Image.id)
                         .join(ImageStorage)
                         .where(Image.created >> None,
                                Image.comment >> None,
                                Image.command >> None,
                                Image.aggregate_size >> None,
                                # peewee builds SQL from `==`; `is False` would not work here.
                                ImageStorage.uploading == False,  # noqa: E712
                                ~storage_is_blank)
                         .limit(100))
        pending = list(pending_query)

        if not pending:
            logger.debug('Image metadata backfill: Backfill completed')
            return

        logger.debug('Image metadata backfill: Found %s records to update', len(pending))

        for stub in pending:
            logger.debug('Updating image: %s', stub.id)

            with app.config['DB_TRANSACTION_FACTORY'](db):
                try:
                    # Re-read the row together with its storage through
                    # db_for_update before modifying it.
                    locked_query = db_for_update(Image
                                                 .select(Image, ImageStorage)
                                                 .join(ImageStorage)
                                                 .where(Image.id == stub.id))
                    image = locked_query.get()

                    for field_name in copied_fields:
                        setattr(image, field_name, getattr(image.storage, field_name))

                    image.save()
                except Image.DoesNotExist:
                    # The row was no longer found by the re-read; nothing to update.
                    pass


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    # Only let CRITICAL messages through from peewee's own logger.
    logging.getLogger('peewee').setLevel(logging.CRITICAL)
    backfill_image_fields()