import logging

from peewee import JOIN_LEFT_OUTER
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
                    TextField)

from data.database import BaseModel, db, db_for_update
from app import app, storage
from data import model


logger = logging.getLogger(__name__)


class Repository(BaseModel):
    """Field-less stand-in so that Image can declare its repository foreign key."""
    pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
    """Vendored snapshot of the image storage table."""
    uuid = CharField(index=True, unique=True)
    checksum = CharField(null=True)
    image_size = BigIntegerField(null=True)
    uncompressed_size = BigIntegerField(null=True)
    uploading = BooleanField(default=True, null=True)


class Image(BaseModel):
    """Vendored snapshot of the image table; v1_json_metadata is the column being backfilled."""
    # This class is intentionally denormalized. Even though images are supposed
    # to be globally unique we can't treat them as such for permissions and
    # security reasons. So rather than Repository <-> Image being many to many
    # each image now belongs to exactly one repository.
    docker_image_id = CharField(index=True)
    repository = ForeignKeyField(Repository)

    # '/' separated list of ancestor ids, e.g. /1/2/6/7/10/
    ancestors = CharField(index=True, default='/', max_length=64535, null=True)

    storage = ForeignKeyField(ImageStorage, index=True, null=True)

    created = DateTimeField(null=True)
    comment = TextField(null=True)
    command = TextField(null=True)
    aggregate_size = BigIntegerField(null=True)
    v1_json_metadata = TextField(null=True)


class ImageStorageLocation(BaseModel):
    """Vendored snapshot of the storage location table."""
    name = CharField(unique=True, index=True)


class ImageStoragePlacement(BaseModel):
    """Vendored snapshot of the table linking an image storage to a location."""
    storage = ForeignKeyField(ImageStorage)
    location = ForeignKeyField(ImageStorageLocation)


def image_json_path(storage_uuid):
    """Return the path of the v1 ``json`` blob for the given storage uuid.

    The result is ``storage.image_path(storage_uuid)`` with ``json`` appended
    directly, with no separator added here.
    NOTE(review): presumably image_path() already ends with a path separator;
    confirm.
    """
    base_path = storage.image_path(storage_uuid)
    return '{0}json'.format(base_path)


def _backfill_image(image_id):
    """Backfill v1 metadata for one image inside its own transaction.

    Args:
        image_id: primary key of the Image row to update.

    Returns:
        False if the lookup produced more than one image, in which case the
        caller must abort the whole backfill; True in every other case,
        including images that were skipped.
    """
    with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
            logger.debug('Loading image: %s', image_id)

            raw_query = (ImageStoragePlacement
                         .select(ImageStoragePlacement, Image, ImageStorage,
                                 ImageStorageLocation)
                         .join(ImageStorageLocation)
                         .switch(ImageStoragePlacement)
                         .join(ImageStorage, JOIN_LEFT_OUTER)
                         .join(Image)
                         .where(Image.id == image_id))

            placement_query = db_for_update(raw_query)

            repo_image_list = model.image.invert_placement_query_results(placement_query)
            if len(repo_image_list) > 1:
                logger.error('Found more images than we requested, something is wrong with '
                             'the query')
                return False

            if not repo_image_list:
                # NOTE(review): assumes the helper returns an empty sequence when no
                # placement row matches; without this guard the [0] below raises
                # IndexError and kills the whole run.
                logger.warning('No storage placement found for image: %s, skipping', image_id)
                return True

            repo_image = repo_image_list[0]
            storage_uuid = repo_image.storage.uuid
            json_path = image_json_path(storage_uuid)

            logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
            try:
                data = storage.get_content(repo_image.storage.locations, json_path)
            except IOError:
                data = None
                logger.exception('failed to find v1 metadata, defaulting to None')

            repo_image.v1_json_metadata = data
            repo_image.save()
        except ImageStoragePlacement.DoesNotExist:
            # Best effort: keep going, but leave a trace instead of swallowing silently.
            logger.warning('Placement does not exist for image: %s, skipping', image_id)

    return True


def backfill_v1_metadata(batch_size=100):
    """Copy v1 JSON metadata from image storages to their images.

    Candidates are images whose ``v1_json_metadata`` is NULL and whose storage
    is no longer uploading. They are visited in ascending id order, in batches
    of ``batch_size``, and each batch starts after the last id of the previous
    one. An image whose metadata cannot be read keeps a NULL column, so without
    this cursor it would be re-selected on every pass and the loop would never
    end.

    Args:
        batch_size: maximum number of image ids fetched per query.

    Returns:
        None. Stops when no candidates remain, or early if a single-image
        lookup unexpectedly matches more than one image.
    """
    logger.debug('Image v1 metadata backfill: Began execution')

    last_seen_id = None
    while True:
        # `== False` is intentional: peewee turns it into a SQL comparison.
        conditions = [Image.v1_json_metadata >> None,
                      ImageStorage.uploading == False]  # noqa: E712
        if last_seen_id is not None:
            conditions.append(Image.id > last_seen_id)

        batch_image_ids = list(Image
                               .select(Image.id)
                               .join(ImageStorage)
                               .where(*conditions)
                               .order_by(Image.id)
                               .limit(batch_size))

        if not batch_image_ids:
            logger.debug('Image v1 metadata backfill: Backfill completed')
            return

        logger.debug('Image v1 metadata backfill: Found %s records to update',
                     len(batch_image_ids))

        # Advance the cursor before processing so a failure on any row cannot
        # cause the same batch to be selected again.
        last_seen_id = batch_image_ids[-1].id

        for one_id in batch_image_ids:
            if not _backfill_image(one_id.id):
                return


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    # logging.getLogger('peewee').setLevel(logging.CRITICAL)
    backfill_v1_metadata()