import logging

from peewee import (fn, CharField, BigIntegerField, ForeignKeyField,
                    BooleanField, DateTimeField, TextField, IntegerField)

from app import app
from data.database import BaseModel
from util.migrate.allocator import yield_random_entries

# Rows requested per batch from the random-entry allocator; also used as
# the progress-logging interval below.
BATCH_SIZE = 1000


class Repository(BaseModel):
    # Minimal stand-in: only needed as the FK target of Image.repository.
    pass


# Vendor the information from tables we will be writing to at the time of
# this migration, so later schema changes cannot break this script.
class ImageStorage(BaseModel):
    uuid = CharField(index=True, unique=True)
    checksum = CharField(null=True)
    image_size = BigIntegerField(null=True)
    uncompressed_size = BigIntegerField(null=True)
    uploading = BooleanField(default=True, null=True)
    cas_path = BooleanField(default=True)
    content_checksum = CharField(null=True, index=True)


class Image(BaseModel):
    docker_image_id = CharField(index=True)
    repository = ForeignKeyField(Repository)
    # '/'-separated chain of ancestor image ids, e.g. '/1/7/42/'.
    ancestors = CharField(index=True, default='/', max_length=64535, null=True)
    storage = ForeignKeyField(ImageStorage, index=True, null=True)
    created = DateTimeField(null=True)
    comment = TextField(null=True)
    command = TextField(null=True)
    aggregate_size = BigIntegerField(null=True)
    v1_json_metadata = TextField(null=True)
    v1_checksum = CharField(null=True)
    security_indexed = BooleanField(default=False)
    security_indexed_engine = IntegerField(default=-1)
    # Target column of this backfill.
    parent_id = IntegerField(index=True, null=True)


logger = logging.getLogger(__name__)


def backfill_parent_id():
    """Backfill ``Image.parent_id`` from each row's ``ancestors`` string.

    The parent of an image is the second-to-last component of its
    '/'-delimited ``ancestors`` path. Rows are visited in randomized batches
    (via ``yield_random_entries``) so several workers may run concurrently;
    a conditional UPDATE detects when another worker already filled in the
    row, in which case the current batch is aborted and re-fetched.
    """
    logger.setLevel(logging.DEBUG)
    logger.debug('backfill_parent_id: Starting')
    logger.debug('backfill_parent_id: This can be a LONG RUNNING OPERATION. '
                 'Please wait!')

    def fetch_batch():
        # Candidates: images with a non-trivial ancestry whose storage has
        # finished uploading and whose parent_id is still NULL.
        # NOTE: '>> None' is peewee's IS NULL; '== False' is required for
        # the SQL boolean comparison (do not "fix" to 'is False').
        return (Image
                .select(Image.id, Image.ancestors)
                .join(ImageStorage)
                .where(Image.parent_id >> None, Image.ancestors != '/',
                       ImageStorage.uploading == False))

    max_id = Image.select(fn.Max(Image.id)).scalar()

    written = 0
    for to_backfill, abort in yield_random_entries(fetch_batch, Image.id,
                                                   BATCH_SIZE, max_id):
        # ancestors is '/a/b/c/': split yields ['', 'a', 'b', 'c', ''],
        # so index -2 is the immediate parent id.
        computed_parent = int(to_backfill.ancestors.split('/')[-2])

        # Conditional UPDATE: only succeeds if parent_id is still NULL,
        # i.e. no other worker beat us to this row.
        num_changed = (Image
                       .update(parent_id=computed_parent)
                       .where(Image.id == to_backfill.id,
                              Image.parent_id >> None)).execute()
        if num_changed == 0:
            logger.info('Collision with another worker, aborting batch')
            abort.set()

        written += num_changed
        if (written % BATCH_SIZE) == 0:
            logger.debug('%s entries written', written)

    logger.debug('backfill_parent_id: Completed, updated %s entries', written)


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    # peewee's query logging is extremely chatty; silence it for the run.
    logging.getLogger('peewee').setLevel(logging.CRITICAL)
    backfill_parent_id()