From fd3f88f48929dba53bbe1033f96fe93c5c97d633 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Fri, 6 Nov 2015 15:45:39 -0500 Subject: [PATCH] Re-enable parent id backfill, use new backfill style --- ...722_backfill_parent_id_and_v1_checksums.py | 2 + util/migrate/backfill_parent_id.py | 52 ++++++++----------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/data/migrations/versions/22af01f81722_backfill_parent_id_and_v1_checksums.py b/data/migrations/versions/22af01f81722_backfill_parent_id_and_v1_checksums.py index e6d732dcc..2f6772b66 100644 --- a/data/migrations/versions/22af01f81722_backfill_parent_id_and_v1_checksums.py +++ b/data/migrations/versions/22af01f81722_backfill_parent_id_and_v1_checksums.py @@ -11,8 +11,10 @@ revision = '22af01f81722' down_revision = '2827d36939e4' from util.migrate.backfill_v1_checksums import backfill_checksums +from util.migrate.backfill_parent_id import backfill_parent_id def upgrade(tables): + backfill_parent_id() backfill_checksums() def downgrade(tables): diff --git a/util/migrate/backfill_parent_id.py b/util/migrate/backfill_parent_id.py index 0d2540489..2a4e7b091 100644 --- a/util/migrate/backfill_parent_id.py +++ b/util/migrate/backfill_parent_id.py @@ -1,46 +1,38 @@ import logging -from data.database import Image, ImageStorage, db + +from data.database import Image, ImageStorage, db, db_for_update from app import app +from util.migrate import yield_random_entries + logger = logging.getLogger(__name__) + def backfill_parent_id(): logger.setLevel(logging.DEBUG) logger.debug('backfill_parent_id: Starting') logger.debug('backfill_parent_id: This can be a LONG RUNNING OPERATION. Please wait!') - # Check for any images without parent - has_images = bool(list(Image - .select(Image.id) - .join(ImageStorage) - .where(Image.parent >> None, Image.ancestors != '/', ImageStorage.uploading == False) - .limit(1))) + def fetch_batch(): + return (Image + .select(Image.id, Image.ancestors) + .join(ImageStorage) + .where(Image.parent >> None, Image.ancestors != '/', + ImageStorage.uploading == False)) - if not has_images: - logger.debug('backfill_parent_id: No migration needed') - return + for to_backfill in yield_random_entries(fetch_batch, 10000, 0.3): + with app.config['DB_TRANSACTION_FACTORY'](db): + try: + image = db_for_update(Image + .select() + .where(Image.id == to_backfill.id)).get() + image.parent = to_backfill.ancestors.split('/')[-2] + image.save() + except Image.DoesNotExist: + pass - while True: - # Load the record from the DB. - batch_images_ids = list(Image - .select(Image.id) - .join(ImageStorage) - .where(Image.parent >> None, Image.ancestors != '/', ImageStorage.uploading == False) - .limit(100)) - - if len(batch_images_ids) == 0: - logger.debug('backfill_parent_id: Completed') - return - - for image_id in batch_images_ids: - with app.config['DB_TRANSACTION_FACTORY'](db): - try: - image = Image.select(Image.id, Image.ancestors).where(Image.id == image_id).get() - image.parent = image.ancestors.split('/')[-2] - image.save() - except Image.DoesNotExist: - pass + logger.debug('backfill_parent_id: Completed') if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG)