Add aggregate size column and a migration to backfill it

This commit is contained in:
Joseph Schorr 2015-03-16 18:03:17 -04:00
parent 7b5341c067
commit b8d88c0f4e
3 changed files with 97 additions and 0 deletions

View file

@ -0,0 +1,26 @@
"""Add aggregate size column
Revision ID: 2b2529fd23ff
Revises: 2088f2b81010
Create Date: 2015-03-16 17:36:53.321458
"""
# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration it revises (both match the values in the module docstring).
revision = '2b2529fd23ff'
down_revision = '2088f2b81010'
from alembic import op
import sqlalchemy as sa
def upgrade(tables):
    """Add a nullable BigInteger ``aggregate_size`` column to ``imagestorage``.

    ``tables`` is accepted as part of the migration signature but is not
    used by this step.
    """
    # The column is nullable, so no value is required for existing rows.
    aggregate_size_column = sa.Column('aggregate_size', sa.BigInteger(), nullable=True)
    op.add_column('imagestorage', aggregate_size_column)
def downgrade(tables):
    """Drop the ``aggregate_size`` column from ``imagestorage``.

    ``tables`` is accepted as part of the migration signature but is not
    used by this step.
    """
    table_name = 'imagestorage'
    column_name = 'aggregate_size'
    op.drop_column(table_name, column_name)

View file

@ -0,0 +1,22 @@
"""Backfill aggregate size columns
Revision ID: 87c29d0cd05
Revises: 2b2529fd23ff
Create Date: 2015-03-16 17:55:30.148557
"""
# Revision identifiers, used by Alembic.
# `down_revision` is the ID of the migration this one revises (matches the
# "Revises" line in the module docstring), so this step is ordered after it.
revision = '87c29d0cd05'
down_revision = '2b2529fd23ff'
from alembic import op
import sqlalchemy as sa
from util.backfill_aggregate_sizes import backfill_aggregate_sizes
def upgrade(tables):
    """Run the aggregate-size backfill as a data migration.

    Delegates entirely to ``backfill_aggregate_sizes``; ``tables`` is
    accepted as part of the migration signature but is not used here.
    """
    backfill_aggregate_sizes()
def downgrade(tables):
    """Do nothing: this migration only fills in data, so there is no schema
    change to reverse here.
    """
    return None

View file

@ -0,0 +1,49 @@
import logging
from data.database import ImageStorage, Image, db
from app import app
LOGGER = logging.getLogger(__name__)
def backfill_aggregate_sizes():
    """Generate aggregate sizes for any image storage entries without them.

    Storages whose ``aggregate_size`` is NULL are visited in ascending id
    order, in batches of 10. For each one, an image that references the
    storage is looked up and the aggregate is computed as the storage's own
    ``image_size`` plus the ``image_size`` of the storage of every ancestor
    listed in that image's ``ancestors`` path.

    Storages that cannot be backfilled (no image references them, or one of
    the ancestor images no longer exists) are logged and skipped. Each
    storage is visited at most once per call, so such rows cannot make the
    backfill loop forever.

    Returns:
        None. The function returns once no unvisited storage with a NULL
        ``aggregate_size`` remains.
    """
    LOGGER.setLevel(logging.DEBUG)
    LOGGER.debug('Aggregate sizes backfill: Began execution')

    # Highest storage id visited so far. Paging past it guarantees progress
    # even when a row is skipped and therefore keeps a NULL aggregate_size.
    last_seen_id = None

    while True:
        batch_query = (ImageStorage
                       .select(ImageStorage.id)
                       .where(ImageStorage.aggregate_size >> None))
        if last_seen_id is not None:
            batch_query = batch_query.where(ImageStorage.id > last_seen_id)

        batch_storage_ids = list(batch_query.order_by(ImageStorage.id).limit(10))

        if not batch_storage_ids:
            # There are no storages left to backfill. We're done!
            LOGGER.debug('Aggregate sizes backfill: Backfill completed')
            return

        LOGGER.debug('Aggregate sizes backfill: Found %s records to update',
                     len(batch_storage_ids))

        for image_storage_id in batch_storage_ids:
            last_seen_id = image_storage_id.id
            LOGGER.debug('Updating image storage: %s', image_storage_id.id)

            with app.config['DB_TRANSACTION_FACTORY'](db):
                try:
                    storage = (ImageStorage
                               .select()
                               .where(ImageStorage.id == image_storage_id.id)
                               .get())
                    image = Image.select().where(Image.storage == storage).get()

                    # Presumably `ancestors` has the form '/id1/id2/'; the
                    # slice drops the first and last pieces of the split.
                    ancestor_ids = image.ancestors.split('/')[1:-1]

                    # NOTE(review): a NULL image_size on any involved storage
                    # would raise TypeError in the sum below -- confirm that
                    # sizes are always populated before running this.
                    aggregate_size = storage.image_size
                    for ancestor_id in ancestor_ids:
                        # Fetch the ancestor row itself and add *its*
                        # storage size, not the size of the starting image.
                        ancestor = (Image
                                    .select(Image, ImageStorage)
                                    .join(ImageStorage)
                                    .where(Image.id == ancestor_id)
                                    .get())
                        aggregate_size += ancestor.storage.image_size

                    storage.aggregate_size = aggregate_size
                    storage.save()
                except (ImageStorage.DoesNotExist, Image.DoesNotExist):
                    # Best effort: leave this storage untouched and move on.
                    LOGGER.debug('Skipping image storage %s: required storage '
                                 'or image row not found', image_storage_id.id)
if __name__ == "__main__":
    # Script entry point: emit DEBUG-level logging for the backfill itself.
    logging.basicConfig(level=logging.DEBUG)

    # Restrict the 'peewee' logger to CRITICAL records only.
    peewee_logger = logging.getLogger('peewee')
    peewee_logger.setLevel(logging.CRITICAL)

    backfill_aggregate_sizes()