This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/util/uncompressedsize.py
Joseph Schorr 9aa72c5cc2 Fix migration issues:
- MySQL 5.5 doesn't support the now() call as a default
  - Postgres migration isn't auto-committed, so we have to check if the table exists first
2014-11-03 15:25:55 -05:00

104 lines
3.5 KiB
Python

import logging
import zlib
import sys
from data import model
from data.database import ImageStorage, configure
from app import app, storage as store
from data.database import db, db_random_func
from util.gzipstream import ZLIB_GZIP_WINDOW
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# 5 MiB: the number of bytes requested from the storage stream per read, and
# also the cap on the output produced by each individual decompress call.
CHUNK_SIZE = 5 * 1024 * 1024
def _measure_uncompressed_size(stream):
    """Return the number of bytes the gzip data readable from `stream` inflates to.

    The stream is consumed in CHUNK_SIZE reads and each decompress call is
    capped at CHUNK_SIZE bytes of output, so neither the compressed nor the
    uncompressed layer is ever held in memory in full.
    """
    decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
    uncompressed_size = 0

    while True:
        current_data = stream.read(CHUNK_SIZE)
        if not current_data:
            break

        # Input that could not be processed because the output cap was reached
        # is handed back via unconsumed_tail; keep feeding it until it is empty.
        while current_data:
            uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
            current_data = decompressor.unconsumed_tail

    # Count any output still buffered inside the decompressor at end of stream.
    uncompressed_size += len(decompressor.flush())
    return uncompressed_size


def _backfill_storage_size(uuid):
    """Compute and store the uncompressed size for the storage identified by `uuid`.

    A storage that has vanished, or whose data cannot be read (IOError) or
    processed (MemoryError), is logged and skipped rather than aborting the
    whole backfill.
    """
    try:
        with_locs = model.get_storage_by_uuid(uuid)
        if with_locs.uncompressed_size is not None:
            logger.debug('Somebody else already filled this in for us: %s', uuid)
            return

        # Read the layer from backing storage and calculate the uncompressed size.
        logger.debug('Loading data: %s (%s bytes)', uuid, with_locs.image_size)
        with store.stream_read_file(with_locs.locations, store.image_layer_path(uuid)) as stream:
            uncompressed_size = _measure_uncompressed_size(stream)

        # Write the size to the image storage. We do so under a transaction AFTER checking to
        # make sure the image storage still exists and has not changed.
        logger.debug('Writing entry: %s. Size: %s', uuid, uncompressed_size)
        with app.config['DB_TRANSACTION_FACTORY'](db):
            current_record = model.get_storage_by_uuid(uuid)
            if not current_record.uploading and current_record.uncompressed_size is None:
                current_record.uncompressed_size = uncompressed_size
                current_record.save()
            else:
                logger.debug('Somebody else already filled this in for us, after we did the work: %s',
                             uuid)

    except model.InvalidImageException:
        logger.warning('Storage with uuid no longer exists: %s', uuid)
    except IOError:
        logger.warning('IOError on %s', uuid)
    except MemoryError:
        logger.warning('MemoryError on %s', uuid)


def backfill_sizes_from_data():
    """Backfill `uncompressed_size` for ImageStorage rows where it is NULL.

    Repeatedly selects up to 100 random, non-uploading storages without an
    uncompressed size, streams each layer from backing storage to measure its
    inflated size, and writes the result back. Returns None once a batch
    contains no uuid that has not already been attempted in this run, or
    immediately if the ImageStorage table cannot be queried.
    """
    logger.setLevel(logging.DEBUG)
    logger.debug('Starting uncompressed image size backfill')

    # Make sure we have a reference to the current DB.
    configure(app.config)

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    # UUIDs already attempted in this run, whether or not they succeeded.
    encountered = set()

    # Try reading the ImageStorage table count. If it doesn't exist, then this is a postgres
    # initial setup migration and we can skip this step anyway.
    try:
        ImageStorage.select().count()
    except Exception:
        # Deliberately broad (any DB driver error), but unlike a bare except this
        # lets KeyboardInterrupt and SystemExit propagate.
        logger.debug('Skipping migration for new setup')
        return

    while True:
        # Load a random batch of candidate records from the DB.
        batch = (ImageStorage
                 .select(ImageStorage.uuid)
                 .where(ImageStorage.uncompressed_size >> None,
                        ImageStorage.uploading == False)
                 .limit(100)
                 .order_by(db_random_func()))

        # Compare by uuid: `encountered` holds uuid strings, so subtracting it
        # from a set of model instances would never filter anything out and
        # permanently failing storages would be retried forever.
        batch_ids = set(record.uuid for record in batch) - encountered
        if not batch_ids:
            # We're done! Every candidate returned has already been attempted.
            return

        for uuid in batch_ids:
            encountered.add(uuid)
            _backfill_storage_size(uuid)
if __name__ == "__main__":
    # Script entry point: log everything at DEBUG, but restrict the boto and
    # peewee loggers to CRITICAL before running the backfill.
    logging.basicConfig(level=logging.DEBUG)
    for noisy_logger in ('boto', 'peewee'):
        logging.getLogger(noisy_logger).setLevel(logging.CRITICAL)

    backfill_sizes_from_data()