diff --git a/tools/uncompressedsize.py b/tools/uncompressedsize.py
index 32ba0957c..f326c4d3d 100644
--- a/tools/uncompressedsize.py
+++ b/tools/uncompressedsize.py
@@ -5,12 +5,14 @@ from data import model
 from data.database import ImageStorage
 from app import app, storage as store
 from data.database import db
+from gzip import GzipFile
+from StringIO import StringIO
 
 
 logger = logging.getLogger(__name__)
 
 
-def backfill_sizes():
+def backfill_sizes_from_json():
   query = (ImageStorage
            .select()
            .where(ImageStorage.uncompressed_size == None, ImageStorage.uploading == False)
@@ -47,8 +49,57 @@ def backfill_sizes():
     image_storage.save()
 
 
+def backfill_sizes_from_data():
+  storage_ids = list(ImageStorage
+                     .select(ImageStorage.uuid)
+                     .where(ImageStorage.uncompressed_size == -1, ImageStorage.uploading == False))
+
+  counter = 0
+  for uuid in [s.uuid for s in storage_ids]:
+    counter += 1
+
+    # Load the storage with locations.
+    logger.debug('Loading entry: %s (%s/%s)', uuid, counter, len(storage_ids))
+    with_locations = model.get_storage_by_uuid(uuid)
+    layer_size = -2
+
+    # Read the layer from backing storage.
+    logger.debug('Reading entry: %s (%s bytes)', uuid, with_locations.image_size)
+    try:
+      layer_data = store.get_content(with_locations.locations, store.image_layer_path(uuid))
+    except Exception as ex:
+      logger.debug('Could not read entry: %s. Reason: %s', uuid, ex)
+      continue
+
+    # Calculate the uncompressed size.
+    try:
+      layer_stream = StringIO(layer_data)
+      with GzipFile(fileobj=layer_stream, mode='rb') as gzip_file:
+        layer_size = len(gzip_file.read())
+        logger.debug('Uncompressed size: %s', gzip_file.size)
+
+      layer_stream.close()
+    except Exception as ex:
+      logger.debug('Could not gunzip entry: %s. Reason: %s', uuid, ex)
+      continue
+
+    # Write the size to the image storage. We do so under a transaction AFTER checking to
+    # make sure the image storage still exists and has not changed.
+    logger.debug('Writing entry: %s. Size: %s', uuid, layer_size)
+    with app.config['DB_TRANSACTION_FACTORY'](db):
+      try:
+        current_record = model.get_storage_by_uuid(uuid)
+      except:
+        # Record no longer exists.
+        continue
+
+      if not current_record.uploading and current_record.uncompressed_size == -1:
+        current_record.uncompressed_size = layer_size
+        current_record.save()
+
+
 if __name__ == "__main__":
   logging.basicConfig(level=logging.DEBUG)
   logging.getLogger('boto').setLevel(logging.CRITICAL)
-  backfill_sizes()
+  backfill_sizes_from_data()
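
Note: len(gzip_file.read()) materializes the entire uncompressed layer in memory, which can run to gigabytes for large images. Below is a minimal sketch of a streaming alternative, assuming the same Python 2 environment as the script; uncompressed_size_streaming is a hypothetical helper, not part of this patch. It feeds the compressed bytes through zlib in fixed-size chunks and only counts the output:

import zlib

GZIP_WINDOW = 16 + zlib.MAX_WBITS  # tell zlib to expect a gzip header/trailer
CHUNK_SIZE = 5 * 1024 * 1024       # decompress 5 MB of input at a time

# Hypothetical helper, not part of this patch.
def uncompressed_size_streaming(layer_data):
  # Count decompressed bytes chunk by chunk instead of building the
  # whole uncompressed layer in memory with gzip_file.read().
  decompressor = zlib.decompressobj(GZIP_WINDOW)
  size = 0
  for offset in xrange(0, len(layer_data), CHUNK_SIZE):
    size += len(decompressor.decompress(layer_data[offset:offset + CHUNK_SIZE]))
  size += len(decompressor.flush())
  return size

The compressed bytes still arrive fully in memory from store.get_content(), so this only bounds the decompressed side; streaming the read from backing storage as well would require a chunked storage API.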
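
For reference, the value logged from gzip_file.size should agree with len(gzip_file.read()): Python 2.7's GzipFile accumulates the count of uncompressed bytes as it reads, so once the stream is consumed to EOF the two are the same number. A quick self-contained check:

from gzip import GzipFile
from StringIO import StringIO

# Round-trip a known payload and compare both size measurements.
buf = StringIO()
with GzipFile(fileobj=buf, mode='wb') as compressor:
  compressor.write('x' * 1000)

stream = StringIO(buf.getvalue())
with GzipFile(fileobj=stream, mode='rb') as gzip_file:
  data = gzip_file.read()
  assert len(data) == gzip_file.size == 1000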