2014-09-23 20:09:33 +00:00
|
|
|
import logging
|
2014-10-03 19:07:50 +00:00
|
|
|
import zlib
|
2014-10-07 19:29:56 +00:00
|
|
|
import sys
|
2014-09-23 20:09:33 +00:00
|
|
|
|
2014-09-23 18:01:27 +00:00
|
|
|
from data import model
|
|
|
|
from data.database import ImageStorage
|
|
|
|
from app import app, storage as store
|
2014-10-06 21:15:45 +00:00
|
|
|
from data.database import db, db_random_func
|
2014-10-03 19:07:50 +00:00
|
|
|
from util.gzipstream import ZLIB_GZIP_WINDOW
|
2014-09-23 18:01:27 +00:00
|
|
|
|
|
|
|
|
2014-09-23 20:06:38 +00:00
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
2014-10-03 20:14:24 +00:00
|
|
|
|
2014-10-06 15:26:23 +00:00
|
|
|
# 5 MiB: used both as the number of bytes read from storage per call and as
# the cap on decompressed output produced per decompress() call.
CHUNK_SIZE = 5 * 1024 * 1024
|
2014-10-03 20:14:24 +00:00
|
|
|
|
|
|
|
|
2014-09-26 16:21:50 +00:00
|
|
|
def backfill_sizes_from_data():
    """Fill in ``uncompressed_size`` for image storages that lack it.

    Repeatedly selects a random batch of up to 100 ``ImageStorage`` rows whose
    ``uncompressed_size`` is NULL and which are not uploading, streams each
    layer from backing storage, measures its decompressed size, and writes
    the result back inside a database transaction.

    Every uuid that has been attempted is remembered, so storages that fail
    (missing record, IOError, MemoryError) are not retried. The function
    returns once a batch contains no uuid that has not been attempted yet.
    """
    logger.setLevel(logging.DEBUG)

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    # Logged only after the handler is attached so the message reaches it.
    logger.debug('Starting uncompressed image size backfill')

    # uuids we have already attempted, successfully or not.
    encountered = set()

    while True:
        # Load a random batch of candidate records from the DB.
        # `>> None` is the peewee spelling of IS NULL; `== False` must stay an
        # equality expression because peewee turns it into SQL.
        batch = list(ImageStorage
                     .select(ImageStorage.uuid)
                     .where(ImageStorage.uncompressed_size >> None,
                            ImageStorage.uploading == False)  # noqa: E712
                     .limit(100)
                     .order_by(db_random_func()))

        # Compare uuids with uuids. Subtracting the uuid set from a set of
        # record objects never removes anything, which made permanently
        # failing storages get re-selected forever.
        batch_uuids = set(record.uuid for record in batch) - encountered

        if not batch_uuids:
            # We're done!
            return

        for uuid in batch_uuids:
            encountered.add(uuid)

            try:
                with_locs = model.get_storage_by_uuid(uuid)
                if with_locs.uncompressed_size is not None:
                    logger.debug('Somebody else already filled this in for us: %s', uuid)
                    continue

                # Read the layer from backing storage and calculate the uncompressed size.
                logger.debug('Loading data: %s (%s bytes)', uuid, with_locs.image_size)
                layer_path = store.image_layer_path(uuid)
                with store.stream_read_file(with_locs.locations, layer_path) as stream:
                    uncompressed_size = _measure_uncompressed_size(stream)

                # Write the size to the image storage. We do so under a transaction AFTER
                # checking to make sure the image storage still exists and has not changed.
                logger.debug('Writing entry: %s. Size: %s', uuid, uncompressed_size)
                with app.config['DB_TRANSACTION_FACTORY'](db):
                    current_record = model.get_storage_by_uuid(uuid)

                    if not current_record.uploading and current_record.uncompressed_size is None:
                        current_record.uncompressed_size = uncompressed_size
                        current_record.save()
                    else:
                        logger.debug('Somebody else already filled this in for us, after we did the work: %s',
                                     uuid)

            except model.InvalidImageException:
                logger.warning('Storage with uuid no longer exists: %s', uuid)
            except IOError:
                logger.warning('IOError on %s', uuid)
            except MemoryError:
                logger.warning('MemoryError on %s', uuid)


def _measure_uncompressed_size(stream):
    """Return the number of bytes ``stream`` decompresses to.

    ``stream`` must provide ``read(size)`` and return an empty value at EOF.
    Data is read ``CHUNK_SIZE`` bytes at a time and each ``decompress`` call
    is capped at ``CHUNK_SIZE`` bytes of output, so neither the compressed
    nor the decompressed layer is held in memory in full.
    """
    # ZLIB_GZIP_WINDOW is passed as the wbits argument; presumably it selects
    # gzip framing -- confirm in util.gzipstream.
    decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
    uncompressed_size = 0

    while True:
        current_data = stream.read(CHUNK_SIZE)
        if len(current_data) == 0:
            break

        # Input that could not be consumed because of the output cap is left
        # in `unconsumed_tail` and must be fed back in.
        while current_data:
            uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
            current_data = decompressor.unconsumed_tail

    # A capped decompress() call can stop with output still buffered inside
    # the decompressor; flush() returns whatever remains so it is counted.
    uncompressed_size += len(decompressor.flush())
    return uncompressed_size
|
2014-09-26 16:21:50 +00:00
|
|
|
|
2014-09-23 18:01:27 +00:00
|
|
|
if __name__ == "__main__":
    # Script entry point: log everything at DEBUG, but silence the chatty
    # third-party loggers before kicking off the backfill.
    logging.basicConfig(level=logging.DEBUG)
    for noisy_logger_name in ('boto', 'peewee'):
        logging.getLogger(noisy_logger_name).setLevel(logging.CRITICAL)

    backfill_sizes_from_data()
|