This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/tools/uncompressedsize.py

106 lines
3.2 KiB
Python

import json
import logging
from data import model
from data.database import ImageStorage
from app import app, storage as store
from data.database import db
from gzip import GzipFile
from StringIO import StringIO
logger = logging.getLogger(__name__)
def backfill_sizes_from_json():
    """Populate ImageStorage.uncompressed_size from each image's stored JSON.

    Works through non-uploading storages whose uncompressed_size is unset,
    100 at a time, until none remain. When the JSON carries no 'Size'/'size'
    key the sentinel -1 is written instead, so a later pass
    (backfill_sizes_from_data) can compute the size from the layer bytes.
    """
    # The query is re-cloned each pass; rows we fill in drop out of the
    # NULL filter, so successive passes see the next batch of 100.
    pending = (ImageStorage
               .select()
               .where(ImageStorage.uncompressed_size == None,
                      ImageStorage.uploading == False)
               .limit(100))

    seen = 0
    without_size = 0
    handled_this_pass = 1

    while handled_this_pass > 0:
        handled_this_pass = 0

        # Each batch is committed as one transaction.
        with app.config['DB_TRANSACTION_FACTORY'](db):
            for record in pending.clone():
                seen += 1
                handled_this_pass += 1

                if (seen - 1) % 100 == 0:
                    logger.debug('Storing entry: %s', seen)

                # Fetch and parse the image's JSON blob from backing storage.
                uuid = record.uuid
                with_locations = model.get_storage_by_uuid(uuid)
                json_string = store.get_content(with_locations.locations,
                                                store.image_json_path(uuid))
                json_data = json.loads(json_string)

                # Either key casing may appear; -1 marks "not present".
                size = json_data.get('Size', json_data.get('size', -1))
                if size == -1:
                    without_size += 1
                    logger.debug('Missing entry %s (%s/%s)', uuid, without_size, seen)

                record.uncompressed_size = size
                record.save()
def backfill_sizes_from_data():
    """Compute uncompressed_size for storages carrying the -1 sentinel.

    For each non-uploading ImageStorage whose uncompressed_size is -1 (set
    by backfill_sizes_from_json when the image JSON had no size field),
    download the gzipped layer, measure its decompressed length, and write
    it back — under a transaction, and only after re-checking that the
    record still exists and has not changed in the meantime.
    """
    # Snapshot the candidate UUIDs up front; the loop below may run long.
    storage_ids = list(ImageStorage
                       .select(ImageStorage.uuid)
                       .where(ImageStorage.uncompressed_size == -1,
                              ImageStorage.uploading == False))

    counter = 0
    for uuid in [s.uuid for s in storage_ids]:
        counter += 1

        # Load the storage with locations.
        logger.debug('Loading entry: %s (%s/%s)', uuid, counter, len(storage_ids))
        with_locations = model.get_storage_by_uuid(uuid)

        # -2 means "not computed"; it is never written to the DB because
        # the failure paths below `continue` before the write.
        layer_size = -2

        # Read the layer from backing storage.
        logger.debug('Reading entry: %s (%s bytes)', uuid, with_locations.image_size)
        try:
            layer_data = store.get_content(with_locations.locations,
                                           store.image_layer_path(uuid))
        except Exception as ex:
            logger.debug('Could not read entry: %s. Reason: %s', uuid, ex)
            continue

        # Calculate the uncompressed size by gunzipping in memory.
        try:
            layer_stream = StringIO(layer_data)
            with GzipFile(fileobj=layer_stream, mode='rb') as gzip_file:
                layer_size = len(gzip_file.read())
                # Lazy %-args (was eager '%s' % ... string formatting).
                logger.debug('%s', gzip_file.size)
            layer_stream.close()
        except Exception as ex:
            logger.debug('Could not gunzip entry: %s. Reason: %s', uuid, ex)
            continue

        # Write the size to the image storage. We do so under a transaction
        # AFTER checking to make sure the image storage still exists and
        # has not changed.
        logger.debug('Writing entry: %s. Size: %s', uuid, layer_size)
        with app.config['DB_TRANSACTION_FACTORY'](db):
            try:
                current_record = model.get_storage_by_uuid(uuid)
            except Exception:
                # Record no longer exists. (Was a bare `except:`, which would
                # also swallow KeyboardInterrupt/SystemExit.)
                continue

            if not current_record.uploading and current_record.uncompressed_size == -1:
                current_record.uncompressed_size = layer_size
                current_record.save()
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    # boto logs every storage request at DEBUG; silence it so the backfill
    # progress messages stay readable.
    logging.getLogger('boto').setLevel(logging.CRITICAL)
    # JSON pass first (writes -1 where the image JSON has no size), then the
    # data pass computes those remaining -1 entries from the layer bytes.
    backfill_sizes_from_json()
    backfill_sizes_from_data()