Select random records for the uncompressed-size backfill script so that it can now be parallelized.
commit 153dbc3f92
parent c4266140e2

1 changed file with 39 additions and 34 deletions
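The crux of the change: the old loop always pulled the single first unfilled row with .get(), so concurrent copies of the script contended for the same record, while the new query draws a random batch of up to 100 candidates per iteration. Roughly what the new peewee query renders to (a sketch; the names follow the ImageStorage model used in the diff below, and the exact SQL depends on the configured engine):

# Assumes the imports shown in the diff below (ImageStorage, db_random_func).
# Note that `field >> None` is peewee's spelling of IS NULL.
query = (ImageStorage
         .select(ImageStorage.uuid)
         .where(ImageStorage.uncompressed_size >> None,
                ImageStorage.uploading == False)
         .limit(100)
         .order_by(db_random_func()))

# Approximately (engine-dependent):
#   SELECT uuid FROM imagestorage
#   WHERE uncompressed_size IS NULL AND uploading = false
#   ORDER BY RANDOM() LIMIT 100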
@@ -4,7 +4,7 @@ import zlib
 from data import model
 from data.database import ImageStorage
 from app import app, storage as store
-from data.database import db
+from data.database import db, db_random_func
 from util.gzipstream import ZLIB_GZIP_WINDOW
 
 
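The one import change pulls in db_random_func. Its definition lives in data/database.py and is not part of this diff; presumably it resolves to the engine's SQL random function, which PostgreSQL and SQLite spell RANDOM() and MySQL spells RAND(). A minimal sketch of such a helper using peewee's fn factory (the driver constant and dispatch are illustrative, not the project's actual wiring):

from peewee import fn

_DB_DRIVER = 'postgres'  # assumed to come from the app's configuration

def db_random_func():
  # fn.<Name>() renders as the SQL function NAME(): MySQL has RAND(),
  # most other engines have RANDOM().
  return fn.Rand() if _DB_DRIVER == 'mysql' else fn.Random()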
@@ -17,18 +17,23 @@ CHUNK_SIZE = 5 * 1024 * 1024
 def backfill_sizes_from_data():
   while True:
     # Load the record from the DB.
-    try:
-      record = (ImageStorage
-                .select(ImageStorage.uuid)
-                .where(ImageStorage.uncompressed_size >> None, ImageStorage.uploading == False)
-                .get())
-    except ImageStorage.DoesNotExist:
+    batch_ids = list(ImageStorage
+                     .select(ImageStorage.uuid)
+                     .where(ImageStorage.uncompressed_size >> None,
+                            ImageStorage.uploading == False)
+                     .limit(100)
+                     .order_by(db_random_func()))
+    if len(batch_ids) == 0:
       # We're done!
       return
 
-    uuid = record.uuid
+    for record in batch_ids:
+      uuid = record.uuid
 
-    with_locations = model.get_storage_by_uuid(uuid)
+      with_locations = model.get_storage_by_uuid(uuid)
+      if with_locations.uncompressed_size is not None:
+        logger.debug('Somebody else already filled this in for us: %s', uuid)
+        continue
 
-    # Read the layer from backing storage and calculate the uncompressed size.
-    logger.debug('Loading data: %s (%s bytes)', uuid, with_locations.image_size)
+      # Read the layer from backing storage and calculate the uncompressed size.
+      logger.debug('Loading data: %s (%s bytes)', uuid, with_locations.image_size)
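What makes parallel runs safe is the combination of random ordering and the uncompressed_size re-check: two workers can still draw overlapping batches, but the loser of the race just logs and skips the row instead of redoing the work. A sketch of launching several workers from one machine, assuming backfill_sizes_from_data is importable from this script (the worker count is arbitrary):

from multiprocessing import Process

def run_backfill_workers(worker_count=4):
  # Each process runs the same while-loop; random batches keep their
  # working sets mostly disjoint, and the re-check resolves collisions.
  workers = [Process(target=backfill_sizes_from_data)
             for _ in range(worker_count)]
  for worker in workers:
    worker.start()
  for worker in workers:
    worker.join()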