Optimistically update backfill items, reducing database round trips
This commit is contained in:
parent
493d077f62
commit
a33077b978
3 changed files with 59 additions and 43 deletions
|
@ -2,11 +2,14 @@ import logging
|
|||
|
||||
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
|
||||
TextField, fn)
|
||||
from data.database import BaseModel, db, db_for_update
|
||||
from data.database import BaseModel
|
||||
from util.migrate.allocator import yield_random_entries
|
||||
from app import app
|
||||
|
||||
|
||||
BATCH_SIZE = 1000
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -40,30 +43,30 @@ class Image(BaseModel):
|
|||
|
||||
def backfill_checksums():
|
||||
""" Copies checksums from image storages to their images. """
|
||||
logger.debug('Image v1 checksum backfill: Began execution')
|
||||
logger.debug('Began execution')
|
||||
logger.debug('This may be a long operation!')
|
||||
def batch_query():
|
||||
return (Image
|
||||
.select(Image.id)
|
||||
.select(Image, ImageStorage)
|
||||
.join(ImageStorage)
|
||||
.where(Image.v1_checksum >> None, ImageStorage.uploading == False,
|
||||
~(ImageStorage.checksum >> None)))
|
||||
|
||||
max_id = Image.select(fn.Max(Image.id)).scalar()
|
||||
|
||||
for candidate_image, abort in yield_random_entries(batch_query, Image.id, 1000, max_id):
|
||||
with app.config['DB_TRANSACTION_FACTORY'](db):
|
||||
try:
|
||||
image = db_for_update(Image
|
||||
.select(Image, ImageStorage)
|
||||
.join(ImageStorage)
|
||||
.where(Image.id == candidate_image.id,
|
||||
Image.v1_checksum >> None)).get()
|
||||
written = 0
|
||||
for candidate_image, abort in yield_random_entries(batch_query, Image.id, BATCH_SIZE, max_id):
|
||||
num_changed = (Image
|
||||
.update(v1_checksum=candidate_image.storage.checksum)
|
||||
.where(Image.id == candidate_image.id, Image.v1_checksum >> None)).execute()
|
||||
if num_changed == 0:
|
||||
logger.info('Collision with another worker, aborting batch')
|
||||
abort.set()
|
||||
written += num_changed
|
||||
if (written % BATCH_SIZE) == 0:
|
||||
logger.debug('%s entries written', written)
|
||||
|
||||
image.v1_checksum = image.storage.checksum
|
||||
image.save()
|
||||
except Image.DoesNotExist:
|
||||
logger.info('Collision with another worker, aborting batch')
|
||||
abort.set()
|
||||
logger.debug('Completed, updated %s entries', written)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Reference in a new issue