Backfill by allocating and selecting ids in random blocks

Fixes #826
This commit is contained in:
Jake Moshenko 2015-11-09 20:51:38 -05:00
parent e826b14ca4
commit dc24e8b1a1
8 changed files with 347 additions and 70 deletions

View file

@ -1,9 +1,9 @@
import logging
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
TextField)
TextField, fn)
from data.database import BaseModel, db, db_for_update
from util.migrate import yield_random_entries
from util.migrate.allocator import yield_random_entries
from app import app
@ -48,20 +48,22 @@ def backfill_checksums():
.where(Image.v1_checksum >> None, ImageStorage.uploading == False,
~(ImageStorage.checksum >> None)))
for candidate_image in yield_random_entries(batch_query, 10000, 0.1):
logger.debug('Computing content checksum for storage: %s', candidate_image.id)
max_id = Image.select(fn.Max(Image.id)).scalar()
for candidate_image, abort in yield_random_entries(batch_query, Image.id, 1000, max_id):
with app.config['DB_TRANSACTION_FACTORY'](db):
try:
image = db_for_update(Image
.select(Image, ImageStorage)
.join(ImageStorage)
.where(Image.id == candidate_image.id)).get()
.where(Image.id == candidate_image.id,
Image.v1_checksum >> None)).get()
image.v1_checksum = image.storage.checksum
image.save()
except Image.DoesNotExist:
pass
logger.info('Collision with another worker, aborting batch')
abort.set()
if __name__ == "__main__":