Optimistically update backfill items, reducing database round trips

This commit is contained in:
Jake Moshenko 2015-11-10 11:10:09 -05:00
parent 493d077f62
commit a33077b978
3 changed files with 59 additions and 43 deletions

View file

@ -1,11 +1,15 @@
import logging
from data.database import BaseModel, db, db_for_update
from data.database import BaseModel
from peewee import (fn, CharField, BigIntegerField, ForeignKeyField, BooleanField, DateTimeField,
TextField, IntegerField)
from app import app
from util.migrate.allocator import yield_random_entries
BATCH_SIZE = 1000
class Repository(BaseModel):
    """Empty model stub: subclasses BaseModel and declares no members of its own.

    NOTE(review): presumably a minimal local stand-in for the real Repository
    table so this backfill script does not depend on the full data model --
    confirm against the rest of the file.
    """
@ -56,19 +60,20 @@ def backfill_parent_id():
max_id = Image.select(fn.Max(Image.id)).scalar()
for to_backfill, abort in yield_random_entries(fetch_batch, Image.id, 1000, max_id):
with app.config['DB_TRANSACTION_FACTORY'](db):
try:
image = db_for_update(Image
.select()
.where(Image.id == to_backfill.id, Image.parent_id >> None)).get()
image.parent_id = int(to_backfill.ancestors.split('/')[-2])
image.save()
except Image.DoesNotExist:
logger.info('Collision with another worker, aborting batch')
abort.set()
written = 0
for to_backfill, abort in yield_random_entries(fetch_batch, Image.id, BATCH_SIZE, max_id):
computed_parent = int(to_backfill.ancestors.split('/')[-2])
num_changed = (Image
.update(parent_id=computed_parent)
.where(Image.id == to_backfill.id, Image.parent_id >> None)).execute()
if num_changed == 0:
logger.info('Collision with another worker, aborting batch')
abort.set()
written += num_changed
if (written % BATCH_SIZE) == 0:
logger.debug('%s entries written', written)
logger.debug('backfill_parent_id: Completed')
logger.debug('backfill_parent_id: Completed, updated %s entries', written)
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)