Backfill the v1 checksums from imagestorage
This commit is contained in:
parent
f3c3e684a1
commit
88b9e80cbb
4 changed files with 257 additions and 0 deletions
70
util/migrate/backfill_v1_checksums.py
Normal file
70
util/migrate/backfill_v1_checksums.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
import logging
|
||||
|
||||
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
|
||||
TextField)
|
||||
from data.database import BaseModel, db, db_for_update
|
||||
from util.migrate import yield_random_entries
|
||||
from app import app
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Repository(BaseModel):
    """Field-less stand-in for the repository table.

    Declares no columns of its own; it is defined in this script so that
    ``Image.repository`` has a model to point its foreign key at.
    """
|
||||
|
||||
|
||||
# Local copies of the table definitions this migration writes to, captured as
# they were at the time the migration was written.
class ImageStorage(BaseModel):
    """Vendored storage model; the source of the checksum copied onto images."""

    uuid = CharField(unique=True, index=True)
    # Value that backfill_checksums() copies into Image.v1_checksum.
    checksum = CharField(null=True)
    image_size = BigIntegerField(null=True)
    uncompressed_size = BigIntegerField(null=True)
    # backfill_checksums() only selects images whose storage has this set to False.
    uploading = BooleanField(null=True, default=True)
    cas_path = BooleanField(default=True)
    content_checksum = CharField(index=True, null=True)
|
||||
|
||||
|
||||
class Image(BaseModel):
    """Vendored image model; ``v1_checksum`` is the column this script fills in."""

    docker_image_id = CharField(index=True)
    repository = ForeignKeyField(Repository)
    ancestors = CharField(null=True, default='/', max_length=64535, index=True)
    # Link to the storage row whose checksum is copied by backfill_checksums().
    storage = ForeignKeyField(ImageStorage, null=True, index=True)
    created = DateTimeField(null=True)
    comment = TextField(null=True)
    command = TextField(null=True)
    aggregate_size = BigIntegerField(null=True)
    v1_json_metadata = TextField(null=True)
    # Target of the backfill; rows where this is NULL are the candidates.
    v1_checksum = CharField(null=True)
|
||||
|
||||
|
||||
def backfill_checksums():
    """ Copies checksums from image storages to their images.

    Iterates over every image that has no ``v1_checksum`` yet and whose
    storage is not marked as uploading and already has a checksum, and writes
    the storage's checksum onto the image. Each image is updated inside its
    own transaction. Images that can no longer be found when re-read are
    skipped silently.

    Takes no arguments and returns nothing; the result is the updated rows.
    """
    logger.debug('Image v1 checksum backfill: Began execution')

    def batch_query():
        # Candidate ids only: the full row is re-read inside the transaction below.
        # `>> None` is peewee's IS NULL test and `~(...)` negates it.
        return (Image
                .select(Image.id)
                .join(ImageStorage)
                .where(Image.v1_checksum >> None,
                       ImageStorage.uploading == False,  # noqa: E712 (builds a SQL expression)
                       ~(ImageStorage.checksum >> None)))

    # The 10000 and 0.1 arguments are passed straight to yield_random_entries;
    # their exact meaning is defined by that helper (presumably batch sizing
    # and a sampling/collision parameter -- confirm against util.migrate).
    for candidate_image in yield_random_entries(batch_query, 10000, 0.1):
        logger.debug('Backfilling v1 checksum for image: %s', candidate_image.id)

        with app.config['DB_TRANSACTION_FACTORY'](db):
            try:
                # Re-read the image together with its storage inside the
                # transaction. db_for_update presumably adds row locking to
                # the query -- confirm against data.database.
                image = db_for_update(Image
                                      .select(Image, ImageStorage)
                                      .join(ImageStorage)
                                      .where(Image.id == candidate_image.id)).get()
            except Image.DoesNotExist:
                # The row vanished between candidate selection and now; nothing to do.
                continue

            image.v1_checksum = image.storage.checksum
            image.save()

    logger.debug('Image v1 checksum backfill: Complete')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Emit this script's debug output, but raise peewee's own logger to
    # CRITICAL so its messages do not appear alongside it.
    logging.basicConfig(level=logging.DEBUG)
    peewee_logger = logging.getLogger('peewee')
    peewee_logger.setLevel(logging.CRITICAL)

    backfill_checksums()
|
Reference in a new issue