117 lines
4 KiB
Python
117 lines
4 KiB
Python
import logging
|
|
|
|
from peewee import JOIN_LEFT_OUTER
|
|
|
|
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
|
|
TextField)
|
|
|
|
from data.database import BaseModel, db, db_for_update
|
|
from app import app, storage
|
|
from data import model
|
|
|
|
|
|
# Module-level logger, named after this module, used by the backfill routine below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class Repository(BaseModel):
    """ Bare stand-in for the repository table.

    Declares no columns of its own; it exists in this module so that other models
    here can point a foreign key at it.
    """
|
|
|
|
|
|
# Vendor the information from tables we will be writing to at the time of this migration
|
|
class ImageStorage(BaseModel):
    """ Local copy of the image storage table definition used by this script. """

    # Unique, indexed identifier of the storage; used to build the storage path.
    uuid = CharField(unique=True, index=True)

    # Optional checksum column.
    checksum = CharField(null=True)

    # Optional size columns.
    image_size = BigIntegerField(null=True)
    uncompressed_size = BigIntegerField(null=True)

    # New rows default to uploading=True; the backfill only selects rows where
    # this is False.
    uploading = BooleanField(null=True, default=True)
|
|
|
|
|
|
class Image(BaseModel):
    """ Local copy of the image table definition used by this script.

    The table is denormalized on purpose: images are meant to be globally unique,
    but for permission and security reasons they cannot be treated that way.
    Instead of a many-to-many Repository <-> Image relation, every image row
    belongs to exactly one repository.
    """

    docker_image_id = CharField(index=True)
    repository = ForeignKeyField(Repository)

    # '/'-separated list of ancestor ids, e.g. /1/2/6/7/10/
    ancestors = CharField(null=True, default='/', max_length=64535, index=True)

    # Storage record backing this image (optional).
    storage = ForeignKeyField(ImageStorage, null=True, index=True)

    # Optional descriptive columns.
    created = DateTimeField(null=True)
    comment = TextField(null=True)
    command = TextField(null=True)
    aggregate_size = BigIntegerField(null=True)

    # Column populated by backfill_v1_metadata(); NULL marks a row not yet filled.
    v1_json_metadata = TextField(null=True)
|
|
|
|
|
|
class ImageStorageLocation(BaseModel):
    """ Local copy of the storage location table: one uniquely named row per location. """

    name = CharField(index=True, unique=True)
|
|
|
|
|
|
class ImageStoragePlacement(BaseModel):
    """ Local copy of the placement table linking a storage to a location. """

    # The storage being placed.
    storage = ForeignKeyField(ImageStorage)

    # The location that storage is placed in.
    location = ForeignKeyField(ImageStorageLocation)
|
|
|
|
|
|
def image_json_path(storage_uuid):
    """ Returns the path of the 'json' entry under the image path of the given storage uuid. """
    # storage.image_path() supplies the prefix; 'json' is appended directly with
    # no separator added here.
    return '{0}json'.format(storage.image_path(storage_uuid))
|
|
|
|
|
|
def _backfill_image_metadata(image_id):
    """ Loads the v1 json blob for a single image and stores it on the image row.

    The work happens inside one transaction obtained from the app's
    DB_TRANSACTION_FACTORY. Images whose lookup yields nothing are skipped with a
    warning; images whose blob cannot be read get v1_json_metadata set to None.

    Returns True when the caller may carry on with the next image, False when the
    lookup for this single id produced more than one image (the query itself is
    then suspect and the whole backfill should stop).
    """
    with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
            logger.debug('Loading image: %s', image_id)

            raw_query = (ImageStoragePlacement
                         .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
                         .join(ImageStorageLocation)
                         .switch(ImageStoragePlacement)
                         .join(ImageStorage, JOIN_LEFT_OUTER)
                         .join(Image)
                         .where(Image.id == image_id))

            # NOTE(review): db_for_update presumably adds row locking to the query;
            # confirm against data.database.
            placement_query = db_for_update(raw_query)

            repo_image_list = model.image.invert_placement_query_results(placement_query)
            if len(repo_image_list) > 1:
                logger.error('Found more images than we requested, something is wrong with the query')
                return False

            if not repo_image_list:
                # Nothing came back for this id (e.g. no placement rows). Indexing
                # the empty result would raise IndexError and abort the whole run.
                logger.warning('No image found through storage placements for image: %s, skipping',
                               image_id)
                return True

            repo_image = repo_image_list[0]
            uuid = repo_image.storage.uuid
            json_path = image_json_path(uuid)

            logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
            try:
                data = storage.get_content(repo_image.storage.locations, json_path)
            except IOError:
                data = None
                logger.exception('failed to find v1 metadata, defaulting to None')

            repo_image.v1_json_metadata = data
            repo_image.save()
        except ImageStoragePlacement.DoesNotExist:
            # Best effort: keep going, but leave a trace instead of failing silently.
            logger.debug('Placement lookup raised DoesNotExist for image: %s, skipping', image_id)

    return True


def backfill_v1_metadata():
    """ Copies metadata from image storages to their images.

    Walks, in ascending id order and in batches of 100, the images whose
    v1_json_metadata is NULL and whose storage is not marked as uploading, and
    backfills each one in its own transaction.

    Every image is attempted at most once per run: the batch query only selects
    ids above the highest id already handled. Without that cursor, images that
    cannot be filled (their metadata stays NULL) would be selected again by every
    following batch and the loop would never terminate.

    Returns None. Stops early, after logging an error, if the lookup for a single
    image id yields more than one image.
    """
    logger.debug('Image v1 metadata backfill: Began execution')

    # Highest image id handled so far; None until the first image is processed.
    last_seen_id = None

    while True:
        # peewee builds SQL from these operators, so `>> None` (IS NULL) and
        # `== False` must stay as written rather than become `is` / `not`.
        clauses = [Image.v1_json_metadata >> None, ImageStorage.uploading == False]
        if last_seen_id is not None:
            clauses.append(Image.id > last_seen_id)

        batch_image_ids = list(Image
                               .select(Image.id)
                               .join(ImageStorage)
                               .where(*clauses)
                               .order_by(Image.id)
                               .limit(100))

        if not batch_image_ids:
            logger.debug('Image v1 metadata backfill: Backfill completed')
            return

        logger.debug('Image v1 metadata backfill: Found %s records to update', len(batch_image_ids))
        for one_id in batch_image_ids:
            # Advance the cursor before the attempt so a skipped or failed image
            # is never selected again during this run.
            last_seen_id = one_id.id
            if not _backfill_image_metadata(one_id.id):
                return
|
|
|
|
if __name__ == '__main__':
    # Script entry point: enable debug-level logging, then run the backfill.
    logging.basicConfig(level=logging.DEBUG)

    # Optional: raise the threshold of the 'peewee' logger to quieten it.
    # logging.getLogger('peewee').setLevel(logging.CRITICAL)

    backfill_v1_metadata()
|