import logging

from peewee import JOIN_LEFT_OUTER
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
                    TextField)
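# Note (assumption): JOIN_LEFT_OUTER is the peewee 2.x name for the left outer join
# constant (peewee 3 renamed it to JOIN.LEFT_OUTER); this script assumes the 2.x API
# used by the rest of the codebase at the time of the migration.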

from data.database import BaseModel, db, db_for_update
from app import app, storage
from data import model


logger = logging.getLogger(__name__)


class Repository(BaseModel):
  pass


# Vendored copies of the tables we will be writing to, as they existed at the time of this migration
class ImageStorage(BaseModel):
  uuid = CharField(index=True, unique=True)
  checksum = CharField(null=True)
  image_size = BigIntegerField(null=True)
  uncompressed_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)


class Image(BaseModel):
  # This class is intentionally denormalized. Even though images are supposed
  # to be globally unique, we can't treat them as such for permissions and
  # security reasons. So rather than Repository <-> Image being many-to-many,
  # each image now belongs to exactly one repository.
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)

  # '/'-separated list of ancestor ids, e.g. /1/2/6/7/10/
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)

  storage = ForeignKeyField(ImageStorage, index=True, null=True)

  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)


class ImageStorageLocation(BaseModel):
  name = CharField(unique=True, index=True)


class ImageStoragePlacement(BaseModel):
  storage = ForeignKeyField(ImageStorage)
  location = ForeignKeyField(ImageStorageLocation)


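# Note (assumption): storage._image_path(uuid) appears to return the image's base storage
# path with a trailing separator (which the '{0}json' format below relies on), so the
# result points at the v1 metadata blob stored alongside the layer data.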
def image_json_path(storage_uuid):
  base_path = storage._image_path(storage_uuid)
  return '{0}json'.format(base_path)


def backfill_v1_metadata():
  """ Copies the v1 JSON metadata blob from each image's storage into its image row. """
  logger.debug('Image v1 metadata backfill: Began execution')

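  # Work through the table in batches of 100. peewee's '>> None' operator translates to
  # IS NULL, so only images whose v1_json_metadata has not been backfilled yet are
  # selected, and storages not yet confirmed as fully uploaded are skipped.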
  while True:
    batch_image_ids = list(Image
                           .select(Image.id)
                           .join(ImageStorage)
                           .where(Image.v1_json_metadata >> None, ImageStorage.uploading == False)
                           .limit(100))

    if len(batch_image_ids) == 0:
      logger.debug('Image v1 metadata backfill: Backfill completed')
      return

    logger.debug('Image v1 metadata backfill: Found %s records to update', len(batch_image_ids))
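    # Each image is updated inside its own transaction (the app's configured
    # DB_TRANSACTION_FACTORY), so images already committed stay backfilled even if a
    # later one in the batch fails.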
    for one_id in batch_image_ids:
      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          logger.debug('Loading image: %s', one_id.id)

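          # Fetch the placements for this image together with the joined storage and
          # location rows, and (presumably) lock them via db_for_update so the image is
          # not modified concurrently while its metadata is copied.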
          raw_query = (ImageStoragePlacement
                       .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
                       .join(ImageStorageLocation)
                       .switch(ImageStoragePlacement)
                       .join(ImageStorage, JOIN_LEFT_OUTER)
                       .join(Image)
                       .where(Image.id == one_id.id))

          placement_query = db_for_update(raw_query)

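          # invert_placement_query_results regroups the placement-oriented rows into a
          # list of images whose storage carries its set of location names, which
          # storage.get_content needs below.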
          repo_image_list = model.image.invert_placement_query_results(placement_query)
          if len(repo_image_list) > 1:
            logger.error('Found more images than we requested, something is wrong with the query')
            return

          repo_image = repo_image_list[0]
          uuid = repo_image.storage.uuid
          json_path = image_json_path(uuid)

          logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
          try:
            data = storage.get_content(repo_image.storage.locations, json_path)
          except IOError:
            data = "{}"
            logger.warning('Failed to find v1 metadata for image %s, defaulting to {}',
                           repo_image.id)
          repo_image.v1_json_metadata = data
          repo_image.save()
        except ImageStoragePlacement.DoesNotExist:
          # An image with no placements has no blob to copy from; skip it.
          pass

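# Usage sketch (assumption: run from the application root so that 'app' and 'data' are
# importable):
#   python <path-to-this-script>
# The backfill is idempotent: rows already populated have a non-NULL v1_json_metadata and
# are excluded by the batch query above, so the script can safely be re-run after an
# interruption.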
if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  # logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_v1_metadata()