Add a uniqueness hash to derived image storage to break caching over tags

This allows converted ACIs and squashed images to be unique based on the specified tag.

Fixes #92
This commit is contained in:
Joseph Schorr 2016-06-06 15:38:29 -04:00
parent a33a70a419
commit a43b741f1b
7 changed files with 119 additions and 40 deletions

View file

@ -617,12 +617,13 @@ class DerivedStorageForImage(BaseModel):
source_image = ForeignKeyField(Image)
derivative = ForeignKeyField(ImageStorage)
transformation = ForeignKeyField(ImageStorageTransformation)
uniqueness_hash = CharField(null=True)
class Meta:
database = db
read_slaves = (read_slave,)
indexes = (
(('source_image', 'transformation'), True),
(('source_image', 'transformation', 'uniqueness_hash'), True),
)

View file

@ -0,0 +1,29 @@
"""Add uniqueness hash column for derived image storage
Revision ID: 1093d8b212bb
Revises: 0f17d94d11eb
Create Date: 2016-06-06 15:27:21.735669
"""
# revision identifiers, used by Alembic.
revision = '1093d8b212bb'
down_revision = '0f17d94d11eb'
from alembic import op
import sqlalchemy as sa
def upgrade(tables):
    """Add the ``uniqueness_hash`` column and widen the unique index to include it.

    The two-column unique index on (source_image_id, transformation_id) is
    replaced by a three-column unique index named ``uniqueness_index`` that
    also covers the new nullable ``uniqueness_hash`` column.

    ``tables`` is accepted but not used by this migration.
    """
    table = 'derivedstorageforimage'

    # The old two-column unique index is dropped before the replacement is built.
    op.drop_index('derivedstorageforimage_source_image_id_transformation_id', table_name=table)

    hash_column = sa.Column('uniqueness_hash', sa.String(length=255), nullable=True)
    op.add_column(table, hash_column)

    indexed_columns = ['source_image_id', 'transformation_id', 'uniqueness_hash']
    op.create_index('uniqueness_index', table, indexed_columns, unique=True)
def downgrade(tables):
    """Undo ``upgrade``: remove the hash-aware index and column, restore the old index.

    ``tables`` is accepted but not used by this migration.
    """
    table = 'derivedstorageforimage'

    # The index referencing the column goes first, then the column itself.
    op.drop_index('uniqueness_index', table_name=table)
    op.drop_column(table, 'uniqueness_hash')

    # NOTE(review): recreating this as a unique index presumably fails if rows
    # sharing (source_image_id, transformation_id) were inserted after the
    # upgrade -- confirm before running a downgrade on live data.
    original_columns = ['source_image_id', 'transformation_id']
    op.create_index('derivedstorageforimage_source_image_id_transformation_id', table,
                    original_columns, unique=True)

View file

@ -1,6 +1,7 @@
import logging
import dateutil.parser
import random
import hashlib
import json
from peewee import JOIN_LEFT_OUTER, IntegrityError
from datetime import datetime
@ -11,6 +12,7 @@ from data.database import (Image, Repository, ImageStoragePlacement, Namespace,
ImageStorageLocation, RepositoryPermission, DerivedStorageForImage,
ImageStorageTransformation, db_random_func)
from util.canonicaljson import canonicalize
logger = logging.getLogger(__name__)
@ -458,27 +460,39 @@ def set_secscan_status(image, indexed, version):
.execute()) != 0
def find_or_create_derived_storage(source_image, transformation_name, preferred_location):
existing = find_derived_storage_for_image(source_image, transformation_name)
def _get_uniqueness_hash(varying_metadata):
    """Return a hex SHA-256 digest identifying ``varying_metadata``, or None.

    Args:
        varying_metadata: JSON-serializable metadata, or a falsy value
            (``None``, empty dict, ...) when there is nothing to hash.

    Returns:
        ``None`` when ``varying_metadata`` is falsy; otherwise the hex digest
        of the JSON dump of ``canonicalize(varying_metadata)``.
    """
    if not varying_metadata:
        return None

    # canonicalize() presumably normalizes the structure so equal metadata
    # serializes identically -- see util.canonicaljson to confirm.
    serialized = json.dumps(canonicalize(varying_metadata))

    # hashlib requires bytes on Python 3. json.dumps emits ASCII by default, so
    # encoding as UTF-8 yields the same bytes (and the same digest) on Python 2.
    return hashlib.sha256(serialized.encode('utf-8')).hexdigest()
def find_or_create_derived_storage(source_image, transformation_name, preferred_location,
                                   varying_metadata=None):
    """Return the derived storage for an image, creating it if none exists.

    An existing derivative is looked up with ``find_derived_storage_for_image``
    using the source image, the transformation name and the varying metadata.
    When nothing is found, a new v1 storage is created at
    ``preferred_location`` and linked to the source image through a
    ``DerivedStorageForImage`` row that carries the metadata's uniqueness hash.

    Args:
        source_image: image the derivative is produced from.
        transformation_name: name of the ``ImageStorageTransformation`` to use.
        preferred_location: location passed to ``storage.create_v1_storage``.
        varying_metadata: optional metadata distinguishing derivatives of the
            same image and transformation; falsy values produce no hash.

    Returns:
        The existing derived storage if one was found, otherwise the newly
        created storage.
    """
    existing = find_derived_storage_for_image(source_image, transformation_name, varying_metadata)
    if existing is not None:
        return existing

    logger.debug('Creating storage derived from source image: %s', source_image.id)
    uniqueness_hash = _get_uniqueness_hash(varying_metadata)

    # NOTE(review): .get() presumably raises if no transformation has this
    # name -- confirm callers only pass registered transformation names.
    trans = ImageStorageTransformation.get(name=transformation_name)
    new_storage = storage.create_v1_storage(preferred_location)
    DerivedStorageForImage.create(source_image=source_image, derivative=new_storage,
                                  transformation=trans, uniqueness_hash=uniqueness_hash)
    return new_storage
def find_derived_storage_for_image(source_image, transformation_name):
def find_derived_storage_for_image(source_image, transformation_name, varying_metadata=None):
uniqueness_hash = _get_uniqueness_hash(varying_metadata)
try:
found = (ImageStorage
.select(ImageStorage, DerivedStorageForImage)
.join(DerivedStorageForImage)
.join(ImageStorageTransformation)
.where(DerivedStorageForImage.source_image == source_image,
ImageStorageTransformation.name == transformation_name)
ImageStorageTransformation.name == transformation_name,
DerivedStorageForImage.uniqueness_hash == uniqueness_hash)
.get())
found.locations = {placement.location.name for placement in found.imagestorageplacement_set}