Add a uniqueness hash to derived image storage to break caching over tags
This allows converted ACIs and squashed images to be unique based on the specified tag. Fixes #92
parent a33a70a419
commit a43b741f1b
7 changed files with 119 additions and 40 deletions
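
The mechanics, in sketch form: the derived-image cache key grows from (source image, transformation) to (source image, transformation, uniqueness hash), where the hash is a SHA-256 over a canonical JSON serialization of the tag-dependent metadata, here {'tag': tag}. Below is a minimal, hypothetical stand-in for the commit's _get_uniqueness_hash; it uses json.dumps(sort_keys=True) instead of the new util/canonicaljson.canonicalize, which is equivalent for a flat dict like this one:

import hashlib
import json

def uniqueness_hash(varying_metadata):
  # None preserves the old behavior: one derived image per (source image, transformation).
  if not varying_metadata:
    return None
  # Serialize canonically first, so logically equal metadata always hashes identically.
  canonical = json.dumps(varying_metadata, sort_keys=True)
  return hashlib.sha256(canonical.encode('utf-8')).hexdigest()

# Two tags on the same source image now map to two distinct derived-storage rows:
assert uniqueness_hash({'tag': 'latest'}) != uniqueness_hash({'tag': 'sometag'})
assert uniqueness_hash({'tag': 'latest'}) == uniqueness_hash({'tag': 'latest'})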
@@ -617,12 +617,13 @@ class DerivedStorageForImage(BaseModel):
   source_image = ForeignKeyField(Image)
   derivative = ForeignKeyField(ImageStorage)
   transformation = ForeignKeyField(ImageStorageTransformation)
+  uniqueness_hash = CharField(null=True)

   class Meta:
     database = db
     read_slaves = (read_slave,)
     indexes = (
-      (('source_image', 'transformation'), True),
+      (('source_image', 'transformation', 'uniqueness_hash'), True),
     )

@@ -0,0 +1,29 @@
+"""Add uniqueness hash column for derived image storage
+
+Revision ID: 1093d8b212bb
+Revises: 0f17d94d11eb
+Create Date: 2016-06-06 15:27:21.735669
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '1093d8b212bb'
+down_revision = '0f17d94d11eb'
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade(tables):
+  ### commands auto generated by Alembic - please adjust! ###
+  op.drop_index('derivedstorageforimage_source_image_id_transformation_id', table_name='derivedstorageforimage')
+  op.add_column('derivedstorageforimage', sa.Column('uniqueness_hash', sa.String(length=255), nullable=True))
+  op.create_index('uniqueness_index', 'derivedstorageforimage', ['source_image_id', 'transformation_id', 'uniqueness_hash'], unique=True)
+  ### end Alembic commands ###
+
+
+def downgrade(tables):
+  ### commands auto generated by Alembic - please adjust! ###
+  op.drop_index('uniqueness_index', table_name='derivedstorageforimage')
+  op.drop_column('derivedstorageforimage', 'uniqueness_hash')
+  op.create_index('derivedstorageforimage_source_image_id_transformation_id', 'derivedstorageforimage', ['source_image_id', 'transformation_id'], unique=True)
+  ### end Alembic commands ###
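
Assuming a stock Alembic setup (hypothetical here; Quay wires these migrations through its own env.py, which is why upgrade and downgrade take a tables argument), applying and reverting the revision would look roughly like:

from alembic import command
from alembic.config import Config

cfg = Config('alembic.ini')             # assumed config path
command.upgrade(cfg, '1093d8b212bb')    # apply this revision
command.downgrade(cfg, '0f17d94d11eb')  # roll back to the parent revision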
@@ -1,6 +1,7 @@
 import logging
 import dateutil.parser
-import random
+import hashlib
+import json

 from peewee import JOIN_LEFT_OUTER, IntegrityError
 from datetime import datetime
@@ -11,6 +12,7 @@ from data.database import (Image, Repository, ImageStoragePlacement, Namespace,
                            ImageStorageLocation, RepositoryPermission, DerivedStorageForImage,
                            ImageStorageTransformation, db_random_func)

+from util.canonicaljson import canonicalize

 logger = logging.getLogger(__name__)

@@ -458,27 +460,39 @@ def set_secscan_status(image, indexed, version):
           .execute()) != 0


-def find_or_create_derived_storage(source_image, transformation_name, preferred_location):
-  existing = find_derived_storage_for_image(source_image, transformation_name)
+def _get_uniqueness_hash(varying_metadata):
+  if not varying_metadata:
+    return None
+
+  return hashlib.sha256(json.dumps(canonicalize(varying_metadata))).hexdigest()
+
+
+def find_or_create_derived_storage(source_image, transformation_name, preferred_location,
+                                   varying_metadata=None):
+  existing = find_derived_storage_for_image(source_image, transformation_name, varying_metadata)
   if existing is not None:
     return existing

   logger.debug('Creating storage derived from source image: %s', source_image.id)
+  uniqueness_hash = _get_uniqueness_hash(varying_metadata)
   trans = ImageStorageTransformation.get(name=transformation_name)
   new_storage = storage.create_v1_storage(preferred_location)
   DerivedStorageForImage.create(source_image=source_image, derivative=new_storage,
-                                transformation=trans)
+                                transformation=trans, uniqueness_hash=uniqueness_hash)
   return new_storage


-def find_derived_storage_for_image(source_image, transformation_name):
+def find_derived_storage_for_image(source_image, transformation_name, varying_metadata=None):
+  uniqueness_hash = _get_uniqueness_hash(varying_metadata)
+
   try:
     found = (ImageStorage
              .select(ImageStorage, DerivedStorageForImage)
              .join(DerivedStorageForImage)
              .join(ImageStorageTransformation)
              .where(DerivedStorageForImage.source_image == source_image,
-                    ImageStorageTransformation.name == transformation_name)
+                    ImageStorageTransformation.name == transformation_name,
+                    DerivedStorageForImage.uniqueness_hash == uniqueness_hash)
              .get())

     found.locations = {placement.location.name for placement in found.imagestorageplacement_set}

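A subtlety in the lookup above: when callers pass no varying_metadata, _get_uniqueness_hash returns None, and peewee rewrites an == None comparison into SQL IS NULL (standard peewee behavior, noted here as an assumption about the version in use). Existing derived images, whose uniqueness_hash column is NULL after the migration, therefore still match tag-independent lookups:

from data.database import DerivedStorageForImage

# Sketch of the predicate when varying_metadata is None; peewee emits
# "uniqueness_hash IS NULL", which matches rows created before this commit.
query = (DerivedStorageForImage
         .select()
         .where(DerivedStorageForImage.uniqueness_hash == None))
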
@@ -171,7 +171,8 @@ def _torrent_repo_verb(repo_image, tag, verb, **kwargs):

   # Lookup an *existing* derived storage for the verb. If the verb's image storage doesn't exist,
   # we cannot create it here, so we 406.
-  derived = model.image.find_derived_storage_for_image(repo_image, verb)
+  derived = model.image.find_derived_storage_for_image(repo_image, verb,
+                                                       varying_metadata={'tag': tag})
   if not derived:
     abort(406)

@@ -221,7 +222,8 @@ def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwarg
   (repo_image, _, _) = result

   # Lookup the derived image storage for the verb.
-  derived = model.image.find_derived_storage_for_image(repo_image, verb)
+  derived = model.image.find_derived_storage_for_image(repo_image, verb,
+                                                       varying_metadata={'tag': tag})
   if derived is None or derived.uploading:
     return make_response('', 202)

@@ -253,7 +255,8 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=

   # Lookup/create the derived image storage for the verb and repo image.
   derived = model.image.find_or_create_derived_storage(repo_image, verb,
-                                                       storage.preferred_locations[0])
+                                                       storage.preferred_locations[0],
+                                                       varying_metadata={'tag': tag})

   if not derived.uploading:
     logger.debug('Derived %s image %s exists in storage', verb, derived.uuid)

@@ -12,7 +12,7 @@ import gpgme

 import Crypto.Random
 from cachetools import lru_cache
-from flask import request, jsonify, abort
+from flask import request, jsonify
 from flask.blueprints import Blueprint
 from flask.ext.testing import LiveServerTestCase

@@ -342,7 +342,7 @@ class V1RegistryPushMixin(V1RegistryMixin):
   push_version = 'v1'

   def do_push(self, namespace, repository, username, password, images=None, expect_failure=None,
-              munge_shas=False):
+              munge_shas=False, tag_names=None):
     images = images or self._get_default_images()
     auth = (username, password)
     repo_name = _get_repo_name(namespace, repository)
@@ -361,7 +361,6 @@

     for image_data in images:
       image_id = image_data['id']
-      last_image_id = image_id

       # PUT /v1/images/{imageID}/json
       image_json_data = {'id': image_id}
@@ -385,9 +384,10 @@
                    headers={'X-Docker-Checksum-Payload': checksum},
                    auth='sig')

     # PUT /v1/repositories/{namespace}/{repository}/tags/latest
-    self.do_tag(namespace, repository, 'latest', images[-1]['id'])
+    tag_names = tag_names or ['latest']
+    for tag_name in tag_names:
+      self.do_tag(namespace, repository, tag_name, images[-1]['id'])

     # PUT /v1/repositories/{namespace}/{repository}/images
     self.conduct('PUT', '/v1/repositories/%s/images' % repo_name,
@@ -1500,19 +1500,21 @@ class TorrentV2PushTests(RegistryTestCaseMixin, TorrentTestMixin, V2RegistryPush
 class ACIConversionTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestCase):
   """ Tests for registry ACI conversion. """

-  def get_converted_image(self):
-    response = self.conduct('GET', '/c1/aci/localhost:5000/devtable/newrepo/latest/aci/linux/amd64/', auth='sig')
+  def get_converted_image(self, tag_name='latest'):
+    url = '/c1/aci/localhost:5000/devtable/newrepo/' + tag_name + '/aci/linux/amd64/'
+    response = self.conduct('GET', url, auth='sig')
     tar = tarfile.open(fileobj=StringIO(response.content))
     return tar, response.content

-  def get_converted_signature(self):
+  def get_converted_signature(self, tag_name='latest'):
     counter = 0

     # Give time for the signature to be written before continuing. As we don't exactly know when
     # this is (based on CPU conditions when the test is being run), we try a backoff and sleep
     # approach.
     while counter < 10:
-      response = self.conduct('GET', '/c1/aci/localhost:5000/devtable/newrepo/latest/aci.asc/linux/amd64/', auth='sig', expected_code=None)
+      url = '/c1/aci/localhost:5000/devtable/newrepo/' + tag_name + '/aci.asc/linux/amd64/'
+      response = self.conduct('GET', url, auth='sig', expected_code=None)
       if response.status_code == 202 or response.status_code == 404:
         counter += 1
         time.sleep(counter * 2)
@@ -1574,7 +1576,8 @@ class ACIConversionTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerT
         "annotations": [
           {"name": "created", "value": ""},
           {"name": "homepage", "value": "http://localhost:5000/devtable/newrepo:latest"},
-          {"name": "quay.io/derived-image", "value": "fa916d5ca4da5348628dfffcfc943288a0cca521cd21a6d2981a85ec1d7f7a3a"}
+          {"name": "quay.io/derived-image",
+           "value": "fa916d5ca4da5348628dfffcfc943288a0cca521cd21a6d2981a85ec1d7f7a3a"}
         ]
       },
       "labels": [
@@ -1609,6 +1612,36 @@ class ACIConversionTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerT
     self._verify_signature(signature_again, converted)
     self._verify_signature(signature_again, converted_again)

+  def assertHasDerivedImage(self, manifest, expected):
+    for annotation in manifest['app']['annotations']:
+      if annotation['name'] == 'homepage':
+        self.assertEqual(expected, annotation['value'])
+        return
+
+    self.fail('Derived image annotation not found in metadata')
+
+  def test_conversion_different_tags(self):
+    initial_images = [
+      {
+        'id': 'initialid',
+        'contents': 'the initial image',
+      },
+    ]
+
+    # Create the repo.
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', images=initial_images,
+                 tag_names=['latest', 'sometag'])
+
+    # Pull the squashed version of tag latest.
+    latest_tar, _ = self.get_converted_image(tag_name='latest')
+    latest_manifest = json.loads(latest_tar.extractfile(latest_tar.getmember('manifest')).read())
+    self.assertHasDerivedImage(latest_manifest, 'http://localhost:5000/devtable/newrepo:latest')
+
+    # Pull the squashed version of tag sometag.
+    sometag_tar, _ = self.get_converted_image(tag_name='sometag')
+    sometag_manifest = json.loads(sometag_tar.extractfile(sometag_tar.getmember('manifest')).read())
+    self.assertHasDerivedImage(sometag_manifest, 'http://localhost:5000/devtable/newrepo:sometag')
+
+
   def test_multilayer_conversion(self):
     images = [

18
util/canonicaljson.py
Normal file
18
util/canonicaljson.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
import collections
|
||||||
|
|
||||||
|
def canonicalize(json_obj):
|
||||||
|
"""This function canonicalizes a Python object that will be serialized as JSON.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
json_obj (object): the Python object that will later be serialized as JSON.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
object: json_obj now sorted to its canonical form.
|
||||||
|
|
||||||
|
"""
|
||||||
|
if isinstance(json_obj, collections.MutableMapping):
|
||||||
|
sorted_obj = sorted({key: canonicalize(val) for key, val in json_obj.items()}.items())
|
||||||
|
return collections.OrderedDict(sorted_obj)
|
||||||
|
elif isinstance(json_obj, (list, tuple)):
|
||||||
|
return [canonicalize(val) for val in json_obj]
|
||||||
|
return json_obj
|
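
A quick illustration of canonicalize (assuming util is importable from the repository root): input key order no longer affects the serialized bytes, which is what makes the uniqueness hash stable:

import json
from util.canonicaljson import canonicalize

left = canonicalize({'b': [1, 2], 'a': {'y': 1, 'x': 2}})
right = canonicalize({'a': {'x': 2, 'y': 1}, 'b': [1, 2]})

# Keys are sorted recursively into an OrderedDict, so both serialize identically:
assert json.dumps(left) == json.dumps(right) == '{"a": {"x": 2, "y": 1}, "b": [1, 2]}'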

@@ -1,26 +1,7 @@
-import collections
 import json

 from hashlib import sha256
+from util.canonicaljson import canonicalize
-
-
-def canonicalize(json_obj):
-  """This function canonicalizes a Python object that will be serialized as JSON.
-
-  Args:
-    json_obj (object): the Python object that will later be serialized as JSON.
-
-  Returns:
-    object: json_obj now sorted to its canonical form.
-
-  """
-  if isinstance(json_obj, collections.MutableMapping):
-    sorted_obj = sorted({key: canonicalize(val) for key, val in json_obj.items()}.items())
-    return collections.OrderedDict(sorted_obj)
-  elif isinstance(json_obj, (list, tuple)):
-    return [canonicalize(val) for val in json_obj]
-  return json_obj


 def canonical_kid(jwk):
   """This function returns the SHA256 hash of a canonical JWK.