Implement a manifest builder, to allow for easier management of state around constructing manifests

This commit is contained in:
Joseph Schorr 2018-09-17 17:02:49 -04:00
parent 1224930af8
commit 65d5be23c7
5 changed files with 326 additions and 5 deletions

View file

@ -366,6 +366,13 @@ def get_image(repo, docker_image_id):
return None
def get_image_by_db_id(id):
  """ Returns the image row with the given database ID, or None if no such row exists. """
  try:
    found = Image.get(id=id)
  except Image.DoesNotExist:
    found = None

  return found
def synthesize_v1_image(repo, image_storage_id, storage_image_size, docker_image_id,
created_date_str, comment, command, v1_json_metadata, parent_image=None):
""" Find an existing image with this docker image id, and if none exists, write one with the

View file

@ -50,3 +50,16 @@ def requiresinput(input_name):
return wrapper
return inner
def optionalinput(input_name):
  """ Decorator factory for data type members that accept an optional input. On every call the
      value stored under `input_name` in the instance's `_inputs` is passed to the decorated
      function as a keyword argument of the same name; it is None when the input is absent.
  """
  def inner(func):
    @wraps(func)
    def wrapper(self, *args, **kwargs):
      # Unlike a required input, a missing entry is not an error: it is simply passed as None.
      kwargs[input_name] = self._inputs.get(input_name)
      return func(self, *args, **kwargs)

    return wrapper

  return inner

View file

@ -6,7 +6,7 @@ from enum import Enum, unique
from cachetools import lru_cache
from data import model
from data.registry_model.datatype import datatype, requiresinput
from data.registry_model.datatype import datatype, requiresinput, optionalinput
from image.docker.schema1 import DockerSchema1Manifest
@ -17,17 +17,41 @@ class RepositoryReference(datatype('Repository', [])):
if repo_obj is None:
return None
return RepositoryReference(db_id=repo_obj.id)
return RepositoryReference(db_id=repo_obj.id,
inputs=dict(
kind=model.repository.get_repo_kind_name(repo_obj),
is_public=model.repository.is_repository_public(repo_obj)
))
@classmethod
def for_id(cls, repo_id):
  """ Returns a reference to the repository with the given database ID.

      No repository row is available here, so the `kind` and `is_public` inputs are supplied
      as None; the corresponding properties then fall back to looking the values up.
  """
  # The inputs dict must always be supplied: the optional-input properties read from it.
  return RepositoryReference(db_id=repo_id, inputs=dict(kind=None, is_public=None))
@property
@lru_cache(maxsize=1)
def _repository_obj(self):
  """ Looks up and returns the repository for this reference's database ID. """
  repo_db_id = self._db_id
  return model.repository.lookup_repository(repo_db_id)
@property
@optionalinput('kind')
def kind(self, kind):
  """ Returns the kind of the repository.

      The `kind` input is returned when it was supplied (and is truthy); otherwise the kind
      name is looked up from the repository object.
  """
  # Fixed: the fallback previously read the misspelled attribute `_repositry_obj`, which raised
  # AttributeError whenever the input was missing.
  return kind or model.repository.get_repo_kind_name(self._repository_obj)
@property
@optionalinput('is_public')
def is_public(self, is_public):
  """ Returns whether the repository is public. """
  # Only a missing (None) input triggers the lookup; an explicit False input is returned as-is.
  if is_public is None:
    return model.repository.is_repository_public(self._repository_obj)

  return is_public
@property
def id(self):
  """ The database ID of the repository this reference points at. """
  db_id = self._db_id
  return db_id
@property
def namespace_name(self):
""" Returns the namespace name of this repository.
@ -119,7 +143,8 @@ class Manifest(datatype('Manifest', ['digest', 'manifest_bytes'])):
class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'comment', 'command',
'image_size', 'aggregate_size', 'uploading'])):
'image_size', 'aggregate_size', 'uploading',
'v1_metadata_string'])):
""" LegacyImage represents a Docker V1-style image found in a repository. """
@classmethod
def for_image(cls, image, images_map=None, tags_map=None, blob=None):
@ -134,6 +159,7 @@ class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'commen
created=image.created,
comment=image.comment,
command=image.command,
v1_metadata_string=image.v1_json_metadata,
image_size=image.storage.image_size,
aggregate_size=image.aggregate_size,
uploading=image.storage.uploading)
@ -143,7 +169,8 @@ class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'commen
@requiresinput('ancestor_id_list')
def parents(self, images_map, ancestor_id_list):
""" Returns the parent images for this image. Raises an exception if the parents have
not been loaded before this property is invoked.
not been loaded before this property is invoked. Parents are returned starting at the
leaf image.
"""
return [LegacyImage.for_image(images_map[ancestor_id], images_map=images_map)
for ancestor_id in reversed(ancestor_id_list)

View file

@ -0,0 +1,180 @@
import json
import uuid
from collections import namedtuple
from flask import session
from data import model
from data.database import db_transaction
from data.registry_model import registry_model
# A layer tracked by a manifest builder: the layer ID, its V1 metadata and the database ID of
# the image row backing it.
ManifestLayer = namedtuple('ManifestLayer', ['layer_id', 'v1_metadata', 'db_id'])

# State persisted in the session for a builder:
#   builder_id: the unique ID of the builder.
#   images: maps layer ID -> database ID of the image for that layer.
#   tags: maps tag name -> database ID of the tag committed by the builder.
#   checksums: maps layer ID -> the computed checksums recorded when a blob was assigned.
_BuilderState = namedtuple('_BuilderState', ['builder_id', 'images', 'tags', 'checksums'])

# Key under which the builder state is stored in the session.
_SESSION_KEY = '__manifestbuilder'
def create_manifest_builder(repository_ref):
  """ Creates a new manifest builder for populating manifests under the specified repository.
      The builder receives a freshly generated ID and empty state, is saved to the session and
      is then returned.
  """
  initial_state = _BuilderState(builder_id=str(uuid.uuid4()), images={}, tags={}, checksums={})
  new_builder = _ManifestBuilder(repository_ref, initial_state)
  new_builder._save_to_session()
  return new_builder
def lookup_manifest_builder(repository_ref, builder_id):
  """ Returns the manifest builder stored in the session if its ID equals `builder_id`, bound to
      the given repository. Returns None when the session holds no builder state or the stored
      builder has a different ID.
  """
  stored_state = session.get(_SESSION_KEY)
  if stored_state is None:
    return None

  # The stored value is unpacked positionally back into the state tuple.
  state = _BuilderState(*stored_state)
  if state.builder_id == builder_id:
    return _ManifestBuilder(repository_ref, state)

  return None
class _ManifestBuilder(object):
  """ Helper class which provides an interface for bookkeeping the layers and configuration of
      manifests being constructed. The builder's state is written back to the session after
      every change made through this class.
  """
  def __init__(self, repository_ref, builder_state):
    """ Binds the builder to a repository reference and to its state tuple (builder ID plus the
        images, tags and checksums mappings).
    """
    self._repository_ref = repository_ref
    self._builder_state = builder_state

  @property
  def builder_id(self):
    """ Returns the unique ID for this builder. """
    return self._builder_state.builder_id

  @property
  def committed_tags(self):
    """ Returns the tags committed by this builder, if any. Each tag is looked up again by name
        in the repository, with its legacy image included.
    """
    return [registry_model.get_repo_tag(self._repository_ref, tag_name, include_legacy_image=True)
            for tag_name in self._builder_state.tags]

  def start_layer(self, layer_id, v1_metadata, location_name, calling_user, temp_tag_expiration):
    """ Starts a new layer with the given ID to be placed into a manifest. Returns the layer
        started or None if an error occurred.

        None is returned when the repository no longer exists, when `layer_id` does not match
        the `id` entry of `v1_metadata`, or when the metadata names a parent image that is not
        found in the repository. If an image with this ID already exists in the repository it
        is reused; otherwise a new image is created (using `location_name` and `calling_user`),
        a temporary hidden tag with expiration `temp_tag_expiration` is pointed at it, its
        storage is marked as uploading and its V1 metadata is saved.
    """
    # Ensure the repository still exists.
    repository = model.repository.lookup_repository(self._repository_ref._db_id)
    if repository is None:
      return None

    namespace_name = repository.namespace_user.username
    repo_name = repository.name

    # Sanity check that the ID matches the v1 metadata. Metadata without an `id` entry is
    # reported as an error (None) rather than raising KeyError.
    if layer_id != v1_metadata.get('id'):
      return None

    # Ensure the parent already exists in the repository.
    parent_id = v1_metadata.get('parent', None)
    parent_image = None

    if parent_id is not None:
      parent_image = model.image.get_repo_image(namespace_name, repo_name, parent_id)
      if parent_image is None:
        return None

    # Check to see if this layer already exists in the repository. If so, we can skip the creation.
    existing_image = registry_model.get_legacy_image(self._repository_ref, layer_id)
    if existing_image is not None:
      self._builder_state.images[layer_id] = existing_image.id
      self._save_to_session()
      return ManifestLayer(layer_id, v1_metadata, existing_image.id)

    with db_transaction():
      # Otherwise, create a new legacy image and point a temporary tag at it.
      created = model.image.find_create_or_link_image(layer_id, repository, calling_user, {},
                                                      location_name)
      model.tag.create_temporary_hidden_tag(repository, created, temp_tag_expiration)

      # Mark the image as uploading.
      created.storage.uploading = True
      created.storage.save()

      # Save its V1 metadata. `container_config` may be present but null, so fall back to an
      # empty dict in that case as well as when the key is missing.
      command_list = (v1_metadata.get('container_config') or {}).get('Cmd', None)
      command = json.dumps(command_list) if command_list else None

      model.image.set_image_metadata(layer_id, namespace_name, repo_name,
                                     v1_metadata.get('created'),
                                     v1_metadata.get('comment'),
                                     command, json.dumps(v1_metadata),
                                     parent=parent_image)

    # Save the changes to the builder.
    self._builder_state.images[layer_id] = created.id
    self._save_to_session()

    return ManifestLayer(layer_id, v1_metadata, created.id)

  def lookup_layer(self, layer_id):
    """ Returns a layer with the given ID under this builder. If none exists, returns None.
        None is also returned when the image recorded for the layer can no longer be loaded.
    """
    if layer_id not in self._builder_state.images:
      return None

    image = model.image.get_image_by_db_id(self._builder_state.images[layer_id])
    if image is None:
      return None

    return ManifestLayer(layer_id, json.loads(image.v1_json_metadata), image.id)

  def assign_layer_blob(self, layer, blob, computed_checksums):
    """ Assigns a blob to a layer and records `computed_checksums` for the layer. Returns True
        on success or None if the image backing the layer could not be found.
    """
    assert blob

    repo_image = model.image.get_image_by_db_id(layer.db_id)
    if repo_image is None:
      return None

    with db_transaction():
      existing_storage = repo_image.storage
      repo_image.storage = blob._db_id
      repo_image.save()

      # Remove the storage the image pointed at before, but never the row it was just pointed
      # at: re-assigning the same blob must not delete the storage now in use.
      if existing_storage.id != blob._db_id:
        existing_storage.delete_instance(recursive=True)

    self._builder_state.checksums[layer.layer_id] = computed_checksums
    self._save_to_session()
    return True

  def validate_layer_checksum(self, layer, checksum):
    """ Returns whether the checksum for a layer matches that specified. A layer for which no
        checksums have been recorded never matches.
    """
    # Fall back to an empty list so an unknown layer yields False instead of a TypeError.
    return checksum in (self._builder_state.checksums.get(layer.layer_id) or [])

  def commit_tag_and_manifest(self, tag_name, layer):
    """ Commits a new tag + manifest for that tag to the repository with the given name,
        pointing to the given layer. Returns the tag, or None if the layer's legacy image was
        not found or the tag could not be retargeted.
    """
    legacy_image = registry_model.get_legacy_image(self._repository_ref, layer.layer_id)
    if legacy_image is None:
      return None

    tag = registry_model.retarget_tag(self._repository_ref, tag_name, legacy_image)
    if tag is None:
      return None

    self._builder_state.tags[tag_name] = tag._db_id
    self._save_to_session()
    return tag

  def done(self):
    """ Marks the manifest builder as complete and disposes of any state. This call is optional
        and it is expected manifest builders will eventually time out if unused for an
        extended period of time.
    """
    session.pop(_SESSION_KEY, None)

  def _save_to_session(self):
    """ Writes the current builder state into the session. """
    session[_SESSION_KEY] = self._builder_state

View file

@ -0,0 +1,94 @@
import hashlib
from io import BytesIO
import pytest
from mock import patch
from data.registry_model.blobuploader import BlobUploadSettings, upload_blob
from data.registry_model.manifestbuilder import create_manifest_builder, lookup_manifest_builder
from data.registry_model.registry_pre_oci_model import PreOCIModel
from storage.distributedstorage import DistributedStorage
from storage.fakestorage import FakeStorage
from test.fixtures import *
@pytest.fixture()
def pre_oci_model(initialized_db):
  """ Provides a PreOCIModel instance; depends on the `initialized_db` fixture. """
  registry = PreOCIModel()
  return registry
@pytest.fixture()
def fake_session():
  """ Replaces the manifest builder module's `session` with an empty dict for the duration of
      the test.
  """
  session_patch = patch('data.registry_model.manifestbuilder.session', {})
  with session_patch:
    yield
@pytest.mark.parametrize('layers', [
  pytest.param([('someid', None, 'some data')], id='Single layer'),
  pytest.param([('parentid', None, 'some parent data'),
                ('someid', 'parentid', 'some data')],
               id='Multi layer'),
])
def test_build_manifest(layers, fake_session, pre_oci_model):
  """ Builds a manifest layer by layer through the manifest builder, commits it to a tag and
      verifies the resulting tag, legacy image and parsed manifest.
  """
  repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
  fake_storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
  upload_settings = BlobUploadSettings('2M', 512 * 1024, 3600)
  app_config = {'TESTING': True}
  leaf_layer_id = layers[-1][0]

  builder = create_manifest_builder(repository_ref)

  # Only the builder's own ID resolves back to a builder.
  assert lookup_manifest_builder(repository_ref, 'anotherid') is None
  assert lookup_manifest_builder(repository_ref, builder.builder_id) is not None

  blobs_by_layer = {}
  for layer_id, parent_id, layer_bytes in layers:
    # Start a new layer.
    layer_metadata = {'id': layer_id, 'parent': parent_id}
    assert builder.start_layer(layer_id, layer_metadata, 'local_us', None, 60)

    checksum = hashlib.sha1(layer_bytes).hexdigest()

    # Assign it a blob.
    with upload_blob(repository_ref, fake_storage, upload_settings) as uploader:
      uploader.upload_chunk(app_config, BytesIO(layer_bytes))
      blob = uploader.commit_to_blob(app_config)
      blobs_by_layer[layer_id] = blob
      builder.assign_layer_blob(builder.lookup_layer(layer_id), blob, [checksum])

    # Validate the checksum.
    assert builder.validate_layer_checksum(builder.lookup_layer(layer_id), checksum)

  # Commit the manifest to a tag.
  tag = builder.commit_tag_and_manifest('somenewtag', builder.lookup_layer(leaf_layer_id))
  assert tag
  assert tag in builder.committed_tags

  # Verify the legacy image for the tag.
  found_tag = pre_oci_model.get_repo_tag(repository_ref, 'somenewtag', include_legacy_image=True)
  assert found_tag
  assert found_tag.name == 'somenewtag'
  assert found_tag.legacy_image.docker_image_id == leaf_layer_id

  # Verify the blob and manifest.
  manifest = pre_oci_model.get_manifest_for_tag(found_tag)
  assert manifest

  parsed = manifest.get_parsed_manifest()
  parsed_layers = list(parsed.layers)
  parsed_digests = list(parsed.blob_digests)
  assert len(parsed_layers) == len(layers)

  for index, (layer_id, parent_id, _) in enumerate(layers):
    assert parsed_digests[index] == blobs_by_layer[layer_id].digest
    assert parsed_layers[index].v1_metadata.image_id == layer_id
    assert parsed_layers[index].v1_metadata.parent_image_id == parent_id

  assert parsed.leaf_layer_v1_image_id == leaf_layer_id
def test_build_manifest_missing_parent(fake_session, pre_oci_model):
  """ Starting a layer whose declared parent is not in the repository must yield None. """
  repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
  builder = create_manifest_builder(repository_ref)

  orphan_metadata = {'id': 'somelayer', 'parent': 'someparent'}
  started = builder.start_layer('somelayer', orphan_metadata, 'local_us', None, 60)
  assert started is None