From 65d5be23c7002b120a9c14bb0207eaa01d60c227 Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Mon, 17 Sep 2018 17:02:49 -0400
Subject: [PATCH] Implement a manifest builder, to allow for easier management
 of state around constructing manifests

---
 data/model/image.py                           |   7 +
 data/registry_model/datatype.py               |  13 ++
 data/registry_model/datatypes.py              |  37 +++-
 data/registry_model/manifestbuilder.py        | 180 ++++++++++++++++++
 .../test/test_manifestbuilder.py              |  94 +++++++++
 5 files changed, 326 insertions(+), 5 deletions(-)
 create mode 100644 data/registry_model/manifestbuilder.py
 create mode 100644 data/registry_model/test/test_manifestbuilder.py

diff --git a/data/model/image.py b/data/model/image.py
index 1b29ee33d..972067514 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -366,6 +366,13 @@ def get_image(repo, docker_image_id):
     return None
 
 
+def get_image_by_db_id(id):
+  try:
+    return Image.get(id=id)
+  except Image.DoesNotExist:
+    return None
+
+
 def synthesize_v1_image(repo, image_storage_id, storage_image_size, docker_image_id,
                         created_date_str, comment, command, v1_json_metadata, parent_image=None):
   """ Find an existing image with this docker image id, and if none exists, write one with the
diff --git a/data/registry_model/datatype.py b/data/registry_model/datatype.py
index 1183fd1ea..8264f277e 100644
--- a/data/registry_model/datatype.py
+++ b/data/registry_model/datatype.py
@@ -50,3 +50,16 @@ def requiresinput(input_name):
 
     return wrapper
   return inner
+
+
+def optionalinput(input_name):
+  """ Marks a property on the data type as having an input be optional when invoked. """
+  def inner(func):
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+      kwargs[input_name] = self._inputs.get(input_name)
+      result = func(self, *args, **kwargs)
+      return result
+
+    return wrapper
+  return inner
diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py
index 3ec0c6f22..d09d92ade 100644
--- a/data/registry_model/datatypes.py
+++ b/data/registry_model/datatypes.py
@@ -6,7 +6,7 @@ from enum import Enum, unique
 from cachetools import lru_cache
 
 from data import model
-from data.registry_model.datatype import datatype, requiresinput
+from data.registry_model.datatype import datatype, requiresinput, optionalinput
 
 from image.docker.schema1 import DockerSchema1Manifest
 
@@ -17,17 +17,41 @@ class RepositoryReference(datatype('Repository', [])):
     if repo_obj is None:
       return None
 
-    return RepositoryReference(db_id=repo_obj.id)
+    return RepositoryReference(db_id=repo_obj.id,
+                               inputs=dict(
+                                 kind=model.repository.get_repo_kind_name(repo_obj),
+                                 is_public=model.repository.is_repository_public(repo_obj)
+                               ))
 
   @classmethod
   def for_id(cls, repo_id):
-    return RepositoryReference(db_id=repo_id)
+    return RepositoryReference(db_id=repo_id, inputs=dict(kind=None, is_public=None))
 
   @property
   @lru_cache(maxsize=1)
   def _repository_obj(self):
     return model.repository.lookup_repository(self._db_id)
 
+  @property
+  @optionalinput('kind')
+  def kind(self, kind):
+    """ Returns the kind of the repository. """
+    return kind or model.repository.get_repo_kind_name(self._repository_obj)
+
+  @property
+  @optionalinput('is_public')
+  def is_public(self, is_public):
+    """ Returns whether the repository is public. """
+    if is_public is not None:
+      return is_public
+
+    return model.repository.is_repository_public(self._repository_obj)
+
+  @property
+  def id(self):
+    """ Returns the database ID of the repository. """
+    return self._db_id
+
   @property
   def namespace_name(self):
     """ Returns the namespace name of this repository.
@@ -119,7 +143,8 @@ class Manifest(datatype('Manifest', ['digest', 'manifest_bytes'])):
 
 
 class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'comment', 'command',
-                                           'image_size', 'aggregate_size', 'uploading'])):
+                                           'image_size', 'aggregate_size', 'uploading',
+                                           'v1_metadata_string'])):
   """ LegacyImage represents a Docker V1-style image found in a repository. """
   @classmethod
   def for_image(cls, image, images_map=None, tags_map=None, blob=None):
@@ -134,6 +159,7 @@ class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'commen
                        created=image.created,
                        comment=image.comment,
                        command=image.command,
+                       v1_metadata_string=image.v1_json_metadata,
                        image_size=image.storage.image_size,
                        aggregate_size=image.aggregate_size,
                        uploading=image.storage.uploading)
@@ -143,7 +169,8 @@ class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'commen
   @requiresinput('ancestor_id_list')
   def parents(self, images_map, ancestor_id_list):
     """ Returns the parent images for this image. Raises an exception if the parents have
-        not been loaded before this property is invoked.
+        not been loaded before this property is invoked. Parents are returned starting at the
+        leaf image.
     """
     return [LegacyImage.for_image(images_map[ancestor_id], images_map=images_map)
             for ancestor_id in reversed(ancestor_id_list)
diff --git a/data/registry_model/manifestbuilder.py b/data/registry_model/manifestbuilder.py
new file mode 100644
index 000000000..9b9852b2e
--- /dev/null
+++ b/data/registry_model/manifestbuilder.py
@@ -0,0 +1,180 @@
+import json
+import uuid
+
+from collections import namedtuple
+
+from flask import session
+
+from data import model
+from data.database import db_transaction
+from data.registry_model import registry_model
+
+
+ManifestLayer = namedtuple('ManifestLayer', ['layer_id', 'v1_metadata', 'db_id'])
+_BuilderState = namedtuple('_BuilderState', ['builder_id', 'images', 'tags', 'checksums'])
+
+
+_SESSION_KEY = '__manifestbuilder'
+
+
+def create_manifest_builder(repository_ref):
+  """ Creates a new manifest builder for populating manifests under the specified repository
+      and returns it. Returns None if the builder could not be constructed.
+  """
+  builder_id = str(uuid.uuid4())
+  builder = _ManifestBuilder(repository_ref, _BuilderState(builder_id, {}, {}, {}))
+  builder._save_to_session()
+  return builder
+
+
+def lookup_manifest_builder(repository_ref, builder_id):
+  """ Looks up the manifest builder with the given ID under the specified repository and returns
+      it or None if none.
+  """
+  builder_state_tuple = session.get(_SESSION_KEY)
+  if builder_state_tuple is None:
+    return None
+
+  builder_state = _BuilderState(*builder_state_tuple)
+  if builder_state.builder_id != builder_id:
+    return None
+
+  return _ManifestBuilder(repository_ref, builder_state)
+
+
+class _ManifestBuilder(object):
+  """ Helper class which provides an interface for bookkeeping the layers and configuration of
+      manifests being constructed.
+  """
+  def __init__(self, repository_ref, builder_state):
+    self._repository_ref = repository_ref
+    self._builder_state = builder_state
+
+  @property
+  def builder_id(self):
+    """ Returns the unique ID for this builder. """
+    return self._builder_state.builder_id
+
+  @property
+  def committed_tags(self):
+    """ Returns the tags committed by this builder, if any. """
+    return [registry_model.get_repo_tag(self._repository_ref, tag_name, include_legacy_image=True)
+            for tag_name in self._builder_state.tags.keys()]
+
+  def start_layer(self, layer_id, v1_metadata, location_name, calling_user, temp_tag_expiration):
+    """ Starts a new layer with the given ID to be placed into a manifest. Returns the layer
+        started or None if an error occurred.
+    """
+    # Ensure the repository still exists.
+    repository = model.repository.lookup_repository(self._repository_ref._db_id)
+    if repository is None:
+      return None
+
+    namespace_name = repository.namespace_user.username
+    repo_name = repository.name
+
+    # Sanity check that the ID matches the v1 metadata.
+    if layer_id != v1_metadata['id']:
+      return None
+
+    # Ensure the parent already exists in the repository.
+    parent_id = v1_metadata.get('parent', None)
+    parent_image = None
+
+    if parent_id is not None:
+      parent_image = model.image.get_repo_image(namespace_name, repo_name, parent_id)
+      if parent_image is None:
+        return None
+
+    # Check to see if this layer already exists in the repository. If so, we can skip the creation.
+    existing_image = registry_model.get_legacy_image(self._repository_ref, layer_id)
+    if existing_image is not None:
+      self._builder_state.images[layer_id] = existing_image.id
+      self._save_to_session()
+      return ManifestLayer(layer_id, v1_metadata, existing_image.id)
+
+    with db_transaction():
+      # Otherwise, create a new legacy image and point a temporary tag at it.
+      created = model.image.find_create_or_link_image(layer_id, repository, calling_user, {},
+                                                      location_name)
+      model.tag.create_temporary_hidden_tag(repository, created, temp_tag_expiration)
+
+      # Mark the image as uploading.
+      created.storage.uploading = True
+      created.storage.save()
+
+      # Save its V1 metadata.
+      command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
+      command = json.dumps(command_list) if command_list else None
+
+      model.image.set_image_metadata(layer_id, namespace_name, repo_name,
+                                     v1_metadata.get('created'),
+                                     v1_metadata.get('comment'),
+                                     command, json.dumps(v1_metadata),
+                                     parent=parent_image)
+
+    # Save the changes to the builder.
+    self._builder_state.images[layer_id] = created.id
+    self._save_to_session()
+
+    return ManifestLayer(layer_id, v1_metadata, created.id)
+
+  def lookup_layer(self, layer_id):
+    """ Returns a layer with the given ID under this builder. If none exists, returns None. """
+    if layer_id not in self._builder_state.images:
+      return None
+
+    image = model.image.get_image_by_db_id(self._builder_state.images[layer_id])
+    if image is None:
+      return None
+
+    return ManifestLayer(layer_id, json.loads(image.v1_json_metadata), image.id)
+
+  def assign_layer_blob(self, layer, blob, computed_checksums):
+    """ Assigns a blob to a layer. """
+    assert blob
+
+    repo_image = model.image.get_image_by_db_id(layer.db_id)
+    if repo_image is None:
+      return None
+
+    with db_transaction():
+      existing_storage = repo_image.storage
+      repo_image.storage = blob._db_id
+      repo_image.save()
+      existing_storage.delete_instance(recursive=True)
+
+    self._builder_state.checksums[layer.layer_id] = computed_checksums
+    self._save_to_session()
+    return True
+
+  def validate_layer_checksum(self, layer, checksum):
+    """ Returns whether the checksum for a layer matches that specified.
+    """
+    return checksum in self._builder_state.checksums.get(layer.layer_id, [])
+
+  def commit_tag_and_manifest(self, tag_name, layer):
+    """ Commits a new tag + manifest for that tag to the repository with the given name,
+        pointing to the given layer.
+    """
+    legacy_image = registry_model.get_legacy_image(self._repository_ref, layer.layer_id)
+    if legacy_image is None:
+      return None
+
+    tag = registry_model.retarget_tag(self._repository_ref, tag_name, legacy_image)
+    if tag is None:
+      return None
+
+    self._builder_state.tags[tag_name] = tag._db_id
+    self._save_to_session()
+    return tag
+
+  def done(self):
+    """ Marks the manifest builder as complete and disposes of any state. This call is optional
+        and it is expected manifest builders will eventually time out if unused for an
+        extended period of time.
+    """
+    session.pop(_SESSION_KEY, None)
+
+  def _save_to_session(self):
+    session[_SESSION_KEY] = self._builder_state
diff --git a/data/registry_model/test/test_manifestbuilder.py b/data/registry_model/test/test_manifestbuilder.py
new file mode 100644
index 000000000..37e53a50a
--- /dev/null
+++ b/data/registry_model/test/test_manifestbuilder.py
@@ -0,0 +1,94 @@
+import hashlib
+
+from io import BytesIO
+
+import pytest
+
+from mock import patch
+
+from data.registry_model.blobuploader import BlobUploadSettings, upload_blob
+from data.registry_model.manifestbuilder import create_manifest_builder, lookup_manifest_builder
+from data.registry_model.registry_pre_oci_model import PreOCIModel
+
+from storage.distributedstorage import DistributedStorage
+from storage.fakestorage import FakeStorage
+from test.fixtures import *
+
+
+@pytest.fixture()
+def pre_oci_model(initialized_db):
+  return PreOCIModel()
+
+
+@pytest.fixture()
+def fake_session():
+  with patch('data.registry_model.manifestbuilder.session', {}):
+    yield
+
+
+@pytest.mark.parametrize('layers', [
+  pytest.param([('someid', None, 'some data')], id='Single layer'),
+  pytest.param([('parentid', None, 'some parent data'),
+                ('someid', 'parentid', 'some data')],
+               id='Multi layer'),
+])
+def test_build_manifest(layers, fake_session, pre_oci_model):
+  repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
+  storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
+  settings = BlobUploadSettings('2M', 512 * 1024, 3600)
+  app_config = {'TESTING': True}
+
+  builder = create_manifest_builder(repository_ref)
+  assert lookup_manifest_builder(repository_ref, 'anotherid') is None
+  assert lookup_manifest_builder(repository_ref, builder.builder_id) is not None
+
+  blobs_by_layer = {}
+  for layer_id, parent_id, layer_bytes in layers:
+    # Start a new layer.
+    assert builder.start_layer(layer_id, {'id': layer_id, 'parent': parent_id}, 'local_us', None,
+                               60)
+
+    checksum = hashlib.sha1(layer_bytes).hexdigest()
+
+    # Assign it a blob.
+    with upload_blob(repository_ref, storage, settings) as uploader:
+      uploader.upload_chunk(app_config, BytesIO(layer_bytes))
+      blob = uploader.commit_to_blob(app_config)
+      blobs_by_layer[layer_id] = blob
+      builder.assign_layer_blob(builder.lookup_layer(layer_id), blob, [checksum])
+
+    # Validate the checksum.
+    assert builder.validate_layer_checksum(builder.lookup_layer(layer_id), checksum)
+
+  # Commit the manifest to a tag.
+  tag = builder.commit_tag_and_manifest('somenewtag', builder.lookup_layer(layers[-1][0]))
+  assert tag
+  assert tag in builder.committed_tags
+
+  # Verify the legacy image for the tag.
+  found = pre_oci_model.get_repo_tag(repository_ref, 'somenewtag', include_legacy_image=True)
+  assert found
+  assert found.name == 'somenewtag'
+  assert found.legacy_image.docker_image_id == layers[-1][0]
+
+  # Verify the blob and manifest.
+  manifest = pre_oci_model.get_manifest_for_tag(found)
+  assert manifest
+
+  parsed = manifest.get_parsed_manifest()
+  assert len(list(parsed.layers)) == len(layers)
+
+  for index, (layer_id, parent_id, layer_bytes) in enumerate(layers):
+    assert list(parsed.blob_digests)[index] == blobs_by_layer[layer_id].digest
+    assert list(parsed.layers)[index].v1_metadata.image_id == layer_id
+    assert list(parsed.layers)[index].v1_metadata.parent_image_id == parent_id
+
+  assert parsed.leaf_layer_v1_image_id == layers[-1][0]
+
+
+def test_build_manifest_missing_parent(fake_session, pre_oci_model):
+  repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
+  builder = create_manifest_builder(repository_ref)
+
+  assert builder.start_layer('somelayer', {'id': 'somelayer', 'parent': 'someparent'},
+                             'local_us', None, 60) is None