From 7fa350672399646e716a92e9101816a4475c3934 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 27 Aug 2018 18:03:58 -0400 Subject: [PATCH 1/6] Add created_datetime to schema1 format --- image/docker/schema1.py | 14 ++++++++++++++ image/docker/test/test_schema1.py | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/image/docker/schema1.py b/image/docker/schema1.py index b11d6f2ac..0e77defb0 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -11,6 +11,8 @@ import logging from collections import namedtuple, OrderedDict from datetime import datetime +import dateutil.parser + from jsonschema import validate as validate_schema, ValidationError from jwkest.jws import SIGNER_ALGS, keyrep @@ -268,6 +270,18 @@ class DockerSchema1Manifest(ManifestInterface): def leaf_layer(self): return self.layers[-1] + @property + def created_datetime(self): + created_datetime_str = self.leaf_layer.v1_metadata.created + if created_datetime_str is None: + return None + + try: + return dateutil.parser.parse(created_datetime_str).replace(tzinfo=None) + except: + # parse raises different exceptions, so we cannot use a specific kind of handler here. + return None + @property def layers(self): if self._layers is None: diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index e584ec9ed..0c99dbd88 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -78,6 +78,7 @@ def test_valid_manifest(): assert manifest.layers[1].v1_metadata.parent_image_id == 'anotherid' assert manifest.leaf_layer == manifest.layers[1] + assert manifest.created_datetime is None def test_validate_manifest(): @@ -88,6 +89,7 @@ def test_validate_manifest(): manifest = DockerSchema1Manifest(manifest_bytes, validate=True) digest = manifest.digest assert digest == 'sha256:b5dc4f63fdbd64f34f2314c0747ef81008f9fcddce4edfc3fd0e8ec8b358d571' + assert manifest.created_datetime def test_validate_manifest_with_unicode(): @@ -98,6 +100,7 @@ def test_validate_manifest_with_unicode(): manifest = DockerSchema1Manifest(manifest_bytes, validate=True) digest = manifest.digest assert digest == 'sha256:815ecf45716a96b19d54d911e6ace91f78bab26ca0dd299645d9995dacd9f1ef' + assert manifest.created_datetime def test_validate_manifest_with_unicode_encoded(): @@ -108,3 +111,4 @@ def test_validate_manifest_with_unicode_encoded(): manifest = DockerSchema1Manifest.for_latin1_bytes(manifest_bytes, validate=True) digest = manifest.digest assert digest == 'sha256:dde3714ce7e23edc6413aa85c0b42792e4f2f79e9ea36afc154d63ff3d04e86c' + assert manifest.created_datetime From 6c5c2f1a752af365eaca51ee226da4ec12af62af Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 28 Aug 2018 13:02:26 -0400 Subject: [PATCH 2/6] Add new methods to registry data model interface in prep for moving verbs to using it --- data/model/blob.py | 17 ++ data/model/image.py | 29 +-- data/model/storage.py | 2 +- data/registry_model/datatype.py | 3 + data/registry_model/datatypes.py | 117 +++++++++- data/registry_model/interface.py | 75 ++++++- data/registry_model/registry_pre_oci_model.py | 210 +++++++++++++++++- .../registry_model/test/test_pre_oci_model.py | 160 ++++++++++++- endpoints/verbs/MAINTAINERS | 1 - 9 files changed, 585 insertions(+), 29 deletions(-) delete mode 100644 endpoints/verbs/MAINTAINERS diff --git a/data/model/blob.py b/data/model/blob.py index 6c74f6bde..6f6ff1f14 100644 --- a/data/model/blob.py +++ b/data/model/blob.py @@ -7,6 +7,23 @@ from data.database import (Repository, Namespace, ImageStorage, Image, ImageStor BlobUpload, ImageStorageLocation, db_random_func) +def get_repository_blob_by_digest(repository, blob_digest): + """ Find the content-addressable blob linked to the specified repository. + """ + try: + storage_id_query = (ImageStorage + .select(ImageStorage.id) + .join(Image) + .where(Image.repository == repository, + ImageStorage.content_checksum == blob_digest, + ImageStorage.uploading == False) + .limit(1)) + + return storage_model.get_storage_by_subquery(storage_id_query) + except InvalidImageException: + raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest)) + + def get_repo_blob_by_digest(namespace, repo_name, blob_digest): """ Find the content-addressable blob linked to the specified repository. """ diff --git a/data/model/image.py b/data/model/image.py index 527f1ccb7..1b29ee33d 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -489,44 +489,33 @@ def find_or_create_derived_storage(source_image, transformation_name, preferred_ new_storage = storage.create_v1_storage(preferred_location) try: - DerivedStorageForImage.create(source_image=source_image, derivative=new_storage, - transformation=trans, uniqueness_hash=uniqueness_hash) + derived = DerivedStorageForImage.create(source_image=source_image, derivative=new_storage, + transformation=trans, uniqueness_hash=uniqueness_hash) except IntegrityError: # Storage was created while this method executed. Just return the existing. new_storage.delete_instance(recursive=True) return find_derived_storage_for_image(source_image, transformation_name, varying_metadata) - return new_storage + return derived def find_derived_storage_for_image(source_image, transformation_name, varying_metadata=None): uniqueness_hash = _get_uniqueness_hash(varying_metadata) try: - found = (ImageStorage + found = (DerivedStorageForImage .select(ImageStorage, DerivedStorageForImage) - .join(DerivedStorageForImage) + .join(ImageStorage) + .switch(DerivedStorageForImage) .join(ImageStorageTransformation) .where(DerivedStorageForImage.source_image == source_image, ImageStorageTransformation.name == transformation_name, DerivedStorageForImage.uniqueness_hash == uniqueness_hash) .get()) - - found.locations = {placement.location.name for placement in found.imagestorageplacement_set} return found - except ImageStorage.DoesNotExist: + except DerivedStorageForImage.DoesNotExist: return None -def delete_derived_storage_by_uuid(storage_uuid): - try: - image_storage = storage.get_storage_by_uuid(storage_uuid) - except InvalidImageException: - return - - try: - DerivedStorageForImage.get(derivative=image_storage) - except DerivedStorageForImage.DoesNotExist: - return - - image_storage.delete_instance(recursive=True) +def delete_derived_storage(derived_storage): + derived_storage.derivative.delete_instance(recursive=True) diff --git a/data/model/storage.py b/data/model/storage.py index 8ec237406..a7cbf4517 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -336,7 +336,7 @@ def get_storage_locations(uuid): def save_torrent_info(storage_object, piece_length, pieces): try: - TorrentInfo.create(storage=storage_object, piece_length=piece_length, pieces=pieces) + return TorrentInfo.create(storage=storage_object, piece_length=piece_length, pieces=pieces) except IntegrityError: # TorrentInfo already exists for this storage. pass diff --git a/data/registry_model/datatype.py b/data/registry_model/datatype.py index f5ea0b5ae..1183fd1ea 100644 --- a/data/registry_model/datatype.py +++ b/data/registry_model/datatype.py @@ -30,6 +30,9 @@ def datatype(name, static_fields): raise AttributeError('Unknown field `%s`' % name) + def __repr__(self): + return '<%s> #%s' % (name, self._db_id) + return DataType diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index 03ccfc4cd..4701571b8 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -1,5 +1,11 @@ +import hashlib + +from collections import namedtuple from enum import Enum, unique +from cachetools import lru_cache + +from data import model from data.registry_model.datatype import datatype, requiresinput from image.docker.schema1 import DockerSchema1Manifest @@ -13,6 +19,34 @@ class RepositoryReference(datatype('Repository', [])): return RepositoryReference(db_id=repo_obj.id) + @classmethod + def for_id(cls, repo_id): + return RepositoryReference(db_id=repo_id) + + @property + @lru_cache(maxsize=1) + def _repository_obj(self): + return model.repository.lookup_repository(self._db_id) + + def namespace_name(self): + """ Returns the namespace name of this repository. + """ + repository = self._repository_obj + if repository is None: + return None + + return repository.namespace_user.username + + @property + def name(self): + """ Returns the name of this repository. + """ + repository = self._repository_obj + if repository is None: + return None + + return repository.name + class Label(datatype('Label', ['key', 'value', 'uuid', 'source_type_name', 'media_type_name'])): """ Label represents a label on a manifest. """ @@ -40,7 +74,15 @@ class Tag(datatype('Tag', ['name', 'reversion', 'manifest_digest', 'lifetime_sta lifetime_start_ts=repository_tag.lifetime_start_ts, lifetime_end_ts=repository_tag.lifetime_end_ts, manifest_digest=manifest_digest, - inputs=dict(legacy_image=legacy_image)) + inputs=dict(legacy_image=legacy_image, + repository=RepositoryReference.for_id(repository_tag.repository_id))) + + @property + @requiresinput('repository') + def repository(self, repository): + """ Returns the repository under which this tag lives. + """ + return repository @property @requiresinput('legacy_image') @@ -124,3 +166,76 @@ class SecurityScanStatus(Enum): SCANNED = 'scanned' FAILED = 'failed' QUEUED = 'queued' + + +class ManifestLayer(namedtuple('ManifestLayer', ['layer_info', 'blob'])): + """ Represents a single layer in a manifest. The `layer_info` data will be manifest-type specific, + but will have a few expected fields (such as `digest`). The `blob` represents the associated + blob for this layer, optionally with placements. + """ + + def estimated_size(self, estimate_multiplier): + """ Returns the estimated size of this layer. If the layers' blob has an uncompressed size, + it is used. Otherwise, the compressed_size field in the layer is multiplied by the + multiplier. + """ + if self.blob.uncompressed_size: + return self.blob.uncompressed_size + + return (self.layer_info.compressed_size or 0) * estimate_multiplier + + +class Blob(datatype('Blob', ['uuid', 'digest', 'compressed_size', 'uncompressed_size', + 'uploading'])): + """ Blob represents a content-addressable piece of storage. """ + @classmethod + def for_image_storage(cls, image_storage, storage_path, placements=None): + if image_storage is None: + return None + + return Blob(db_id=image_storage.id, + uuid=image_storage.uuid, + inputs=dict(placements=placements, storage_path=storage_path), + digest=image_storage.content_checksum, + compressed_size=image_storage.image_size, + uncompressed_size=image_storage.uncompressed_size, + uploading=image_storage.uploading) + + @property + @requiresinput('storage_path') + def storage_path(self, storage_path): + """ Returns the path of this blob in storage. """ + # TODO: change this to take in the storage engine? + return storage_path + + @property + @requiresinput('placements') + def placements(self, placements): + """ Returns all the storage placements at which the Blob can be found. """ + return placements + + +class DerivedImage(datatype('DerivedImage', ['verb', 'varying_metadata', 'blob'])): + """ DerivedImage represents an image derived from a manifest via some form of verb. """ + @classmethod + def for_derived_storage(cls, derived, verb, varying_metadata, blob): + return DerivedImage(db_id=derived.id, + verb=verb, + varying_metadata=varying_metadata, + blob=blob) + + @property + def unique_id(self): + """ Returns a unique ID for this derived image. This call will consistently produce the same + unique ID across calls in the same code base. + """ + return hashlib.sha256('%s:%s' % (self.verb, self._db_id)).hexdigest() + + +class TorrentInfo(datatype('TorrentInfo', ['pieces', 'piece_length'])): + """ TorrentInfo represents information to pull a blob via torrent. """ + @classmethod + def for_torrent_info(cls, torrent_info): + return TorrentInfo(db_id=torrent_info.id, + pieces=torrent_info.pieces, + piece_length=torrent_info.piece_length) diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index 203c3f23b..c1014b4d4 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -26,11 +26,12 @@ class RegistryDataInterface(object): or None if none. """ @abstractmethod - def get_manifest_for_tag(self, tag): + def get_manifest_for_tag(self, tag, backfill_if_necessary=False): """ Returns the manifest associated with the given tag. """ @abstractmethod - def lookup_manifest_by_digest(self, repository_ref, manifest_digest, allow_dead=False): + def lookup_manifest_by_digest(self, repository_ref, manifest_digest, allow_dead=False, + include_legacy_image=False): """ Looks up the manifest with the given digest under the given repository and returns it or None if none. """ @@ -131,3 +132,73 @@ class RegistryDataInterface(object): NOTE: This method will only be necessary until we've completed the backfill, at which point it should be removed. """ + + @abstractmethod + def is_namespace_enabled(self, namespace_name): + """ Returns whether the given namespace exists and is enabled. """ + + @abstractmethod + def list_manifest_layers(self, manifest, include_placements=False): + """ Returns an *ordered list* of the layers found in the manifest, starting at the base and + working towards the leaf, including the associated Blob and its placements (if specified). + Returns None if the manifest could not be parsed and validated. + """ + + @abstractmethod + def lookup_derived_image(self, manifest, verb, varying_metadata=None, include_placements=False): + """ + Looks up the derived image for the given manifest, verb and optional varying metadata and + returns it or None if none. + """ + + @abstractmethod + def lookup_or_create_derived_image(self, manifest, verb, storage_location, varying_metadata=None, + include_placements=False): + """ + Looks up the derived image for the given maniest, verb and optional varying metadata + and returns it. If none exists, a new derived image is created. + """ + + @abstractmethod + def get_derived_image_signature(self, derived_image, signer_name): + """ + Returns the signature associated with the derived image and a specific signer or None if none. + """ + + @abstractmethod + def set_derived_image_signature(self, derived_image, signer_name, signature): + """ + Sets the calculated signature for the given derived image and signer to that specified. + """ + + @abstractmethod + def delete_derived_image(self, derived_image): + """ + Deletes a derived image and all of its storage. + """ + + @abstractmethod + def set_derived_image_size(self, derived_image, compressed_size): + """ + Sets the compressed size on the given derived image. + """ + + @abstractmethod + def get_torrent_info(self, blob): + """ + Returns the torrent information associated with the given blob or None if none. + """ + + @abstractmethod + def set_torrent_info(self, blob, piece_length, pieces): + """ + Sets the torrent infomation associated with the given blob to that specified. + """ + + @abstractmethod + def get_repo_blob_by_digest(self, repo_ref, blob_digest, include_placements=False): + """ + Returns the blob in the repository with the given digest, if any or None if none. Note that + there may be multiple records in the same repository for the same blob digest, so the return + value of this function may change. + """ diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 8096979f3..ba0944047 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -1,4 +1,5 @@ # pylint: disable=protected-access +import logging from collections import defaultdict @@ -8,8 +9,12 @@ from data import database from data import model from data.registry_model.interface import RegistryDataInterface from data.registry_model.datatypes import (Tag, RepositoryReference, Manifest, LegacyImage, Label, - SecurityScanStatus) -from image.docker.schema1 import DockerSchema1ManifestBuilder + SecurityScanStatus, ManifestLayer, Blob, DerivedImage, + TorrentInfo) +from image.docker.schema1 import DockerSchema1ManifestBuilder, ManifestException + + +logger = logging.getLogger(__name__) class PreOCIModel(RegistryDataInterface): @@ -38,11 +43,14 @@ class PreOCIModel(RegistryDataInterface): repo = model.repository.get_repository(namespace_name, repo_name, kind_filter=kind_filter) return RepositoryReference.for_repo_obj(repo) - def get_manifest_for_tag(self, tag): + def get_manifest_for_tag(self, tag, backfill_if_necessary=False): """ Returns the manifest associated with the given tag. """ try: tag_manifest = database.TagManifest.get(tag_id=tag._db_id) except database.TagManifest.DoesNotExist: + if backfill_if_necessary: + return self.backfill_manifest_for_tag(tag) + return return Manifest.for_tag_manifest(tag_manifest) @@ -171,6 +179,7 @@ class PreOCIModel(RegistryDataInterface): Returns the latest, *active* tag found in the repository, with the matching name or None if none. """ + assert isinstance(tag_name, basestring) tag = model.tag.get_active_tag_for_repo(repository_ref._db_id, tag_name) if tag is None: return None @@ -355,5 +364,200 @@ class PreOCIModel(RegistryDataInterface): return Manifest.for_tag_manifest(tag_manifest) + def is_namespace_enabled(self, namespace_name): + """ Returns whether the given namespace exists and is enabled. """ + namespace = model.user.get_namespace_user(namespace_name) + return namespace is not None and namespace.enabled + + def list_manifest_layers(self, manifest, include_placements=False): + """ Returns an *ordered list* of the layers found in the manifest, starting at the base and + working towards the leaf, including the associated Blob and its placements (if specified). + Returns None if the manifest could not be parsed and validated. + """ + try: + parsed = manifest.get_parsed_manifest() + except ManifestException: + logger.exception('Could not parse and validate manifest `%s`', manifest._db_id) + return None + + try: + tag_manifest = database.TagManifest.get(id=manifest._db_id) + except database.TagManifest.DoesNotExist: + logger.exception('Could not find tag manifest for manifest `%s`', manifest._db_id) + return None + + repo = tag_manifest.tag.repository + blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo, parsed.checksums) + storage_map = {blob.content_checksum: blob for blob in blob_query} + + manifest_layers = [] + for layer in parsed.layers: + digest_str = str(layer.digest) + if digest_str not in storage_map: + logger.error('Missing digest `%s` for manifest `%s`', layer.digest, manifest._db_id) + return None + + image_storage = storage_map[digest_str] + placements = None + if include_placements: + placements = list(model.storage.get_storage_locations(image_storage.uuid)) + + blob = Blob.for_image_storage(image_storage, + storage_path=model.storage.get_layer_path(image_storage), + placements=placements) + manifest_layers.append(ManifestLayer(layer, blob)) + + return manifest_layers + + def lookup_derived_image(self, manifest, verb, varying_metadata=None, include_placements=False): + """ + Looks up the derived image for the given manifest, verb and optional varying metadata and + returns it or None if none. + """ + try: + tag_manifest = database.TagManifest.get(id=manifest._db_id) + except database.TagManifest.DoesNotExist: + logger.exception('Could not find tag manifest for manifest `%s`', manifest._db_id) + return None + + repo_image = tag_manifest.tag.image + derived = model.image.find_derived_storage_for_image(repo_image, verb, varying_metadata) + return self._build_derived(derived, verb, varying_metadata, include_placements) + + def lookup_or_create_derived_image(self, manifest, verb, storage_location, varying_metadata=None, + include_placements=False): + """ + Looks up the derived image for the given maniest, verb and optional varying metadata + and returns it. If none exists, a new derived image is created. + """ + try: + tag_manifest = database.TagManifest.get(id=manifest._db_id) + except database.TagManifest.DoesNotExist: + logger.exception('Could not find tag manifest for manifest `%s`', manifest._db_id) + return None + + repo_image = tag_manifest.tag.image + derived = model.image.find_or_create_derived_storage(repo_image, verb, storage_location, + varying_metadata) + return self._build_derived(derived, verb, varying_metadata, include_placements) + + def _build_derived(self, derived, verb, varying_metadata, include_placements): + if derived is None: + return None + + derived_storage = derived.derivative + placements = None + if include_placements: + placements = list(model.storage.get_storage_locations(derived_storage.uuid)) + + blob = Blob.for_image_storage(derived_storage, + storage_path=model.storage.get_layer_path(derived_storage), + placements=placements) + + return DerivedImage.for_derived_storage(derived, verb, varying_metadata, blob) + + def get_derived_image_signature(self, derived_image, signer_name): + """ + Returns the signature associated with the derived image and a specific signer or None if none. + """ + try: + derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) + except database.DerivedStorageForImage.DoesNotExist: + return None + + storage = derived_storage.derivative + signature_entry = model.storage.lookup_storage_signature(storage, signer_name) + if signature_entry is None: + return None + + return signature_entry.signature + + def set_derived_image_signature(self, derived_image, signer_name, signature): + """ + Sets the calculated signature for the given derived image and signer to that specified. + """ + try: + derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) + except database.DerivedStorageForImage.DoesNotExist: + return None + + storage = derived_storage.derivative + signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name) + signature_entry.signature = signature + signature_entry.uploading = False + signature_entry.save() + + def delete_derived_image(self, derived_image): + """ + Deletes a derived image and all of its storage. + """ + try: + derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) + except database.DerivedStorageForImage.DoesNotExist: + return None + + model.image.delete_derived_storage(derived_storage) + + def set_derived_image_size(self, derived_image, compressed_size): + """ + Sets the compressed size on the given derived image. + """ + try: + derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) + except database.DerivedStorageForImage.DoesNotExist: + return None + + storage_entry = derived_storage.derivative + storage_entry.image_size = compressed_size + storage_entry.uploading = False + storage_entry.save() + + def get_torrent_info(self, blob): + """ + Returns the torrent information associated with the given blob or None if none. + """ + try: + image_storage = database.ImageStorage.get(id=blob._db_id) + except database.ImageStorage.DoesNotExist: + return None + + try: + torrent_info = model.storage.get_torrent_info(image_storage) + except model.TorrentInfoDoesNotExist: + return None + + return TorrentInfo.for_torrent_info(torrent_info) + + def set_torrent_info(self, blob, piece_length, pieces): + """ + Sets the torrent infomation associated with the given blob to that specified. + """ + try: + image_storage = database.ImageStorage.get(id=blob._db_id) + except database.ImageStorage.DoesNotExist: + return None + + torrent_info = model.storage.save_torrent_info(image_storage, piece_length, pieces) + return TorrentInfo.for_torrent_info(torrent_info) + + def get_repo_blob_by_digest(self, repo_ref, blob_digest, include_placements=False): + """ + Returns the blob in the repository with the given digest, if any or None if none. Note that + there may be multiple records in the same repository for the same blob digest, so the return + value of this function may change. + """ + try: + image_storage = model.blob.get_repository_blob_by_digest(repo_ref._db_id, blob_digest) + except model.BlobDoesNotExist: + return None + + placements = None + if include_placements: + placements = list(model.storage.get_storage_locations(image_storage.uuid)) + + return Blob.for_image_storage(image_storage, + storage_path=model.storage.get_layer_path(image_storage), + placements=placements) + pre_oci_model = PreOCIModel() diff --git a/data/registry_model/test/test_pre_oci_model.py b/data/registry_model/test/test_pre_oci_model.py index 9d1d530a6..bdd77637f 100644 --- a/data/registry_model/test/test_pre_oci_model.py +++ b/data/registry_model/test/test_pre_oci_model.py @@ -8,7 +8,8 @@ from app import docker_v2_signing_key from data import model from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob, ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image, - TagManifestLabel, TagManifest, TagManifestLabel) + TagManifestLabel, TagManifest, TagManifestLabel, DerivedStorageForImage, + TorrentInfo) from data.registry_model.registry_pre_oci_model import PreOCIModel from data.registry_model.datatypes import RepositoryReference @@ -363,3 +364,160 @@ def test_backfill_manifest_for_tag(repo_namespace, repo_name, clear_rows, pre_oc parsed_manifest = manifest.get_parsed_manifest() assert parsed_manifest.leaf_layer_v1_image_id == legacy_image.docker_image_id assert parsed_manifest.parent_image_ids == {p.docker_image_id for p in legacy_image.parents} + + +@pytest.mark.parametrize('repo_namespace, repo_name', [ + ('devtable', 'simple'), + ('devtable', 'complex'), + ('devtable', 'history'), + ('buynlarge', 'orgrepo'), +]) +def test_backfill_manifest_on_lookup(repo_namespace, repo_name, clear_rows, pre_oci_model): + repository_ref = pre_oci_model.lookup_repository(repo_namespace, repo_name) + tags = pre_oci_model.list_repository_tags(repository_ref) + assert tags + + for tag in tags: + assert not tag.manifest_digest + assert not pre_oci_model.get_manifest_for_tag(tag) + + manifest = pre_oci_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + assert manifest + + updated_tag = pre_oci_model.get_repo_tag(repository_ref, tag.name) + assert updated_tag.manifest_digest == manifest.digest + + +@pytest.mark.parametrize('namespace, expect_enabled', [ + ('devtable', True), + ('buynlarge', True), + + ('disabled', False), +]) +def test_is_namespace_enabled(namespace, expect_enabled, pre_oci_model): + assert pre_oci_model.is_namespace_enabled(namespace) == expect_enabled + + +@pytest.mark.parametrize('repo_namespace, repo_name', [ + ('devtable', 'simple'), + ('devtable', 'complex'), + ('devtable', 'history'), + ('buynlarge', 'orgrepo'), +]) +def test_list_manifest_layers(repo_namespace, repo_name, pre_oci_model): + repository_ref = pre_oci_model.lookup_repository(repo_namespace, repo_name) + tags = pre_oci_model.list_repository_tags(repository_ref) + assert tags + + for tag in tags: + manifest = pre_oci_model.get_manifest_for_tag(tag) + assert manifest + + with assert_query_count(4): + layers = pre_oci_model.list_manifest_layers(manifest) + assert layers + + layers = pre_oci_model.list_manifest_layers(manifest, include_placements=True) + assert layers + + parsed_layers = list(manifest.get_parsed_manifest().layers) + assert len(layers) == len(parsed_layers) + + for index, manifest_layer in enumerate(layers): + assert manifest_layer.layer_info == parsed_layers[index] + assert manifest_layer.blob.digest == str(parsed_layers[index].digest) + assert manifest_layer.blob.storage_path + assert manifest_layer.blob.placements + + repo_blob = pre_oci_model.get_repo_blob_by_digest(repository_ref, manifest_layer.blob.digest) + assert repo_blob.digest == manifest_layer.blob.digest + + assert manifest_layer.estimated_size(1) is not None + + +def test_derived_image(pre_oci_model): + # Clear all existing derived storage. + DerivedStorageForImage.delete().execute() + + repository_ref = pre_oci_model.lookup_repository('devtable', 'simple') + tag = pre_oci_model.get_repo_tag(repository_ref, 'latest') + manifest = pre_oci_model.get_manifest_for_tag(tag) + + # Ensure the squashed image doesn't exist. + assert pre_oci_model.lookup_derived_image(manifest, 'squash', {}) is None + + # Create a new one. + squashed = pre_oci_model.lookup_or_create_derived_image(manifest, 'squash', 'local_us', {}) + assert pre_oci_model.lookup_or_create_derived_image(manifest, 'squash', 'local_us', {}) == squashed + assert squashed.unique_id + + # Check and set the size. + assert squashed.blob.compressed_size is None + pre_oci_model.set_derived_image_size(squashed, 1234) + assert pre_oci_model.lookup_derived_image(manifest, 'squash', {}).blob.compressed_size == 1234 + assert pre_oci_model.lookup_derived_image(manifest, 'squash', {}).unique_id == squashed.unique_id + + # Ensure its returned now. + assert pre_oci_model.lookup_derived_image(manifest, 'squash', {}) == squashed + + # Ensure different metadata results in a different derived image. + assert pre_oci_model.lookup_derived_image(manifest, 'squash', {'foo': 'bar'}) is None + + squashed_foo = pre_oci_model.lookup_or_create_derived_image(manifest, 'squash', 'local_us', + {'foo': 'bar'}) + assert squashed_foo != squashed + assert pre_oci_model.lookup_derived_image(manifest, 'squash', {'foo': 'bar'}) == squashed_foo + + assert squashed.unique_id != squashed_foo.unique_id + + # Lookup with placements. + squashed = pre_oci_model.lookup_or_create_derived_image(manifest, 'squash', 'local_us', {}, + include_placements=True) + assert squashed.blob.placements + + # Delete the derived image. + pre_oci_model.delete_derived_image(squashed) + assert pre_oci_model.lookup_derived_image(manifest, 'squash', {}) is None + + +def test_derived_image_signatures(pre_oci_model): + repository_ref = pre_oci_model.lookup_repository('devtable', 'simple') + tag = pre_oci_model.get_repo_tag(repository_ref, 'latest') + manifest = pre_oci_model.get_manifest_for_tag(tag) + + derived = pre_oci_model.lookup_derived_image(manifest, 'squash', {}) + assert derived + + signature = pre_oci_model.get_derived_image_signature(derived, 'gpg2') + assert signature is None + + pre_oci_model.set_derived_image_signature(derived, 'gpg2', 'foo') + assert pre_oci_model.get_derived_image_signature(derived, 'gpg2') == 'foo' + + +def test_torrent_info(pre_oci_model): + # Remove all existing info. + TorrentInfo.delete().execute() + + repository_ref = pre_oci_model.lookup_repository('devtable', 'simple') + tag = pre_oci_model.get_repo_tag(repository_ref, 'latest') + manifest = pre_oci_model.get_manifest_for_tag(tag) + + layers = pre_oci_model.list_manifest_layers(manifest) + assert layers + + assert pre_oci_model.get_torrent_info(layers[0].blob) is None + pre_oci_model.set_torrent_info(layers[0].blob, 2, 'foo') + + torrent_info = pre_oci_model.get_torrent_info(layers[0].blob) + assert torrent_info is not None + assert torrent_info.piece_length == 2 + assert torrent_info.pieces == 'foo' + + # Try setting it again. Nothing should happen. + pre_oci_model.set_torrent_info(layers[0].blob, 3, 'bar') + + torrent_info = pre_oci_model.get_torrent_info(layers[0].blob) + assert torrent_info is not None + assert torrent_info.piece_length == 2 + assert torrent_info.pieces == 'foo' diff --git a/endpoints/verbs/MAINTAINERS b/endpoints/verbs/MAINTAINERS deleted file mode 100644 index e08d8e55b..000000000 --- a/endpoints/verbs/MAINTAINERS +++ /dev/null @@ -1 +0,0 @@ -Joseph Schorr (@josephschorr) From c92294c218b58d132c7de8178d0669af0627a373 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 28 Aug 2018 17:28:27 -0400 Subject: [PATCH 3/6] Add compressed_size to both schema formats on their layers --- image/docker/schema1.py | 7 +++++-- image/docker/schema2/manifest.py | 7 ++++--- image/docker/schema2/test/test_manifest.py | 4 ++-- image/docker/test/test_schema1.py | 3 +++ 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 0e77defb0..4897f4926 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -72,7 +72,8 @@ class InvalidSchema1Signature(ManifestException): pass -class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_metadata'])): +class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_metadata', + 'compressed_size'])): """ Represents all of the data about an individual layer in a given Manifest. This is the union of the fsLayers (digest) and the history entries (v1_compatibility). @@ -319,7 +320,9 @@ class DockerSchema1Manifest(ManifestInterface): extracted = Schema1V1Metadata(v1_metadata['id'], v1_metadata.get('parent'), v1_metadata.get('created'), v1_metadata.get('comment'), command, labels) - yield Schema1Layer(image_digest, extracted, metadata_string) + + compressed_size = v1_metadata.get('Size') + yield Schema1Layer(image_digest, extracted, metadata_string, compressed_size) @property def _payload(self): diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index 4e59d20a3..f4e443f66 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -25,8 +25,9 @@ DOCKER_SCHEMA2_MANIFEST_URLS_KEY = 'urls' # Named tuples. DockerV2ManifestConfig = namedtuple('DockerV2ManifestConfig', ['size', 'digest']) -DockerV2ManifestLayer = namedtuple('DockerV2ManifestLayer', ['index', 'size', 'digest', - 'is_remote', 'urls']) +DockerV2ManifestLayer = namedtuple('DockerV2ManifestLayer', ['index', 'digest', + 'is_remote', 'urls', + 'compressed_size']) LayerWithV1ID = namedtuple('LayerWithV1ID', ['layer', 'v1_id', 'v1_parent_id']) @@ -191,7 +192,7 @@ class DockerSchema2Manifest(ManifestInterface): layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY]) yield DockerV2ManifestLayer(index=index, - size=layer[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY], + compressed_size=layer[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY], digest=digest, is_remote=is_remote, urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY)) diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index 6037e22e4..03b847d7c 100644 --- a/image/docker/schema2/test/test_manifest.py +++ b/image/docker/schema2/test/test_manifest.py @@ -62,13 +62,13 @@ def test_valid_manifest(): assert len(manifest.layers) == 4 assert manifest.layers[0].is_remote - assert manifest.layers[0].size == 1234 + assert manifest.layers[0].compressed_size == 1234 assert str(manifest.layers[0].digest) == 'sha256:ec4b8955958665577945c89419d1af06b5f7636b4ac3da7f12184802ad867736' assert manifest.layers[0].urls assert manifest.leaf_layer == manifest.layers[3] assert not manifest.leaf_layer.is_remote - assert manifest.leaf_layer.size == 73109 + assert manifest.leaf_layer.compressed_size == 73109 blob_digests = list(manifest.blob_digests) assert len(blob_digests) == len(manifest.layers) diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index 0c99dbd88..6fc0d5719 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -77,6 +77,9 @@ def test_valid_manifest(): assert manifest.layers[1].v1_metadata.image_id == 'someid' assert manifest.layers[1].v1_metadata.parent_image_id == 'anotherid' + assert manifest.layers[0].compressed_size is None + assert manifest.layers[1].compressed_size is None + assert manifest.leaf_layer == manifest.layers[1] assert manifest.created_datetime is None From bafab2e73425a028ad1fbab430ee241c01c3a03d Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 28 Aug 2018 17:30:15 -0400 Subject: [PATCH 4/6] Change tar stream formatters to be based on tag and manifest, instead of legacy images --- image/appc/__init__.py | 16 ++++++++-------- image/appc/test/test_appc.py | 26 -------------------------- image/common.py | 8 ++++---- image/docker/squashed.py | 26 +++++++++----------------- 4 files changed, 21 insertions(+), 55 deletions(-) diff --git a/image/appc/__init__.py b/image/appc/__init__.py index 91730f923..5b74309c9 100644 --- a/image/appc/__init__.py +++ b/image/appc/__init__.py @@ -18,10 +18,10 @@ class AppCImageFormatter(TarImageFormatter): Image formatter which produces an tarball according to the AppC specification. """ - def stream_generator(self, repo_image, tag, synthetic_image_id, get_image_iterator, + def stream_generator(self, tag, manifest, synthetic_image_id, layer_iterator, tar_stream_getter_iterator, reporter=None): image_mtime = 0 - created = next(get_image_iterator()).v1_metadata.created + created = manifest.created_datetime if created is not None: image_mtime = calendar.timegm(created.utctimetuple()) @@ -31,8 +31,8 @@ class AppCImageFormatter(TarImageFormatter): # Yield the manifest. manifest = json.dumps(DockerV1ToACIManifestTranslator.build_manifest( - repo_image, tag, + manifest, synthetic_image_id )) yield self.tar_file('manifest', manifest, mtime=image_mtime) @@ -170,16 +170,16 @@ class DockerV1ToACIManifestTranslator(object): return volumes @staticmethod - def build_manifest(repo_image, tag, synthetic_image_id): + def build_manifest(tag, manifest, synthetic_image_id): """ Builds an ACI manifest of an existing repository image. """ - docker_layer_data = JSONPathDict(repo_image.compat_metadata) + docker_layer_data = JSONPathDict(manifest.leaf_layer.v1_metadata) config = docker_layer_data['config'] or JSONPathDict({}) - namespace = repo_image.repository.namespace_name - repo_name = repo_image.repository.name + namespace = tag.repository.namespace_name + repo_name = tag.repository.name source_url = "%s://%s/%s/%s:%s" % (app.config['PREFERRED_URL_SCHEME'], app.config['SERVER_HOSTNAME'], - namespace, repo_name, tag) + namespace, repo_name, tag.name) # ACI requires that the execution command be absolutely referenced. Therefore, if we find # a relative command, we give it as an argument to /bin/sh to resolve and execute for us. diff --git a/image/appc/test/test_appc.py b/image/appc/test/test_appc.py index 06f1e8a8d..d078fbe9f 100644 --- a/image/appc/test/test_appc.py +++ b/image/appc/test/test_appc.py @@ -1,7 +1,6 @@ import pytest from image.appc import DockerV1ToACIManifestTranslator -from endpoints.verbs.models_interface import RepositoryReference, ImageWithBlob from util.dict_wrappers import JSONPathDict @@ -74,31 +73,6 @@ EXAMPLE_MANIFEST_OBJ = { "throwaway": True } - -@pytest.fixture -def repo_image(): - repo_ref = RepositoryReference(1, 'simple', 'devtable') - return ImageWithBlob(1, None, EXAMPLE_MANIFEST_OBJ, repo_ref, 1, None) - - -def test_port_conversion(repo_image): - output = DockerV1ToACIManifestTranslator.build_manifest(repo_image, 'v3.0.15', 'abcdef') - ports = output['app']['ports'] - ports.sort() - assert {'name':'port-2379', 'port':2379, 'protocol':'tcp'} == ports[0] - assert {'name':'port-2380', 'port':2380, 'protocol':'tcp'} == ports[1] - - -def test_legacy_port_conversion(repo_image): - del repo_image.compat_metadata['config']['ExposedPorts'] - repo_image.compat_metadata['config']['ports'] = ['8080', '8081'] - output = DockerV1ToACIManifestTranslator.build_manifest(repo_image, 'v3.0.15', 'abcdef') - ports = output['app']['ports'] - ports.sort() - assert {'name':'port-8080', 'port':8080, 'protocol':'tcp'} == ports[0] - assert {'name':'port-8081', 'port':8081, 'protocol':'tcp'} == ports[1] - - @pytest.mark.parametrize("vcfg,expected", [ ({'Volumes': None}, []), ({'Volumes': {}}, []), diff --git a/image/common.py b/image/common.py index 356288316..63b00e676 100644 --- a/image/common.py +++ b/image/common.py @@ -7,17 +7,17 @@ class TarImageFormatter(object): Base class for classes which produce a tar containing image and layer data. """ - def build_stream(self, repo_image, tag, synthetic_image_id, get_image_iterator, + def build_stream(self, tag, manifest, synthetic_image_id, layer_iterator, tar_stream_getter_iterator, reporter=None): """ Builds and streams a synthetic .tar.gz that represents the formatted tar created by this class's implementation. """ - return GzipWrap(self.stream_generator(repo_image, tag, synthetic_image_id, get_image_iterator, + return GzipWrap(self.stream_generator(tag, manifest, synthetic_image_id, layer_iterator, tar_stream_getter_iterator, reporter=reporter)) - def stream_generator(self, repo_image, tag, synthetic_image_id, get_image_iterator, - tar_stream_getter_iterator): + def stream_generator(self, tag, manifest, synthetic_image_id, layer_iterator, + tar_stream_getter_iterator, reporter=None): raise NotImplementedError def tar_file(self, name, contents, mtime=None): diff --git a/image/docker/squashed.py b/image/docker/squashed.py index 5a52c7109..57b680dff 100644 --- a/image/docker/squashed.py +++ b/image/docker/squashed.py @@ -28,10 +28,10 @@ class SquashedDockerImageFormatter(TarImageFormatter): # daemon dies when trying to load the entire tar into memory. SIZE_MULTIPLIER = 1.2 - def stream_generator(self, repo_image, tag, synthetic_image_id, get_image_iterator, + def stream_generator(self, tag, manifest, synthetic_image_id, layer_iterator, tar_stream_getter_iterator, reporter=None): image_mtime = 0 - created = next(get_image_iterator()).v1_metadata.created + created = manifest.created_datetime if created is not None: image_mtime = calendar.timegm(created.utctimetuple()) @@ -50,8 +50,8 @@ class SquashedDockerImageFormatter(TarImageFormatter): hostname = app.config['SERVER_HOSTNAME'] repositories = {} - namespace = repo_image.repository.namespace_name - repository = repo_image.repository.name + namespace = tag.repository.namespace_name + repository = tag.repository.name repositories[hostname + '/' + namespace + '/' + repository] = synthetic_layer_info yield self.tar_file('repositories', json.dumps(repositories), mtime=image_mtime) @@ -60,7 +60,7 @@ class SquashedDockerImageFormatter(TarImageFormatter): yield self.tar_folder(synthetic_image_id, mtime=image_mtime) # Yield the JSON layer data. - layer_json = SquashedDockerImageFormatter._build_layer_json(repo_image, synthetic_image_id) + layer_json = SquashedDockerImageFormatter._build_layer_json(manifest, synthetic_image_id) yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json), mtime=image_mtime) # Yield the VERSION file. @@ -68,16 +68,8 @@ class SquashedDockerImageFormatter(TarImageFormatter): # Yield the merged layer data's header. estimated_file_size = 0 - for image in get_image_iterator(): - # In V1 we have the actual uncompressed size, which is needed for back compat with - # older versions of Docker. - # In V2, we use the size given in the image JSON. - if image.blob.uncompressed_size: - estimated_file_size += image.blob.uncompressed_size - else: - image_json = image.compat_metadata - estimated_file_size += (image_json.get('Size', 0) * - SquashedDockerImageFormatter.SIZE_MULTIPLIER) + for layer in layer_iterator: + estimated_file_size += layer.estimated_file_size(SquashedDockerImageFormatter.SIZE_MULTIPLIER) # Make sure the estimated file size is an integer number of bytes. estimated_file_size = int(math.ceil(estimated_file_size)) @@ -115,8 +107,8 @@ class SquashedDockerImageFormatter(TarImageFormatter): @staticmethod - def _build_layer_json(repo_image, synthetic_image_id): - layer_json = repo_image.compat_metadata + def _build_layer_json(manifest, synthetic_image_id): + layer_json = manifest.leaf_layer.v1_metadata updated_json = copy.deepcopy(layer_json) updated_json['id'] = synthetic_image_id From f252b0b16ffdc3f5e7dd45e00a77513bbd27f917 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 28 Aug 2018 22:58:19 -0400 Subject: [PATCH 5/6] Move verbs endpoint to use new registry data model --- data/model/storage.py | 4 +- data/registry_model/datatypes.py | 3 +- data/registry_model/registry_pre_oci_model.py | 4 + .../registry_model/test/test_pre_oci_model.py | 2 + endpoints/verbs/__init__.py | 182 +++++++++------- endpoints/verbs/models_interface.py | 159 -------------- endpoints/verbs/models_pre_oci.py | 205 ------------------ image/appc/__init__.py | 6 +- image/docker/squashed.py | 12 +- storage/fakestorage.py | 2 +- test/registry/protocol_fixtures.py | 6 +- test/registry/registry_tests.py | 9 +- test/registry_tests.py | 12 +- util/audit.py | 8 + 14 files changed, 145 insertions(+), 469 deletions(-) delete mode 100644 endpoints/verbs/models_interface.py delete mode 100644 endpoints/verbs/models_pre_oci.py diff --git a/data/model/storage.py b/data/model/storage.py index a7cbf4517..cb77ece5f 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -254,6 +254,7 @@ def get_storage_by_uuid(storage_uuid): def get_layer_path(storage_record): """ Returns the path in the storage engine to the layer data referenced by the storage row. """ + assert storage_record.cas_path is not None return get_layer_path_for_storage(storage_record.uuid, storage_record.cas_path, storage_record.content_checksum) @@ -280,7 +281,8 @@ def lookup_repo_storages_by_content_checksum(repo, checksums): query_alias = 'q{0}'.format(counter) candidate_subq = (ImageStorage .select(ImageStorage.id, ImageStorage.content_checksum, - ImageStorage.image_size, ImageStorage.uuid) + ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path, + ImageStorage.uncompressed_size) .join(Image) .where(Image.repository == repo, ImageStorage.content_checksum == checksum) .limit(1) diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index 4701571b8..550baed22 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -28,6 +28,7 @@ class RepositoryReference(datatype('Repository', [])): def _repository_obj(self): return model.repository.lookup_repository(self._db_id) + @property def namespace_name(self): """ Returns the namespace name of this repository. """ @@ -183,7 +184,7 @@ class ManifestLayer(namedtuple('ManifestLayer', ['layer_info', 'blob'])): return self.blob.uncompressed_size return (self.layer_info.compressed_size or 0) * estimate_multiplier - + class Blob(datatype('Blob', ['uuid', 'digest', 'compressed_size', 'uncompressed_size', 'uploading'])): diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index ba0944047..0009192a1 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -398,6 +398,8 @@ class PreOCIModel(RegistryDataInterface): return None image_storage = storage_map[digest_str] + assert image_storage.cas_path is not None + placements = None if include_placements: placements = list(model.storage.get_storage_locations(image_storage.uuid)) @@ -551,6 +553,8 @@ class PreOCIModel(RegistryDataInterface): except model.BlobDoesNotExist: return None + assert image_storage.cas_path is not None + placements = None if include_placements: placements = list(model.storage.get_storage_locations(image_storage.uuid)) diff --git a/data/registry_model/test/test_pre_oci_model.py b/data/registry_model/test/test_pre_oci_model.py index bdd77637f..e568dd923 100644 --- a/data/registry_model/test/test_pre_oci_model.py +++ b/data/registry_model/test/test_pre_oci_model.py @@ -35,6 +35,8 @@ def test_find_matching_tag(names, expected, pre_oci_model): assert found is None else: assert found.name in expected + assert found.repository.namespace_name == 'devtable' + assert found.repository.name == 'simple' @pytest.mark.parametrize('repo_namespace, repo_name, expected', [ diff --git a/endpoints/verbs/__init__.py b/endpoints/verbs/__init__.py index 46142a893..eea5f8eff 100644 --- a/endpoints/verbs/__init__.py +++ b/endpoints/verbs/__init__.py @@ -10,13 +10,14 @@ from auth.auth_context import get_authenticated_user from auth.decorators import process_auth from auth.permissions import ReadRepositoryPermission from data import database +from data import model +from data.registry_model import registry_model from endpoints.decorators import anon_protect, anon_allowed, route_show_if, parse_repository_name -from endpoints.verbs.models_pre_oci import pre_oci_model as model from endpoints.v2.blob import BLOB_DIGEST_ROUTE from image.appc import AppCImageFormatter from image.docker.squashed import SquashedDockerImageFormatter from storage import Storage -from util.audit import track_and_log +from util.audit import track_and_log, wrap_repository from util.http import exact_abort from util.registry.filelike import wrap_with_handler from util.registry.queuefile import QueueFile @@ -40,7 +41,7 @@ class VerbReporter(TarLayerFormatterReporter): metric_queue.verb_action_passes.Inc(labelvalues=[self.kind, pass_count]) -def _open_stream(formatter, repo_image, tag, derived_image_id, handlers, reporter): +def _open_stream(formatter, tag, manifest, derived_image_id, handlers, reporter): """ This method generates a stream of data which will be replicated and read from the queue files. This method runs in a separate process. @@ -48,29 +49,25 @@ def _open_stream(formatter, repo_image, tag, derived_image_id, handlers, reporte # For performance reasons, we load the full image list here, cache it, then disconnect from # the database. with database.UseThenDisconnect(app.config): - image_list = list(model.get_manifest_layers_with_blobs(repo_image)) + layers = registry_model.list_manifest_layers(manifest, include_placements=True) - def get_next_image(): - for current_image in image_list: - yield current_image - - def image_stream_getter(store, current_image): + def image_stream_getter(store, blob): def get_stream_for_storage(): - current_image_path = model.get_blob_path(current_image.blob) - current_image_stream = store.stream_read_file(current_image.blob.locations, - current_image_path) - - logger.debug('Returning image layer %s: %s', current_image.image_id, current_image_path) + current_image_stream = store.stream_read_file(blob.placements, blob.storage_path) + logger.debug('Returning blob %s: %s', blob.digest, blob.storage_path) return current_image_stream return get_stream_for_storage def tar_stream_getter_iterator(): # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3) store = Storage(app, metric_queue, config_provider=config_provider, ip_resolver=ip_resolver) - for current_image in image_list: - yield image_stream_getter(store, current_image) - stream = formatter.build_stream(repo_image, tag, derived_image_id, get_next_image, + # Note: We reverse because we have to start at the leaf layer and move upward, + # as per the spec for the formatters. + for layer in reversed(layers): + yield image_stream_getter(store, layer.blob) + + stream = formatter.build_stream(tag, manifest, derived_image_id, layers, tar_stream_getter_iterator, reporter=reporter) for handler_fn in handlers: @@ -86,14 +83,14 @@ def _sign_derived_image(verb, derived_image, queue_file): try: signature = signer.detached_sign(queue_file) except: - logger.exception('Exception when signing %s deriving image %s', verb, derived_image.ref) + logger.exception('Exception when signing %s deriving image %s', verb, derived_image) return # Setup the database (since this is a new process) and then disconnect immediately # once the operation completes. if not queue_file.raised_exception: with database.UseThenDisconnect(app.config): - model.set_derived_image_signature(derived_image, signer.name, signature) + registry_model.set_derived_image_signature(derived_image, signer.name, signature) def _write_derived_image_to_storage(verb, derived_image, queue_file): @@ -102,17 +99,16 @@ def _write_derived_image_to_storage(verb, derived_image, queue_file): """ def handle_exception(ex): - logger.debug('Exception when building %s derived image %s: %s', verb, derived_image.ref, ex) + logger.debug('Exception when building %s derived image %s: %s', verb, derived_image, ex) with database.UseThenDisconnect(app.config): - model.delete_derived_image(derived_image) + registry_model.delete_derived_image(derived_image) queue_file.add_exception_handler(handle_exception) # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3) store = Storage(app, metric_queue, config_provider=config_provider, ip_resolver=ip_resolver) - image_path = model.get_blob_path(derived_image.blob) - store.stream_write(derived_image.blob.locations, image_path, queue_file) + store.stream_write(derived_image.blob.placements, derived_image.blob.storage_path, queue_file) queue_file.close() @@ -121,17 +117,16 @@ def _torrent_for_blob(blob, is_public): with an error if the state is not valid (e.g. non-public, non-user request). """ # Make sure the storage has a size. - if not blob.size: + if not blob.compressed_size: abort(404) # Lookup the torrent information for the storage. - torrent_info = model.get_torrent_info(blob) + torrent_info = registry_model.get_torrent_info(blob) if torrent_info is None: abort(404) # Lookup the webseed path for the storage. - path = model.get_blob_path(blob) - webseed = storage.get_direct_download_url(blob.locations, path, + webseed = storage.get_direct_download_url(blob.placements, blob.storage_path, expires_in=app.config['BITTORRENT_WEBSEED_LIFETIME']) if webseed is None: # We cannot support webseeds for storages that cannot provide direct downloads. @@ -151,8 +146,8 @@ def _torrent_for_blob(blob, is_public): name = per_user_torrent_filename(torrent_config, user.uuid, blob.uuid) # Return the torrent file. - torrent_file = make_torrent(torrent_config, name, webseed, blob.size, torrent_info.piece_length, - torrent_info.pieces) + torrent_file = make_torrent(torrent_config, name, webseed, blob.compressed_size, + torrent_info.piece_length, torrent_info.pieces) headers = { 'Content-Type': 'application/x-bittorrent', @@ -161,7 +156,7 @@ def _torrent_for_blob(blob, is_public): return make_response(torrent_file, 200, headers) -def _torrent_repo_verb(repo_image, tag, verb, **kwargs): +def _torrent_repo_verb(repository, tag, manifest, verb, **kwargs): """ Handles returning a torrent for the given verb on the given image and tag. """ if not features.BITTORRENT: # Torrent feature is not enabled. @@ -169,60 +164,73 @@ def _torrent_repo_verb(repo_image, tag, verb, **kwargs): # Lookup an *existing* derived storage for the verb. If the verb's image storage doesn't exist, # we cannot create it here, so we 406. - derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag}) + derived_image = registry_model.lookup_derived_image(manifest, verb, + varying_metadata={'tag': tag.name}, + include_placements=True) if derived_image is None: abort(406) # Return the torrent. - repo = model.get_repository(repo_image.repository.namespace_name, repo_image.repository.name) - repo_is_public = repo is not None and repo.is_public - torrent = _torrent_for_blob(derived_image.blob, repo_is_public) + torrent = _torrent_for_blob(derived_image.blob, model.repository.is_repository_public(repository)) # Log the action. - track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, torrent=True, **kwargs) + track_and_log('repo_verb', wrap_repository(repository), tag=tag.name, verb=verb, torrent=True, + **kwargs) return torrent -def _verify_repo_verb(_, namespace, repo_name, tag, verb, checker=None): +def _verify_repo_verb(_, namespace, repo_name, tag_name, verb, checker=None): permission = ReadRepositoryPermission(namespace, repo_name) - repo = model.get_repository(namespace, repo_name) - repo_is_public = repo is not None and repo.is_public + repo = model.repository.get_repository(namespace, repo_name) + repo_is_public = repo is not None and model.repository.is_repository_public(repo) if not permission.can() and not repo_is_public: logger.debug('No permission to read repository %s/%s for user %s with verb %s', namespace, repo_name, get_authenticated_user(), verb) abort(403) - # Lookup the requested tag. - tag_image = model.get_tag_image(namespace, repo_name, tag) - if tag_image is None: - logger.debug('Tag %s does not exist in repository %s/%s for user %s', tag, namespace, repo_name, - get_authenticated_user()) - abort(404) - - if repo is not None and repo.kind != 'image': + if repo is not None and repo.kind.name != 'image': logger.debug('Repository %s/%s for user %s is not an image repo', namespace, repo_name, get_authenticated_user()) abort(405) # Make sure the repo's namespace isn't disabled. - if not model.is_namespace_enabled(namespace): + if not registry_model.is_namespace_enabled(namespace): abort(400) + # Lookup the requested tag. + repo_ref = registry_model.lookup_repository(namespace, repo_name) + tag = registry_model.get_repo_tag(repo_ref, tag_name) + if tag is None: + logger.debug('Tag %s does not exist in repository %s/%s for user %s', tag, namespace, repo_name, + get_authenticated_user()) + abort(404) + + # Get its associated manifest. + manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + if manifest is None: + logger.debug('Could not get manifest on %s/%s:%s::%s', namespace, repo_name, tag.name, verb) + abort(404) + # If there is a data checker, call it first. if checker is not None: - if not checker(tag_image): - logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repo_name, tag, verb) + if not checker(tag, manifest): + logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repo_name, tag.name, verb) abort(404) - return tag_image + # Preload the tag's repository information, so it gets cached. + assert tag.repository.namespace_name + assert tag.repository.name + + return tag, manifest -def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwargs): - # Verify that the image exists and that we have access to it. - repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker) +def _repo_verb_signature(namespace, repository, tag_name, verb, checker=None, **kwargs): + # Verify that the tag exists and that we have access to it. + tag, manifest = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker) - # derived_image the derived image storage for the verb. - derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag}) + # Find the derived image storage for the verb. + derived_image = registry_model.lookup_derived_image(manifest, verb, + varying_metadata={'tag': tag.name}) if derived_image is None or derived_image.blob.uploading: return make_response('', 202) @@ -231,7 +239,7 @@ def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwarg abort(404) # Lookup the signature for the verb. - signature_value = model.get_derived_image_signature(derived_image, signer.name) + signature_value = registry_model.get_derived_image_signature(derived_image, signer.name) if signature_value is None: abort(404) @@ -239,53 +247,57 @@ def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwarg return make_response(signature_value) -def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=None, **kwargs): +def _repo_verb(namespace, repository, tag_name, verb, formatter, sign=False, checker=None, + **kwargs): # Verify that the image exists and that we have access to it. logger.debug('Verifying repo verb %s for repository %s/%s with user %s with mimetype %s', verb, namespace, repository, get_authenticated_user(), request.accept_mimetypes.best) - repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker) + tag, manifest = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker) + + # Load the repository for later. + repo = model.repository.get_repository(namespace, repository) + if repo is None: + abort(404) # Check for torrent. If found, we return a torrent for the repo verb image (if the derived # image already exists). if request.accept_mimetypes.best == 'application/x-bittorrent': metric_queue.repository_pull.Inc(labelvalues=[namespace, repository, verb + '+torrent', True]) - return _torrent_repo_verb(repo_image, tag, verb, **kwargs) + return _torrent_repo_verb(repo, tag, manifest, verb, **kwargs) # Log the action. - track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, **kwargs) + track_and_log('repo_verb', wrap_repository(repo), tag=tag.name, verb=verb, **kwargs) metric_queue.repository_pull.Inc(labelvalues=[namespace, repository, verb, True]) # Lookup/create the derived image for the verb and repo image. - derived_image = model.lookup_or_create_derived_image( - repo_image, verb, storage.preferred_locations[0], varying_metadata={'tag': tag}) + derived_image = registry_model.lookup_or_create_derived_image( + manifest, verb, storage.preferred_locations[0], varying_metadata={'tag': tag.name}, + include_placements=True) if not derived_image.blob.uploading: - logger.debug('Derived %s image %s exists in storage', verb, derived_image.ref) - derived_layer_path = model.get_blob_path(derived_image.blob) + logger.debug('Derived %s image %s exists in storage', verb, derived_image) is_head_request = request.method == 'HEAD' - download_url = storage.get_direct_download_url(derived_image.blob.locations, - derived_layer_path, head=is_head_request) + download_url = storage.get_direct_download_url(derived_image.blob.placements, + derived_image.blob.storage_path, + head=is_head_request) if download_url: - logger.debug('Redirecting to download URL for derived %s image %s', verb, derived_image.ref) + logger.debug('Redirecting to download URL for derived %s image %s', verb, derived_image) return redirect(download_url) # Close the database handle here for this process before we send the long download. database.close_db_filter(None) - logger.debug('Sending cached derived %s image %s', verb, derived_image.ref) + logger.debug('Sending cached derived %s image %s', verb, derived_image) return send_file( - storage.stream_read_file(derived_image.blob.locations, derived_layer_path), + storage.stream_read_file(derived_image.blob.placements, derived_image.blob.storage_path), mimetype=LAYER_MIMETYPE) - logger.debug('Building and returning derived %s image %s', verb, derived_image.ref) + logger.debug('Building and returning derived %s image %s', verb, derived_image) # Close the database connection before any process forking occurs. This is important because # the Postgres driver does not react kindly to forking, so we need to make sure it is closed # so that each process will get its own unique connection. database.close_db_filter(None) - # Calculate a derived image ID. - derived_image_id = hashlib.sha256(repo_image.image_id + ':' + verb).hexdigest() - def _cleanup(): # Close any existing DB connection once the process has exited. database.close_db_filter(None) @@ -294,15 +306,15 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= def _store_metadata_and_cleanup(): with database.UseThenDisconnect(app.config): - model.set_torrent_info(derived_image.blob, app.config['BITTORRENT_PIECE_SIZE'], - hasher.final_piece_hashes()) - model.set_blob_size(derived_image.blob, hasher.hashed_bytes) + registry_model.set_torrent_info(derived_image.blob, app.config['BITTORRENT_PIECE_SIZE'], + hasher.final_piece_hashes()) + registry_model.set_derived_image_size(derived_image, hasher.hashed_bytes) # Create a queue process to generate the data. The queue files will read from the process # and send the results to the client and storage. handlers = [hasher.update] reporter = VerbReporter(verb) - args = (formatter, repo_image, tag, derived_image_id, handlers, reporter) + args = (formatter, tag, manifest, derived_image.unique_id, handlers, reporter) queue_process = QueueProcess( _open_stream, 8 * 1024, @@ -337,8 +349,8 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= def os_arch_checker(os, arch): - def checker(repo_image): - image_json = repo_image.compat_metadata + def checker(tag, manifest): + image_json = manifest.leaf_layer.v1_metadata # Verify the architecture and os. operating_system = image_json.get('os', 'linux') @@ -394,8 +406,8 @@ def get_squashed_tag(namespace, repository, tag): @process_auth @parse_repository_name() def get_tag_torrent(namespace_name, repo_name, digest): - repo = model.get_repository(namespace_name, repo_name) - repo_is_public = repo is not None and repo.is_public + repo = model.repository.get_repository(namespace_name, repo_name) + repo_is_public = repo is not None and model.repository.is_repository_public(repo) permission = ReadRepositoryPermission(namespace_name, repo_name) if not permission.can() and not repo_is_public: @@ -406,10 +418,14 @@ def get_tag_torrent(namespace_name, repo_name, digest): # We can not generate a private torrent cluster without a user uuid (e.g. token auth) abort(403) - if repo is not None and repo.kind != 'image': + if repo is not None and repo.kind.name != 'image': abort(405) - blob = model.get_repo_blob_by_digest(namespace_name, repo_name, digest) + repo_ref = registry_model.lookup_repository(namespace_name, repo_name) + if repo_ref is None: + abort(404) + + blob = registry_model.get_repo_blob_by_digest(repo_ref, digest, include_placements=True) if blob is None: abort(404) diff --git a/endpoints/verbs/models_interface.py b/endpoints/verbs/models_interface.py deleted file mode 100644 index 87559b110..000000000 --- a/endpoints/verbs/models_interface.py +++ /dev/null @@ -1,159 +0,0 @@ -from abc import ABCMeta, abstractmethod -from collections import namedtuple - -from six import add_metaclass - - -class Repository( - namedtuple('Repository', ['id', 'name', 'namespace_name', 'description', 'is_public', - 'kind'])): - """ - Repository represents a namespaced collection of tags. - :type id: int - :type name: string - :type namespace_name: string - :type description: string - :type is_public: bool - :type kind: string - """ - - -class DerivedImage(namedtuple('DerivedImage', ['ref', 'blob', 'internal_source_image_db_id'])): - """ - DerivedImage represents a user-facing alias for an image which was derived from another image. - """ - - -class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'namespace_name'])): - """ - RepositoryReference represents a reference to a Repository, without its full metadata. - """ - - -class ImageWithBlob( - namedtuple('Image', [ - 'image_id', 'blob', 'compat_metadata', 'repository', 'internal_db_id', 'v1_metadata'])): - """ - ImageWithBlob represents a user-facing alias for referencing an image, along with its blob. - """ - - -class Blob(namedtuple('Blob', ['uuid', 'size', 'uncompressed_size', 'uploading', 'locations'])): - """ - Blob represents an opaque binary blob saved to the storage system. - """ - - -class TorrentInfo(namedtuple('TorrentInfo', ['piece_length', 'pieces'])): - """ - TorrentInfo represents the torrent piece information associated with a blob. - """ - - -@add_metaclass(ABCMeta) -class VerbsDataInterface(object): - """ - Interface that represents all data store interactions required by the registry's custom HTTP - verbs. - """ - - @abstractmethod - def get_repository(self, namespace_name, repo_name): - """ - Returns a repository tuple for the repository with the given name under the given namespace. - Returns None if no such repository was found. - """ - pass - - @abstractmethod - def get_manifest_layers_with_blobs(self, repo_image): - """ - Returns the full set of manifest layers and their associated blobs starting at the given - repository image and working upwards to the root image. - """ - pass - - @abstractmethod - def get_blob_path(self, blob): - """ - Returns the storage path for the given blob. - """ - pass - - @abstractmethod - def get_derived_image_signature(self, derived_image, signer_name): - """ - Returns the signature associated with the derived image and a specific signer or None if none. - """ - pass - - @abstractmethod - def set_derived_image_signature(self, derived_image, signer_name, signature): - """ - Sets the calculated signature for the given derived image and signer to that specified. - """ - pass - - @abstractmethod - def delete_derived_image(self, derived_image): - """ - Deletes a derived image and all of its storage. - """ - pass - - @abstractmethod - def set_blob_size(self, blob, size): - """ - Sets the size field on a blob to the value specified. - """ - pass - - @abstractmethod - def get_repo_blob_by_digest(self, namespace_name, repo_name, digest): - """ - Returns the blob with the given digest under the matching repository or None if none. - """ - pass - - @abstractmethod - def get_torrent_info(self, blob): - """ - Returns the torrent information associated with the given blob or None if none. - """ - pass - - @abstractmethod - def set_torrent_info(self, blob, piece_length, pieces): - """ - Sets the torrent infomation associated with the given blob to that specified. - """ - pass - - @abstractmethod - def lookup_derived_image(self, repo_image, verb, varying_metadata=None): - """ - Looks up the derived image for the given repository image, verb and optional varying metadata - and returns it or None if none. - """ - pass - - @abstractmethod - def lookup_or_create_derived_image(self, repo_image, verb, location, varying_metadata=None): - """ - Looks up the derived image for the given repository image, verb and optional varying metadata - and returns it. If none exists, a new derived image is created. - """ - pass - - @abstractmethod - def get_tag_image(self, namespace_name, repo_name, tag_name): - """ - Returns the image associated with the live tag with the given name under the matching repository - or None if none. - """ - pass - - @abstractmethod - def is_namespace_enabled(self, namespace_name): - """ Returns whether the given namespace exists and is enabled. """ - pass diff --git a/endpoints/verbs/models_pre_oci.py b/endpoints/verbs/models_pre_oci.py deleted file mode 100644 index 6dc894769..000000000 --- a/endpoints/verbs/models_pre_oci.py +++ /dev/null @@ -1,205 +0,0 @@ -import json - -from data import model -from image.docker.v1 import DockerV1Metadata - -from endpoints.verbs.models_interface import ( - Blob, - DerivedImage, - ImageWithBlob, - Repository, - RepositoryReference, - TorrentInfo, - VerbsDataInterface,) - - -class PreOCIModel(VerbsDataInterface): - """ - PreOCIModel implements the data model for the registry's custom HTTP verbs using a database schema - before it was changed to support the OCI specification. - """ - - def get_repository(self, namespace_name, repo_name): - repo = model.repository.get_repository(namespace_name, repo_name) - if repo is None: - return None - - return _repository_for_repo(repo) - - def get_manifest_layers_with_blobs(self, repo_image): - repo_image_record = model.image.get_image_by_id( - repo_image.repository.namespace_name, repo_image.repository.name, repo_image.image_id) - - parents = model.image.get_parent_images( - repo_image.repository.namespace_name, repo_image.repository.name, repo_image_record) - placements_map = model.image.get_placements_for_images(parents) - - yield repo_image - - for parent in parents: - metadata = {} - try: - metadata = json.loads(parent.v1_json_metadata) - except ValueError: - pass - - locations = [placement.location.name for placement in placements_map[parent.storage.id]] - yield ImageWithBlob( - image_id=parent.docker_image_id, - blob=_blob(parent.storage, locations=locations), - repository=repo_image.repository, - compat_metadata=metadata, - v1_metadata=_docker_v1_metadata(repo_image.repository.namespace_name, - repo_image.repository.name, parent), - internal_db_id=parent.id,) - - def get_derived_image_signature(self, derived_image, signer_name): - storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid) - signature_entry = model.storage.lookup_storage_signature(storage, signer_name) - if signature_entry is None: - return None - - return signature_entry.signature - - def set_derived_image_signature(self, derived_image, signer_name, signature): - storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid) - signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name) - signature_entry.signature = signature - signature_entry.uploading = False - signature_entry.save() - - def delete_derived_image(self, derived_image): - model.image.delete_derived_storage_by_uuid(derived_image.blob.uuid) - - def set_blob_size(self, blob, size): - storage_entry = model.storage.get_storage_by_uuid(blob.uuid) - storage_entry.image_size = size - storage_entry.uploading = False - storage_entry.save() - - def get_blob_path(self, blob): - blob_record = model.storage.get_storage_by_uuid(blob.uuid) - return model.storage.get_layer_path(blob_record) - - def get_repo_blob_by_digest(self, namespace_name, repo_name, digest): - try: - blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) - except model.BlobDoesNotExist: - return None - - return _blob(blob_record) - - def get_torrent_info(self, blob): - blob_record = model.storage.get_storage_by_uuid(blob.uuid) - - try: - torrent_info = model.storage.get_torrent_info(blob_record) - except model.TorrentInfoDoesNotExist: - return None - - return TorrentInfo( - pieces=torrent_info.pieces, - piece_length=torrent_info.piece_length,) - - def set_torrent_info(self, blob, piece_length, pieces): - blob_record = model.storage.get_storage_by_uuid(blob.uuid) - model.storage.save_torrent_info(blob_record, piece_length, pieces) - - def lookup_derived_image(self, repo_image, verb, varying_metadata=None): - blob_record = model.image.find_derived_storage_for_image(repo_image.internal_db_id, verb, - varying_metadata) - if blob_record is None: - return None - - return _derived_image(blob_record, repo_image) - - def lookup_or_create_derived_image(self, repo_image, verb, location, varying_metadata=None): - blob_record = model.image.find_or_create_derived_storage(repo_image.internal_db_id, verb, - location, varying_metadata) - return _derived_image(blob_record, repo_image) - - def get_tag_image(self, namespace_name, repo_name, tag_name): - try: - found = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) - except model.DataModelException: - return None - - metadata = {} - try: - metadata = json.loads(found.v1_json_metadata) - except ValueError: - pass - - return ImageWithBlob( - image_id=found.docker_image_id, - blob=_blob(found.storage), - repository=RepositoryReference( - namespace_name=namespace_name, - name=repo_name, - id=found.repository_id,), - compat_metadata=metadata, - v1_metadata=_docker_v1_metadata(namespace_name, repo_name, found), - internal_db_id=found.id,) - - def is_namespace_enabled(self, namespace_name): - namespace = model.user.get_namespace_user(namespace_name) - return namespace is not None and namespace.enabled - - -pre_oci_model = PreOCIModel() - - -def _docker_v1_metadata(namespace_name, repo_name, repo_image): - """ - Returns a DockerV1Metadata object for the given Pre-OCI repo_image under the - repository with the given namespace and name. Note that the namespace and - name are passed here as an optimization, and are *not checked* against the - image. - """ - return DockerV1Metadata( - namespace_name=namespace_name, - repo_name=repo_name, - image_id=repo_image.docker_image_id, - checksum=repo_image.v1_checksum, - compat_json=repo_image.v1_json_metadata, - created=repo_image.created, - comment=repo_image.comment, - command=repo_image.command, - - # Note: These are not needed in verbs and are expensive to load, so we just skip them. - content_checksum=None, - parent_image_id=None,) - - -def _derived_image(blob_record, repo_image): - """ - Returns a DerivedImage object for the given Pre-OCI data model blob and repo_image instance. - """ - return DerivedImage( - ref=repo_image.internal_db_id, - blob=_blob(blob_record), - internal_source_image_db_id=repo_image.internal_db_id,) - - -def _blob(blob_record, locations=None): - """ - Returns a Blob object for the given Pre-OCI data model blob instance. - """ - locations = locations or model.storage.get_storage_locations(blob_record.uuid) - return Blob( - uuid=blob_record.uuid, - size=blob_record.image_size, - uncompressed_size=blob_record.uncompressed_size, - uploading=blob_record.uploading, - locations=locations,) - - -def _repository_for_repo(repo): - """ Returns a Repository object representing the Pre-OCI data model repo instance given. """ - return Repository( - id=repo.id, - name=repo.name, - namespace_name=repo.namespace_user.username, - description=repo.description, - is_public=model.repository.is_repository_public(repo), - kind=model.repository.get_repo_kind_name(repo),) diff --git a/image/appc/__init__.py b/image/appc/__init__.py index 5b74309c9..404813859 100644 --- a/image/appc/__init__.py +++ b/image/appc/__init__.py @@ -21,7 +21,7 @@ class AppCImageFormatter(TarImageFormatter): def stream_generator(self, tag, manifest, synthetic_image_id, layer_iterator, tar_stream_getter_iterator, reporter=None): image_mtime = 0 - created = manifest.created_datetime + created = manifest.get_parsed_manifest().created_datetime if created is not None: image_mtime = calendar.timegm(created.utctimetuple()) @@ -30,12 +30,12 @@ class AppCImageFormatter(TarImageFormatter): # rootfs - The root file system # Yield the manifest. - manifest = json.dumps(DockerV1ToACIManifestTranslator.build_manifest( + aci_manifest = json.dumps(DockerV1ToACIManifestTranslator.build_manifest( tag, manifest, synthetic_image_id )) - yield self.tar_file('manifest', manifest, mtime=image_mtime) + yield self.tar_file('manifest', aci_manifest, mtime=image_mtime) # Yield the merged layer dtaa. yield self.tar_folder('rootfs', mtime=image_mtime) diff --git a/image/docker/squashed.py b/image/docker/squashed.py index 57b680dff..41c55b62a 100644 --- a/image/docker/squashed.py +++ b/image/docker/squashed.py @@ -31,7 +31,8 @@ class SquashedDockerImageFormatter(TarImageFormatter): def stream_generator(self, tag, manifest, synthetic_image_id, layer_iterator, tar_stream_getter_iterator, reporter=None): image_mtime = 0 - created = manifest.created_datetime + parsed_manifest = manifest.get_parsed_manifest() + created = parsed_manifest.created_datetime if created is not None: image_mtime = calendar.timegm(created.utctimetuple()) @@ -46,7 +47,7 @@ class SquashedDockerImageFormatter(TarImageFormatter): # Yield the repositories file: synthetic_layer_info = {} - synthetic_layer_info[tag + '.squash'] = synthetic_image_id + synthetic_layer_info[tag.name + '.squash'] = synthetic_image_id hostname = app.config['SERVER_HOSTNAME'] repositories = {} @@ -60,7 +61,7 @@ class SquashedDockerImageFormatter(TarImageFormatter): yield self.tar_folder(synthetic_image_id, mtime=image_mtime) # Yield the JSON layer data. - layer_json = SquashedDockerImageFormatter._build_layer_json(manifest, synthetic_image_id) + layer_json = SquashedDockerImageFormatter._build_layer_json(parsed_manifest, synthetic_image_id) yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json), mtime=image_mtime) # Yield the VERSION file. @@ -69,7 +70,7 @@ class SquashedDockerImageFormatter(TarImageFormatter): # Yield the merged layer data's header. estimated_file_size = 0 for layer in layer_iterator: - estimated_file_size += layer.estimated_file_size(SquashedDockerImageFormatter.SIZE_MULTIPLIER) + estimated_file_size += layer.estimated_size(SquashedDockerImageFormatter.SIZE_MULTIPLIER) # Make sure the estimated file size is an integer number of bytes. estimated_file_size = int(math.ceil(estimated_file_size)) @@ -108,8 +109,7 @@ class SquashedDockerImageFormatter(TarImageFormatter): @staticmethod def _build_layer_json(manifest, synthetic_image_id): - layer_json = manifest.leaf_layer.v1_metadata - updated_json = copy.deepcopy(layer_json) + updated_json = json.loads(manifest.leaf_layer.raw_v1_metadata) updated_json['id'] = synthetic_image_id if 'parent' in updated_json: diff --git a/storage/fakestorage.py b/storage/fakestorage.py index 2127ccb7f..dfe552f08 100644 --- a/storage/fakestorage.py +++ b/storage/fakestorage.py @@ -26,7 +26,7 @@ class FakeStorage(BaseStorageV2): def get_content(self, path): if not path in _FAKE_STORAGE_MAP: - raise IOError('Fake file %s not found' % path) + raise IOError('Fake file %s not found. Exist: %s' % (path, _FAKE_STORAGE_MAP.keys())) _FAKE_STORAGE_MAP.get(path).seek(0) return _FAKE_STORAGE_MAP.get(path).read() diff --git a/test/registry/protocol_fixtures.py b/test/registry/protocol_fixtures.py index 44ad89964..f4ed8d9a2 100644 --- a/test/registry/protocol_fixtures.py +++ b/test/registry/protocol_fixtures.py @@ -30,8 +30,10 @@ def sized_images(): parent_bytes = layer_bytes_for_contents('parent contents', mode='') image_bytes = layer_bytes_for_contents('some contents', mode='') return [ - Image(id='parentid', bytes=parent_bytes, parent_id=None, size=len(parent_bytes)), - Image(id='someid', bytes=image_bytes, parent_id='parentid', size=len(image_bytes)), + Image(id='parentid', bytes=parent_bytes, parent_id=None, size=len(parent_bytes), + config={'foo': 'bar'}), + Image(id='someid', bytes=image_bytes, parent_id='parentid', size=len(image_bytes), + config={'foo': 'childbar'}), ] diff --git a/test/registry/registry_tests.py b/test/registry/registry_tests.py index 45f7c95f4..71f0e5c38 100644 --- a/test/registry/registry_tests.py +++ b/test/registry/registry_tests.py @@ -953,7 +953,7 @@ def test_squashed_images(use_estimates, pusher, sized_images, liveserver_session tar = tarfile.open(fileobj=StringIO(response.content)) # Verify the squashed image. - expected_image_id = '13a2d9711a3e242fcd50a7627c02d86901ac801b78dcea3147e8ff640078de52' + expected_image_id = 'cdc6d6c0d07d2cbacfc579e49ce0c256c5084b9b2b16c1b1b0c45f26a12a4ba5' expected_names = ['repositories', expected_image_id, '%s/json' % expected_image_id, @@ -968,9 +968,14 @@ def test_squashed_images(use_estimates, pusher, sized_images, liveserver_session # Ensure the JSON loads and parses. result = json.loads(json_data) assert result['id'] == expected_image_id + assert result['config']['foo'] == 'childbar' # Ensure that squashed layer tar can be opened. - tarfile.open(fileobj=tar.extractfile(tar.getmember('%s/layer.tar' % expected_image_id))) + tar = tarfile.open(fileobj=tar.extractfile(tar.getmember('%s/layer.tar' % expected_image_id))) + assert tar.getnames() == ['contents'] + + # Check the contents. + assert tar.extractfile('contents').read() == 'some contents' @pytest.mark.parametrize('push_user, push_namespace, push_repo, mount_repo_name, expected_failure', [ diff --git a/test/registry_tests.py b/test/registry_tests.py index ccb71a599..01f9009bb 100644 --- a/test/registry_tests.py +++ b/test/registry_tests.py @@ -2103,7 +2103,7 @@ class SquashingTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestC # Create the repo. self.do_push('devtable', 'newrepo', 'devtable', 'password', images=initial_images) - initial_image_id = '91081df45b58dc62dd207441785eef2b895f0383fbe601c99a3cf643c79957dc' + initial_image_id = 'cdc6d6c0d07d2cbacfc579e49ce0c256c5084b9b2b16c1b1b0c45f26a12a4ba5' # Pull the squashed version of the tag. tar, _ = self.get_squashed_image(auth=('devtable', 'password')) @@ -2119,7 +2119,7 @@ class SquashingTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestC # Create the repo. self.do_push('devtable', 'newrepo', 'devtable', 'password', images=initial_images) - initial_image_id = '91081df45b58dc62dd207441785eef2b895f0383fbe601c99a3cf643c79957dc' + initial_image_id = 'cdc6d6c0d07d2cbacfc579e49ce0c256c5084b9b2b16c1b1b0c45f26a12a4ba5' # Pull the squashed version of the tag. tar, _ = self.get_squashed_image() @@ -2134,7 +2134,7 @@ class SquashingTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestC ] self.do_push('devtable', 'newrepo', 'devtable', 'password', images=updated_images) - updated_image_id = '38df4bd4cdffc6b7d656dbd2813c73e864f2d362ad887c999ac315224ad281ac' + updated_image_id = '57e8d9226ca95ed4d9b303a4104cb6475605e00f6ce536fe6ed54a5d1f559882' # Pull the squashed version of the tag and ensure it has changed. tar, _ = self.get_squashed_image() @@ -2157,7 +2157,7 @@ class SquashingTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestC self.conduct('POST', '/__test/removeuncompressed/initialid') # Pull the squashed version of the tag. - initial_image_id = '91081df45b58dc62dd207441785eef2b895f0383fbe601c99a3cf643c79957dc' + initial_image_id = 'cdc6d6c0d07d2cbacfc579e49ce0c256c5084b9b2b16c1b1b0c45f26a12a4ba5' tar, _ = self.get_squashed_image() self.assertTrue(initial_image_id in tar.getnames()) @@ -2179,7 +2179,7 @@ class SquashingTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestC self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) # Pull the squashed version of the tag. - expected_image_id = 'bd590ae79fba5ebc6550aaf016c0bd0f49b1d78178e0f83e0ca1c56c2bb7e7bf' + expected_image_id = 'cdc6d6c0d07d2cbacfc579e49ce0c256c5084b9b2b16c1b1b0c45f26a12a4ba5' expected_names = ['repositories', expected_image_id, @@ -2212,7 +2212,7 @@ class SquashingTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestC # Create the repo. self.do_push('devtable', 'newrepo', 'devtable', 'password', images=initial_images) - initial_image_id = '91081df45b58dc62dd207441785eef2b895f0383fbe601c99a3cf643c79957dc' + initial_image_id = 'cdc6d6c0d07d2cbacfc579e49ce0c256c5084b9b2b16c1b1b0c45f26a12a4ba5' # Try to pull the torrent of the squashed image. This should fail with a 406 since the # squashed image doesn't yet exist. diff --git a/util/audit.py b/util/audit.py index 1fa6aeb31..a24f6d8d4 100644 --- a/util/audit.py +++ b/util/audit.py @@ -1,6 +1,7 @@ import logging import random +from collections import namedtuple from urlparse import urlparse from flask import request @@ -11,6 +12,13 @@ from auth.auth_context import get_authenticated_context, get_authenticated_user logger = logging.getLogger(__name__) +Repository = namedtuple('Repository', ['namespace_name', 'name', 'id']) + +def wrap_repository(repo_obj): + return Repository(namespace_name=repo_obj.namespace_user.username, name=repo_obj.name, + id=repo_obj.id) + + def track_and_log(event_name, repo_obj, analytics_name=None, analytics_sample=1, **kwargs): repo_name = repo_obj.name namespace_name = repo_obj.namespace_name From 7424a6d73ad6165a4b8cf4c0476aae0f8d01a668 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 12 Sep 2018 15:26:57 -0400 Subject: [PATCH 6/6] Add additional multi-layer complex squashing test --- test/registry/protocol_fixtures.py | 39 ++++++++++++++++++++++ test/registry/protocols.py | 11 +++++-- test/registry/registry_tests.py | 52 ++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 3 deletions(-) diff --git a/test/registry/protocol_fixtures.py b/test/registry/protocol_fixtures.py index f4ed8d9a2..04a1e0f37 100644 --- a/test/registry/protocol_fixtures.py +++ b/test/registry/protocol_fixtures.py @@ -37,6 +37,45 @@ def sized_images(): ] +@pytest.fixture(scope="session") +def multi_layer_images(): + """ Returns complex images (with sizes) for push and pull testing. """ + # Note: order is from base layer down to leaf. + layer1_bytes = layer_bytes_for_contents('layer 1 contents', mode='', other_files={ + 'file1': 'from-layer-1', + }) + + layer2_bytes = layer_bytes_for_contents('layer 2 contents', mode='', other_files={ + 'file2': 'from-layer-2', + }) + + layer3_bytes = layer_bytes_for_contents('layer 3 contents', mode='', other_files={ + 'file1': 'from-layer-3', + 'file3': 'from-layer-3', + }) + + layer4_bytes = layer_bytes_for_contents('layer 4 contents', mode='', other_files={ + 'file3': 'from-layer-4', + }) + + layer5_bytes = layer_bytes_for_contents('layer 5 contents', mode='', other_files={ + 'file4': 'from-layer-5', + }) + + return [ + Image(id='layer1', bytes=layer1_bytes, parent_id=None, size=len(layer1_bytes), + config={'internal_id': 'layer1'}), + Image(id='layer2', bytes=layer2_bytes, parent_id='layer1', size=len(layer2_bytes), + config={'internal_id': 'layer2'}), + Image(id='layer3', bytes=layer3_bytes, parent_id='layer2', size=len(layer3_bytes), + config={'internal_id': 'layer3'}), + Image(id='layer4', bytes=layer4_bytes, parent_id='layer3', size=len(layer4_bytes), + config={'internal_id': 'layer4'}), + Image(id='someid', bytes=layer5_bytes, parent_id='layer4', size=len(layer5_bytes), + config={'internal_id': 'layer5'}), + ] + + @pytest.fixture(scope="session") def jwk(): return RSAKey(key=RSA.generate(2048)) diff --git a/test/registry/protocols.py b/test/registry/protocols.py index 4311f271c..aa9edae3c 100644 --- a/test/registry/protocols.py +++ b/test/registry/protocols.py @@ -14,8 +14,9 @@ PushResult = namedtuple('PushResult', ['checksums', 'manifests', 'headers']) PullResult = namedtuple('PullResult', ['manifests', 'image_ids']) -def layer_bytes_for_contents(contents, mode='|gz'): +def layer_bytes_for_contents(contents, mode='|gz', other_files=None): layer_data = StringIO() + tar_file = tarfile.open(fileobj=layer_data, mode='w' + mode) def add_file(name, contents): tar_file_info = tarfile.TarInfo(name=name) @@ -23,12 +24,16 @@ def layer_bytes_for_contents(contents, mode='|gz'): tar_file_info.size = len(contents) tar_file_info.mtime = 1 - tar_file = tarfile.open(fileobj=layer_data, mode='w' + mode) tar_file.addfile(tar_file_info, StringIO(contents)) - tar_file.close() add_file('contents', contents) + if other_files is not None: + for file_name, file_contents in other_files.iteritems(): + add_file(file_name, file_contents) + + tar_file.close() + layer_bytes = layer_data.getvalue() layer_data.close() return layer_bytes diff --git a/test/registry/registry_tests.py b/test/registry/registry_tests.py index 71f0e5c38..877a02f0d 100644 --- a/test/registry/registry_tests.py +++ b/test/registry/registry_tests.py @@ -930,6 +930,58 @@ def test_squashed_image_disabled_user(pusher, sized_images, liveserver_session, assert response.status_code == 403 +@pytest.mark.parametrize('use_estimates', [ + False, + True, +]) +def test_multilayer_squashed_images(use_estimates, pusher, multi_layer_images, liveserver_session, + liveserver, registry_server_executor, app_reloader): + """ Test: Pulling of multilayer, complex squashed images. """ + credentials = ('devtable', 'password') + + # Push an image to download. + pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', multi_layer_images, + credentials=credentials) + + if use_estimates: + # Clear the uncompressed size stored for the images, to ensure that we estimate instead. + for image in multi_layer_images: + registry_server_executor.on(liveserver).clear_uncompressed_size(image.id) + + # Pull the squashed version. + response = liveserver_session.get('/c1/squash/devtable/newrepo/latest', auth=credentials) + tar = tarfile.open(fileobj=StringIO(response.content)) + + # Verify the squashed image. + expected_image_id = 'cdc6d6c0d07d2cbacfc579e49ce0c256c5084b9b2b16c1b1b0c45f26a12a4ba5' + expected_names = ['repositories', + expected_image_id, + '%s/json' % expected_image_id, + '%s/VERSION' % expected_image_id, + '%s/layer.tar' % expected_image_id] + + assert tar.getnames() == expected_names + + # Verify the JSON image data. + json_data = (tar.extractfile(tar.getmember('%s/json' % expected_image_id)).read()) + + # Ensure the JSON loads and parses. + result = json.loads(json_data) + assert result['id'] == expected_image_id + assert result['config']['internal_id'] == 'layer5' + + # Ensure that squashed layer tar can be opened. + tar = tarfile.open(fileobj=tar.extractfile(tar.getmember('%s/layer.tar' % expected_image_id))) + assert set(tar.getnames()) == {'contents', 'file1', 'file2', 'file3', 'file4'} + + # Check the contents of various files. + assert tar.extractfile('contents').read() == 'layer 5 contents' + assert tar.extractfile('file1').read() == 'from-layer-3' + assert tar.extractfile('file2').read() == 'from-layer-2' + assert tar.extractfile('file3').read() == 'from-layer-4' + assert tar.extractfile('file4').read() == 'from-layer-5' + + @pytest.mark.parametrize('use_estimates', [ False, True,