From 849e61338698de57433f46e608b163abd85073e6 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 13 Nov 2018 11:49:12 +0200 Subject: [PATCH] Implement support for schema 2 manifests --- data/model/oci/manifest.py | 20 ++--- data/registry_model/__init__.py | 16 +++- data/registry_model/interface.py | 4 + data/registry_model/registry_oci_model.py | 4 + data/registry_model/registry_pre_oci_model.py | 4 + endpoints/v2/manifest.py | 88 +++++++++++++------ 6 files changed, 97 insertions(+), 39 deletions(-) diff --git a/data/model/oci/manifest.py b/data/model/oci/manifest.py index 2cc177ecb..89a8e2c82 100644 --- a/data/model/oci/manifest.py +++ b/data/model/oci/manifest.py @@ -62,8 +62,7 @@ def _create_manifest(repository_id, manifest_interface_instance, storage): def _lookup_digest(digest): return _retrieve_bytes_in_storage(repository_id, digest, storage) - # Retrieve the child manifests, if any. If we do retrieve a child manifest, we also remove its - # blob from the list of blobs for this manifest, as the blob isn't really a "blob". + # Load, parse and get/create the child manifests, if any. child_manifest_refs = manifest_interface_instance.child_manifests(_lookup_digest) child_manifest_rows = [] child_manifest_label_dicts = [] @@ -105,18 +104,15 @@ def _create_manifest(repository_id, manifest_interface_instance, storage): child_manifest_rows.append(child_manifest_info.manifest) child_manifest_label_dicts.append(labels) - digests.remove(child_manifest.digest) # Ensure all the blobs in the manifest exist. - blob_map = {} - if digests: - query = lookup_repo_storages_by_content_checksum(repository_id, digests) - blob_map = {s.content_checksum: s for s in query} - for digest_str in digests: - if digest_str not in blob_map: - logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`', digest_str, - manifest_interface_instance.digest, repository_id) - return None + query = lookup_repo_storages_by_content_checksum(repository_id, digests) + blob_map = {s.content_checksum: s for s in query} + for digest_str in digests: + if digest_str not in blob_map: + logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`', digest_str, + manifest_interface_instance.digest, repository_id) + return None # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy # image. diff --git a/data/registry_model/__init__.py b/data/registry_model/__init__.py index 561a848e5..5c2e0934f 100644 --- a/data/registry_model/__init__.py +++ b/data/registry_model/__init__.py @@ -6,5 +6,17 @@ from data.registry_model.registry_oci_model import oci_model logger = logging.getLogger(__name__) -registry_model = oci_model if os.getenv('OCI_DATA_MODEL') == 'true' else pre_oci_model -logger.debug('Using registry model `%s`', registry_model) + +class RegistryModelProxy(object): + def __init__(self): + self._model = oci_model if os.getenv('OCI_DATA_MODEL') == 'true' else pre_oci_model + + def set_for_testing(self, use_oci_model): + self._model = oci_model if use_oci_model else pre_oci_model + logger.debug('Changed registry model to `%s` for testing', self._model) + + def __getattr__(self, attr): + return getattr(self._model, attr) + +registry_model = RegistryModelProxy() +logger.debug('Using registry model `%s`', registry_model._model) diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index b342314c8..6d36d4928 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -7,6 +7,10 @@ class RegistryDataInterface(object): of all tables that store registry-specific information, such as Manifests, Blobs, Images, and Labels. """ + @abstractmethod + def supports_schema2(self, namespace_name): + """ Returns whether the implementation of the data interface supports schema 2 format + manifests. """ @abstractmethod def find_matching_tag(self, repository_ref, tag_names): diff --git a/data/registry_model/registry_oci_model.py b/data/registry_model/registry_oci_model.py index 0f93b1716..25ae0069c 100644 --- a/data/registry_model/registry_oci_model.py +++ b/data/registry_model/registry_oci_model.py @@ -21,6 +21,10 @@ class OCIModel(SharedModel, RegistryDataInterface): OCIModel implements the data model for the registry API using a database schema after it was changed to support the OCI specification. """ + def supports_schema2(self, namespace_name): + """ Returns whether the implementation of the data interface supports schema 2 format + manifests. """ + return True def find_matching_tag(self, repository_ref, tag_names): """ Finds an alive tag in the repository matching one of the given tag names and returns it diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 1b6517ed4..8f7e24634 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -27,6 +27,10 @@ class PreOCIModel(SharedModel, RegistryDataInterface): PreOCIModel implements the data model for the registry API using a database schema before it was changed to support the OCI specification. """ + def supports_schema2(self, namespace_name): + """ Returns whether the implementation of the data interface supports schema 2 format + manifests. """ + return False def find_matching_tag(self, repository_ref, tag_names): """ Finds an alive tag in the repository matching one of the given tag names and returns it diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 31cdf7e7e..7d350488a 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -6,7 +6,7 @@ from flask import request, url_for, Response import features -from app import app, metric_queue, storage +from app import app, metric_queue, storage, model_cache from auth.registry_jwt_auth import process_registry_jwt_auth from digest import digest_tools from data.registry_model import registry_model @@ -17,6 +17,7 @@ from endpoints.v2.errors import (ManifestInvalid, ManifestUnknown, TagInvalid, from image.docker import ManifestException from image.docker.schema1 import DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE, DockerSchema1Manifest from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES, OCI_CONTENT_TYPES +from image.docker.schemas import parse_manifest_from_bytes from notifications import spawn_notification from util.audit import track_and_log from util.names import VALID_TAG_PATTERN @@ -55,6 +56,10 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): # Something went wrong. raise ManifestInvalid() + manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, manifest_ref, manifest) + if manifest is None: + raise ManifestUnknown() + track_and_log('pull_repo', repository_ref, analytics_name='pull_repo_100x', analytics_sample=0.01, tag=manifest_ref) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True]) @@ -83,6 +88,10 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): if manifest is None: raise ManifestUnknown() + manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, '$digest', manifest) + if manifest is None: + raise ManifestUnknown() + track_and_log('pull_repo', repository_ref, manifest_digest=manifest_ref) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True]) @@ -92,9 +101,32 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): }) +def _rewrite_to_schema1_if_necessary(namespace_name, repo_name, tag_name, manifest): + # As per the Docker protocol, if the manifest is not schema version 1 and the manifest's + # media type is not in the Accept header, we return a schema 1 version of the manifest for + # the amd64+linux platform, if any, or None if none. + # See: https://docs.docker.com/registry/spec/manifest-v2-2 + if len(request.accept_mimetypes) != 0 and manifest.media_type in request.accept_mimetypes: + return manifest + + def lookup_fn(config_or_manifest_digest): + blob = registry_model.get_cached_repo_blob(model_cache, namespace_name, repo_name, + config_or_manifest_digest) + if blob is None: + return None + + return storage.get_content(blob.placements, blob.storage_path) + + return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, lookup_fn) + + def _reject_manifest2_schema2(func): @wraps(func) def wrapped(*args, **kwargs): + namespace_name = kwargs['namespace_name'] + if registry_model.supports_schema2(namespace_name): + return func(*args, **kwargs) + if _doesnt_accept_schema_v1() or \ request.content_type in DOCKER_SCHEMA2_CONTENT_TYPES | OCI_CONTENT_TYPES: raise ManifestInvalid(detail={'message': 'manifest schema version not supported'}, @@ -111,27 +143,30 @@ def _doesnt_accept_schema_v1(): @v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['PUT']) -@_reject_manifest2_schema2 @parse_repository_name() +@_reject_manifest2_schema2 @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): + content_type = request.content_type or DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE + + if content_type == 'application/json': + # For back-compat. + content_type = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE + try: - manifest = DockerSchema1Manifest(request.data) + manifest = parse_manifest_from_bytes(request.data, content_type) except ManifestException as me: logger.exception("failed to parse manifest when writing by tagname") raise ManifestInvalid(detail={'message': 'failed to parse manifest: %s' % me.message}) - if manifest.tag != manifest_ref: - raise TagInvalid() - - return _write_manifest_and_log(namespace_name, repo_name, manifest) + return _write_manifest_and_log(namespace_name, repo_name, manifest_ref, manifest) @v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['PUT']) -@_reject_manifest2_schema2 @parse_repository_name() +@_reject_manifest2_schema2 @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect @@ -145,7 +180,7 @@ def write_manifest_by_digest(namespace_name, repo_name, manifest_ref): if manifest.digest != manifest_ref: raise ManifestInvalid(detail={'message': 'manifest digest mismatch'}) - return _write_manifest_and_log(namespace_name, repo_name, manifest) + return _write_manifest_and_log(namespace_name, repo_name, manifest.tag, manifest) @v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['DELETE']) @@ -178,8 +213,9 @@ def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref): return Response(status=202) -def _write_manifest_and_log(namespace_name, repo_name, manifest_impl): - repository_ref, manifest, tag = _write_manifest(namespace_name, repo_name, manifest_impl) +def _write_manifest_and_log(namespace_name, repo_name, tag_name, manifest_impl): + repository_ref, manifest, tag = _write_manifest(namespace_name, repo_name, tag_name, + manifest_impl) # Queue all blob manifests for replication. if features.STORAGE_REPLICATION: @@ -191,8 +227,8 @@ def _write_manifest_and_log(namespace_name, repo_name, manifest_impl): for layer in layers: queue_storage_replication(layer.blob) - track_and_log('push_repo', repository_ref, tag=manifest_impl.tag) - spawn_notification(repository_ref, 'repo_push', {'updated_tags': [manifest_impl.tag]}) + track_and_log('push_repo', repository_ref, tag=tag_name) + spawn_notification(repository_ref, 'repo_push', {'updated_tags': [tag_name]}) metric_queue.repository_push.Inc(labelvalues=[namespace_name, repo_name, 'v2', True]) return Response( @@ -208,18 +244,21 @@ def _write_manifest_and_log(namespace_name, repo_name, manifest_impl): ) -def _write_manifest(namespace_name, repo_name, manifest_impl): - if (manifest_impl.namespace == '' and features.LIBRARY_SUPPORT and - namespace_name == app.config['LIBRARY_NAMESPACE']): - pass - elif manifest_impl.namespace != namespace_name: - raise NameInvalid() +def _write_manifest(namespace_name, repo_name, tag_name, manifest_impl): + # NOTE: These extra checks are needed for schema version 1 because the manifests + # contain the repo namespace, name and tag name. + if manifest_impl.schema_version == 1: + if (manifest_impl.namespace == '' and features.LIBRARY_SUPPORT and + namespace_name == app.config['LIBRARY_NAMESPACE']): + pass + elif manifest_impl.namespace != namespace_name: + raise NameInvalid() - if manifest_impl.repo_name != repo_name: - raise NameInvalid() + if manifest_impl.repo_name != repo_name: + raise NameInvalid() - if not manifest_impl.layers: - raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) + if not manifest_impl.layers: + raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) # Ensure that the repository exists. repository_ref = registry_model.lookup_repository(namespace_name, repo_name) @@ -227,8 +266,7 @@ def _write_manifest(namespace_name, repo_name, manifest_impl): raise NameUnknown() manifest, tag = registry_model.create_manifest_and_retarget_tag(repository_ref, manifest_impl, - manifest_impl.tag, - storage) + tag_name, storage) if manifest is None: raise ManifestInvalid()