From 3c2e050593d2fd93aa1f4a0e190683984447358c Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 6 Dec 2018 12:40:34 -0500 Subject: [PATCH] Support pulling of schema2 manifests directly via a manifest list tag This change ensures that if a manifest list is requested with an accepts header for a *schema 2* manifest, the legacy manifest (if any) is returned as schema 2 if it was pushed as a schema 2 manifest (rather than being auto-converted to schema 1) --- data/registry_model/interface.py | 7 +++ data/registry_model/registry_oci_model.py | 16 +++++++ data/registry_model/registry_pre_oci_model.py | 12 +++++ data/registry_model/test/test_interface.py | 14 +++++- endpoints/v2/manifest.py | 24 ++++++---- image/docker/interfaces.py | 7 +++ image/docker/schema1.py | 11 +++++ image/docker/schema2/list.py | 27 +++++++++-- image/docker/schema2/manifest.py | 13 ++++++ image/docker/schema2/test/test_conversion.py | 32 ++++++++++++- image/docker/schema2/test/test_list.py | 15 ++++++- image/docker/schema2/test/test_manifest.py | 4 ++ test/registry/registry_tests.py | 45 +++++++++++++++++++ workers/manifestbackfillworker.py | 3 ++ 14 files changed, 215 insertions(+), 15 deletions(-) diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index 83f3394f7..e946554ce 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -322,3 +322,10 @@ class RegistryDataInterface(object): """ Returns a cached set of ISO country codes blacklisted for pulls for the namespace or None if the list could not be loaded. """ + + @abstractmethod + def convert_manifest(self, manifest, namespace_name, repo_name, tag_name, allowed_mediatypes, + storage): + """ Attempts to convert the specified into a parsed manifest with a media type + in the allowed_mediatypes set. If not possible, or an error occurs, returns None. + """ diff --git a/data/registry_model/registry_oci_model.py b/data/registry_model/registry_oci_model.py index a4882a10c..3b62dbe2d 100644 --- a/data/registry_model/registry_oci_model.py +++ b/data/registry_model/registry_oci_model.py @@ -482,6 +482,22 @@ class OCIModel(SharedModel, RegistryDataInterface): retriever = RepositoryContentRetriever(manifest_row.repository_id, storage) return parsed.get_schema1_manifest(namespace_name, repo_name, tag_name, retriever) + def convert_manifest(self, manifest, namespace_name, repo_name, tag_name, allowed_mediatypes, + storage): + try: + parsed = manifest.get_parsed_manifest() + except ManifestException: + return None + + try: + manifest_row = database.Manifest.get(id=manifest._db_id) + except database.Manifest.DoesNotExist: + return None + + retriever = RepositoryContentRetriever(manifest_row.repository_id, storage) + return parsed.convert_manifest(allowed_mediatypes, namespace_name, repo_name, tag_name, + retriever) + def create_manifest_with_temp_tag(self, repository_ref, manifest_interface_instance, expiration_sec, storage): """ Creates a manifest under the repository and sets a temporary tag to point to it. diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 907d34ad5..04ba3a117 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -549,6 +549,18 @@ class PreOCIModel(SharedModel, RegistryDataInterface): except ManifestException: return None + def convert_manifest(self, manifest, namespace_name, repo_name, tag_name, allowed_mediatypes, + storage): + try: + parsed = manifest.get_parsed_manifest() + except ManifestException: + return None + + try: + return parsed.convert_manifest(allowed_mediatypes, namespace_name, repo_name, tag_name, None) + except ManifestException: + return None + def create_manifest_with_temp_tag(self, repository_ref, manifest_interface_instance, expiration_sec, storage): """ Creates a manifest under the repository and sets a temporary tag to point to it. diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index 9729bf8a2..bfb26daeb 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -23,7 +23,7 @@ from data.registry_model.datatypes import RepositoryReference from data.registry_model.blobuploader import upload_blob, BlobUploadSettings from data.registry_model.modelsplitter import SplitModel from image.docker.types import ManifestImageLayer -from image.docker.schema1 import DockerSchema1ManifestBuilder +from image.docker.schema1 import DockerSchema1ManifestBuilder, DOCKER_SCHEMA1_CONTENT_TYPES from image.docker.schema2.manifest import DockerSchema2ManifestBuilder from test.fixtures import * @@ -780,6 +780,18 @@ def test_get_schema1_parsed_manifest(registry_model): assert registry_model.get_schema1_parsed_manifest(manifest, '', '', '', storage) +def test_convert_manifest(registry_model): + repository_ref = registry_model.lookup_repository('devtable', 'simple') + latest_tag = registry_model.get_repo_tag(repository_ref, 'latest', include_legacy_image=True) + manifest = registry_model.get_manifest_for_tag(latest_tag) + + mediatypes = DOCKER_SCHEMA1_CONTENT_TYPES + assert registry_model.convert_manifest(manifest, '', '', '', mediatypes, storage) + + mediatypes = [] + assert registry_model.convert_manifest(manifest, '', '', '', mediatypes, storage) is None + + def test_create_manifest_and_retarget_tag_with_labels(registry_model): repository_ref = registry_model.lookup_repository('devtable', 'simple') latest_tag = registry_model.get_repo_tag(repository_ref, 'latest', include_legacy_image=True) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index d4d1533c8..98cdfdb63 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -6,16 +6,16 @@ from flask import request, url_for, Response import features -from app import app, metric_queue, storage, model_cache +from app import app, metric_queue, storage from auth.registry_jwt_auth import process_registry_jwt_auth from digest import digest_tools from data.registry_model import registry_model from endpoints.decorators import anon_protect, parse_repository_name from endpoints.v2 import v2_bp, require_repo_read, require_repo_write -from endpoints.v2.errors import (ManifestInvalid, ManifestUnknown, TagInvalid, - NameInvalid, TagExpired, NameUnknown) +from endpoints.v2.errors import (ManifestInvalid, ManifestUnknown, NameInvalid, TagExpired, + NameUnknown) from image.docker import ManifestException -from image.docker.schema1 import DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE, DockerSchema1Manifest +from image.docker.schema1 import DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES, OCI_CONTENT_TYPES from image.docker.schemas import parse_manifest_from_bytes from notifications import spawn_notification @@ -62,8 +62,8 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): logger.exception('Got exception when trying to parse manifest `%s`', manifest_ref) raise ManifestInvalid() - supported = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, manifest_ref, manifest, - parsed) + supported = _rewrite_schema_if_necessary(namespace_name, repo_name, manifest_ref, manifest, + parsed) if supported is None: raise ManifestUnknown() @@ -101,8 +101,8 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): logger.exception('Got exception when trying to parse manifest `%s`', manifest_ref) raise ManifestInvalid() - supported = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, '$digest', manifest, - parsed) + supported = _rewrite_schema_if_necessary(namespace_name, repo_name, '$digest', manifest, + parsed) if supported is None: raise ManifestUnknown() @@ -115,7 +115,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): }) -def _rewrite_to_schema1_if_necessary(namespace_name, repo_name, tag_name, manifest, parsed): +def _rewrite_schema_if_necessary(namespace_name, repo_name, tag_name, manifest, parsed): # As per the Docker protocol, if the manifest is not schema version 1 and the manifest's # media type is not in the Accept header, we return a schema 1 version of the manifest for # the amd64+linux platform, if any, or None if none. @@ -124,6 +124,12 @@ def _rewrite_to_schema1_if_necessary(namespace_name, repo_name, tag_name, manife if parsed.media_type in mimetypes: return parsed + converted = registry_model.convert_manifest(manifest, namespace_name, repo_name, tag_name, + mimetypes, storage) + if converted is not None: + return converted + + # For back-compat, we always default to schema 1 if the manifest could not be converted. return registry_model.get_schema1_parsed_manifest(manifest, namespace_name, repo_name, tag_name, storage) diff --git a/image/docker/interfaces.py b/image/docker/interfaces.py index aef185f8f..85072216f 100644 --- a/image/docker/interfaces.py +++ b/image/docker/interfaces.py @@ -115,6 +115,13 @@ class ManifestInterface(object): If none, returns None. """ + @abstractmethod + def convert_manifest(self, allowed_mediatypes, namespace_name, repo_name, tag_name, + content_retriever): + """ Returns a version of this schema that has a media type found in the given media type set. + If not possible, or an error occurs, returns None. + """ + @add_metaclass(ABCMeta) class ContentRetriever(object): diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 874873cbb..c5dc1db22 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -419,6 +419,17 @@ class DockerSchema1Manifest(ManifestInterface): # used, so to ensure full backwards compatibility, we just always return the schema. return self + def convert_manifest(self, allowed_mediatypes, namespace_name, repo_name, tag_name, + content_retriever): + if self.media_type in allowed_mediatypes: + return self + + unsigned = self.unsigned() + if unsigned.media_type in allowed_mediatypes: + return unsigned + + return None + def rewrite_invalid_image_ids(self, images_map): """ Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py index fe64341fa..d964e8792 100644 --- a/image/docker/schema2/list.py +++ b/image/docker/schema2/list.py @@ -263,6 +263,29 @@ class DockerSchema2ManifestList(ManifestInterface): """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. If none, returns None. """ + legacy_manifest = self._get_legacy_manifest(content_retriever) + if legacy_manifest is None: + return None + + return legacy_manifest.get_schema1_manifest(namespace_name, repo_name, tag_name, + content_retriever) + + def convert_manifest(self, allowed_mediatypes, namespace_name, repo_name, tag_name, + content_retriever): + if self.media_type in allowed_mediatypes: + return self + + legacy_manifest = self._get_legacy_manifest(content_retriever) + if legacy_manifest is None: + return None + + return legacy_manifest.convert_manifest(allowed_mediatypes, namespace_name, repo_name, + tag_name, content_retriever) + + def _get_legacy_manifest(self, content_retriever): + """ Returns the manifest under this list with architecture amd64 and os linux, if any, or None + if none or error. + """ for manifest_ref in self.manifests(content_retriever): platform = manifest_ref._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY] architecture = platform[DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY] @@ -271,13 +294,11 @@ class DockerSchema2ManifestList(ManifestInterface): continue try: - manifest = manifest_ref.manifest_obj + return manifest_ref.manifest_obj except (ManifestException, IOError): logger.exception('Could not load child manifest') return None - return manifest.get_schema1_manifest(namespace_name, repo_name, tag_name, content_retriever) - return None def unsigned(self): diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index 6f68719e7..d5717a7bf 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -299,6 +299,19 @@ class DockerSchema2Manifest(ManifestInterface): return [l.v1_id for l in self._manifest_image_layers(content_retriever)] + def convert_manifest(self, allowed_mediatypes, namespace_name, repo_name, tag_name, + content_retriever): + if self.media_type in allowed_mediatypes: + return self + + # If this manifest is not on the allowed list, try to convert the schema 1 version (if any) + schema1 = self.get_schema1_manifest(namespace_name, repo_name, tag_name, content_retriever) + if schema1 is None: + return None + + return schema1.convert_manifest(allowed_mediatypes, namespace_name, repo_name, tag_name, + content_retriever) + def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever): if self.has_remote_layer: return None diff --git a/image/docker/schema2/test/test_conversion.py b/image/docker/schema2/test/test_conversion.py index e9e88a943..ef0bc359f 100644 --- a/image/docker/schema2/test/test_conversion.py +++ b/image/docker/schema2/test/test_conversion.py @@ -3,7 +3,7 @@ import json import pytest -from image.docker.schema1 import DockerSchema1Manifest +from image.docker.schema1 import DockerSchema1Manifest, DOCKER_SCHEMA1_CONTENT_TYPES from image.docker.schema2.manifest import DockerSchema2Manifest from image.docker.schemautil import ContentRetrieverForTesting @@ -53,6 +53,36 @@ def test_conversion(name, config_sha): schema2 = DockerSchema2Manifest(_get_test_file_contents(name, 'schema2')) schema1 = DockerSchema1Manifest(_get_test_file_contents(name, 'schema1'), validate=False) + s2to2 = schema2.convert_manifest([schema2.media_type], 'devtable', 'somerepo', 'latest', + retriever) + assert s2to2 == schema2 + + s1to1 = schema1.convert_manifest([schema1.media_type], 'devtable', 'somerepo', 'latest', + retriever) + assert s1to1 == schema1 + + s2to1 = schema2.convert_manifest(DOCKER_SCHEMA1_CONTENT_TYPES, 'devtable', 'somerepo', 'latest', + retriever) + assert s2to1.media_type in DOCKER_SCHEMA1_CONTENT_TYPES + assert len(s2to1.layers) == len(schema1.layers) + + s2toempty = schema2.convert_manifest([], 'devtable', 'somerepo', 'latest', retriever) + assert s2toempty is None + + +@pytest.mark.parametrize('name, config_sha', [ + ('simple', 'sha256:e7a06c2e5b7afb1bbfa9124812e87f1138c4c10d77e0a217f0b8c8c9694dc5cf'), + ('complex', 'sha256:ae6b78bedf88330a5e5392164f40d28ed8a38120b142905d30b652ebffece10e'), + ('ubuntu', 'sha256:93fd78260bd1495afb484371928661f63e64be306b7ac48e2d13ce9422dfee26'), +]) +def test_2to1_conversion(name, config_sha): + cr = {} + cr[config_sha] = _get_test_file_contents(name, 'config') + retriever = ContentRetrieverForTesting(cr) + + schema2 = DockerSchema2Manifest(_get_test_file_contents(name, 'schema2')) + schema1 = DockerSchema1Manifest(_get_test_file_contents(name, 'schema1'), validate=False) + converted = schema2.get_schema1_manifest('devtable', 'somerepo', 'latest', retriever) assert len(converted.layers) == len(schema1.layers) diff --git a/image/docker/schema2/test/test_list.py b/image/docker/schema2/test/test_list.py index d70f6b75f..2d2d1ef3f 100644 --- a/image/docker/schema2/test/test_list.py +++ b/image/docker/schema2/test/test_list.py @@ -1,7 +1,8 @@ import json import pytest -from image.docker.schema1 import DockerSchema1Manifest +from image.docker.schema1 import DockerSchema1Manifest, DOCKER_SCHEMA1_CONTENT_TYPES +from image.docker.schema2 import DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE from image.docker.schema2.manifest import DockerSchema2Manifest from image.docker.schema2.list import (MalformedSchema2ManifestList, DockerSchema2ManifestList, DockerSchema2ManifestListBuilder) @@ -90,9 +91,21 @@ def test_valid_manifestlist(): assert isinstance(manifest.manifest_obj, DockerSchema1Manifest) assert manifest.manifest_obj.schema_version == 1 + # Check retrieval of a schema 2 manifest. This should return None, because the schema 2 manifest + # is not amd64-compatible. + schema2_manifest = manifestlist.convert_manifest([DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE], 'foo', + 'bar', 'baz', retriever) + assert schema2_manifest is None + + # Check retrieval of a schema 1 manifest. compatible_manifest = manifestlist.get_schema1_manifest('foo', 'bar', 'baz', retriever) assert compatible_manifest.schema_version == 1 + schema1_manifest = manifestlist.convert_manifest(DOCKER_SCHEMA1_CONTENT_TYPES, 'foo', + 'bar', 'baz', retriever) + assert schema1_manifest.schema_version == 1 + assert schema1_manifest.digest == compatible_manifest.digest + def test_get_schema1_manifest_no_matching_list(): manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES) diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index 67d7678d2..4f1aec2bd 100644 --- a/image/docker/schema2/test/test_manifest.py +++ b/image/docker/schema2/test/test_manifest.py @@ -278,6 +278,10 @@ def test_get_schema1_manifest(): assert schema1 is not None assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE + via_convert = manifest.convert_manifest([schema1.media_type], 'somenamespace', 'somename', + 'sometag', retriever) + assert via_convert.digest == schema1.digest + def test_generate_legacy_layers(): builder = DockerSchema2ManifestBuilder() diff --git a/test/registry/registry_tests.py b/test/registry/registry_tests.py index 332bfd1e2..176c8a295 100644 --- a/test/registry/registry_tests.py +++ b/test/registry/registry_tests.py @@ -20,6 +20,7 @@ from test.registry.protocols import Failures, Image, layer_bytes_for_contents, P from app import instance_keys from data.model.tag import list_repository_tags from image.docker.schema1 import DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE +from image.docker.schema2 import DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE from image.docker.schema2.list import DockerSchema2ManifestListBuilder from image.docker.schema2.manifest import DockerSchema2ManifestBuilder from util.security.registry_jwt import decode_bearer_header @@ -1728,3 +1729,47 @@ def test_geo_blocking(pusher, puller, basic_images, liveserver_session, puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', basic_images, credentials=credentials, options=options, expected_failure=Failures.GEO_BLOCKED) + + +@pytest.mark.parametrize('has_amd64_linux', [ + False, + True, +]) +def test_pull_manifest_list_schema2_only(v22_protocol, basic_images, different_images, + liveserver_session, app_reloader, data_model, + has_amd64_linux): + """ Test: Push a new tag with a manifest list containing two manifests, one schema2 (possibly) + amd64 and one not, and pull it when only accepting a schema2 manifest type. Since the manifest + list content type is not being sent, this should return just the manifest (or none if no + linux+amd64 is present.) + """ + if data_model != 'oci_model': + return + + credentials = ('devtable', 'password') + + # Build the manifests that will go in the list. + options = ProtocolOptions() + blobs = {} + + first_manifest = v22_protocol.build_schema2(basic_images, blobs, options) + second_manifest = v22_protocol.build_schema2(different_images, blobs, options) + + # Create and push the manifest list. + builder = DockerSchema2ManifestListBuilder() + builder.add_manifest(first_manifest, 'amd64' if has_amd64_linux else 'amd32', 'linux') + builder.add_manifest(second_manifest, 'arm', 'linux') + manifestlist = builder.build() + + v22_protocol.push_list(liveserver_session, 'devtable', 'newrepo', 'latest', manifestlist, + [first_manifest, second_manifest], blobs, + credentials=credentials) + + # Pull and verify the manifest. + options.accept_mimetypes = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE] + result = v22_protocol.pull(liveserver_session, 'devtable', 'newrepo', 'latest', basic_images, + credentials=credentials, options=options, + expected_failure=None if has_amd64_linux else Failures.UNKNOWN_TAG) + + if has_amd64_linux: + assert result.manifests['latest'].media_type == DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py index daf06076d..d64e10c3b 100644 --- a/workers/manifestbackfillworker.py +++ b/workers/manifestbackfillworker.py @@ -98,6 +98,9 @@ class BrokenManifest(ManifestInterface): def get_requires_empty_layer_blob(self, content_retriever): return False + def convert_manifest(self, media_types, namespace_name, repo_name, tag_name, lookup_fn): + return None + class ManifestBackfillWorker(Worker): def __init__(self):