diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index 6d36d4928..766ee0fff 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -282,7 +282,7 @@ class RegistryDataInterface(object): """ Mounts the blob from another repository into the specified target repository, and adds an expiration before that blob is automatically GCed. This function is useful during push - operations if an existing blob from another repositroy is being pushed. Returns False if + operations if an existing blob from another repository is being pushed. Returns False if the mounting fails. Note that this function does *not* check security for mounting the blob and the caller is responsible for doing this check (an example can be found in endpoints/v2/blob.py). @@ -293,3 +293,7 @@ class RegistryDataInterface(object): """ Sets the expiration on all tags that point to the given manifest to that specified. """ + + @abstractmethod + def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage): + """ Returns the schema 1 version of this manifest, or None if none. """ diff --git a/data/registry_model/registry_oci_model.py b/data/registry_model/registry_oci_model.py index 25ae0069c..0870f54f9 100644 --- a/data/registry_model/registry_oci_model.py +++ b/data/registry_model/registry_oci_model.py @@ -8,9 +8,11 @@ from data import model from data.model import oci, DataModelException from data.database import db_transaction, Image from data.registry_model.interface import RegistryDataInterface -from data.registry_model.datatypes import Tag, Manifest, LegacyImage, Label, SecurityScanStatus +from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus, + RepositoryReference) from data.registry_model.shared import SharedModel from data.registry_model.label_handlers import apply_label_to_manifest +from image.docker import ManifestException logger = logging.getLogger(__name__) @@ -420,5 +422,27 @@ class OCIModel(SharedModel, RegistryDataInterface): """ oci.tag.set_tag_expiration_sec_for_manifest(manifest._db_id, expiration_sec) + def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage): + """ Returns the schema 1 manifest for this manifest, or None if none. """ + try: + parsed = manifest.get_parsed_manifest() + except ManifestException: + return None + + try: + manifest_row = database.Manifest.get(id=manifest._db_id) + except database.Manifest.DoesNotExist: + return None + + repository_ref = RepositoryReference.for_id(manifest_row.repository_id) + + def _lookup_blob(digest): + blob = self.get_repo_blob_by_digest(repository_ref, digest, include_placements=True) + if blob is None: + return None + + return storage.get_content(blob.placements, blob.storage_path) + + return parsed.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, _lookup_blob) oci_model = OCIModel() diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 8f7e24634..f22eb3a29 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -527,4 +527,12 @@ class PreOCIModel(SharedModel, RegistryDataInterface): model.tag.set_tag_expiration_for_manifest(tag_manifest, expiration_sec) + def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage): + """ Returns the schema 1 version of this manifest, or None if none. """ + try: + return manifest.get_parsed_manifest() + except ManifestException: + return None + + pre_oci_model = PreOCIModel() diff --git a/data/registry_model/shared.py b/data/registry_model/shared.py index 8fc23fff2..82488986f 100644 --- a/data/registry_model/shared.py +++ b/data/registry_model/shared.py @@ -317,7 +317,8 @@ class SharedModel: logger.exception('Could not parse and validate manifest `%s`', manifest._db_id) return None - blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id, parsed.checksums) + blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id, + parsed.blob_digests) storage_map = {blob.content_checksum: blob for blob in blob_query} manifest_layers = [] diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index c0ebe625f..2f6647c65 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -95,6 +95,9 @@ def test_lookup_manifests(repo_namespace, repo_name, registry_model): assert found.legacy_image assert found.legacy_image.parents + schema1_parsed = registry_model.get_schema1_parsed_manifest(found, 'foo', 'bar', 'baz', storage) + assert schema1_parsed is not None + def test_lookup_unknown_manifest(registry_model): repo = model.repository.get_repository('devtable', 'simple') diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 7d350488a..e3568f85e 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -56,7 +56,13 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): # Something went wrong. raise ManifestInvalid() - manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, manifest_ref, manifest) + try: + parsed = manifest.get_parsed_manifest() + except ManifestException: + logger.exception('Got exception when trying to parse manifest `%s`', manifest_ref) + raise ManifestInvalid() + + manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, manifest_ref, parsed) if manifest is None: raise ManifestUnknown() @@ -65,7 +71,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True]) return Response( - manifest.manifest_bytes, + manifest.bytes, status=200, headers={ 'Content-Type': manifest.media_type, @@ -88,14 +94,20 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): if manifest is None: raise ManifestUnknown() - manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, '$digest', manifest) + try: + parsed = manifest.get_parsed_manifest() + except ManifestException: + logger.exception('Got exception when trying to parse manifest `%s`', manifest_ref) + raise ManifestInvalid() + + manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, '$digest', parsed) if manifest is None: raise ManifestUnknown() track_and_log('pull_repo', repository_ref, manifest_digest=manifest_ref) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True]) - return Response(manifest.manifest_bytes, status=200, headers={ + return Response(manifest.bytes, status=200, headers={ 'Content-Type': manifest.media_type, 'Docker-Content-Digest': manifest.digest, }) @@ -106,7 +118,8 @@ def _rewrite_to_schema1_if_necessary(namespace_name, repo_name, tag_name, manife # media type is not in the Accept header, we return a schema 1 version of the manifest for # the amd64+linux platform, if any, or None if none. # See: https://docs.docker.com/registry/spec/manifest-v2-2 - if len(request.accept_mimetypes) != 0 and manifest.media_type in request.accept_mimetypes: + mimetypes = [mimetype for mimetype, _ in request.accept_mimetypes] + if manifest.media_type in mimetypes: return manifest def lookup_fn(config_or_manifest_digest): diff --git a/endpoints/verbs/__init__.py b/endpoints/verbs/__init__.py index 449f56757..ef28accd8 100644 --- a/endpoints/verbs/__init__.py +++ b/endpoints/verbs/__init__.py @@ -16,6 +16,7 @@ from data.registry_model import registry_model from endpoints.decorators import anon_protect, anon_allowed, route_show_if, parse_repository_name from endpoints.v2.blob import BLOB_DIGEST_ROUTE from image.appc import AppCImageFormatter +from image.docker import ManifestException from image.docker.squashed import SquashedDockerImageFormatter from storage import Storage from util.audit import track_and_log, wrap_repository @@ -42,7 +43,7 @@ class VerbReporter(TarLayerFormatterReporter): metric_queue.verb_action_passes.Inc(labelvalues=[self.kind, pass_count]) -def _open_stream(formatter, tag, manifest, derived_image_id, handlers, reporter): +def _open_stream(formatter, tag, manifest, schema1_manifest, derived_image_id, handlers, reporter): """ This method generates a stream of data which will be replicated and read from the queue files. This method runs in a separate process. @@ -68,7 +69,7 @@ def _open_stream(formatter, tag, manifest, derived_image_id, handlers, reporter) for layer in reversed(layers): yield image_stream_getter(store, layer.blob) - stream = formatter.build_stream(tag, manifest, derived_image_id, layers, + stream = formatter.build_stream(tag, schema1_manifest, derived_image_id, layers, tar_stream_getter_iterator, reporter=reporter) for handler_fn in handlers: @@ -220,9 +221,21 @@ def _verify_repo_verb(_, namespace, repo_name, tag_name, verb, checker=None): logger.debug('Could not get manifest on %s/%s:%s::%s', namespace, repo_name, tag.name, verb) abort(404) + # Ensure the manifest is not a list. + try: + schema1_manifest = registry_model.get_schema1_parsed_manifest(manifest, namespace, + repo_name, tag.name, + storage) + except ManifestException: + logger.exception('Could not get manifest on %s/%s:%s::%s', namespace, repo_name, tag.name, verb) + abort(400) + + if schema1_manifest is None: + abort(404) + # If there is a data checker, call it first. if checker is not None: - if not checker(tag, manifest): + if not checker(tag, schema1_manifest): logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repo_name, tag.name, verb) abort(404) @@ -230,12 +243,12 @@ def _verify_repo_verb(_, namespace, repo_name, tag_name, verb, checker=None): assert tag.repository.namespace_name assert tag.repository.name - return tag, manifest + return tag, manifest, schema1_manifest def _repo_verb_signature(namespace, repository, tag_name, verb, checker=None, **kwargs): # Verify that the tag exists and that we have access to it. - tag, manifest = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker) + tag, manifest, _ = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker) # Find the derived image storage for the verb. derived_image = registry_model.lookup_derived_image(manifest, verb, @@ -261,7 +274,8 @@ def _repo_verb(namespace, repository, tag_name, verb, formatter, sign=False, che # Verify that the image exists and that we have access to it. logger.debug('Verifying repo verb %s for repository %s/%s with user %s with mimetype %s', verb, namespace, repository, get_authenticated_user(), request.accept_mimetypes.best) - tag, manifest = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker) + tag, manifest, schema1_manifest = _verify_repo_verb(storage, namespace, repository, + tag_name, verb, checker) # Load the repository for later. repo = model.repository.get_repository(namespace, repository) @@ -323,7 +337,7 @@ def _repo_verb(namespace, repository, tag_name, verb, formatter, sign=False, che # and send the results to the client and storage. handlers = [hasher.update] reporter = VerbReporter(verb) - args = (formatter, tag, manifest, derived_image.unique_id, handlers, reporter) + args = (formatter, tag, manifest, schema1_manifest, derived_image.unique_id, handlers, reporter) queue_process = QueueProcess( _open_stream, 8 * 1024, @@ -360,7 +374,7 @@ def _repo_verb(namespace, repository, tag_name, verb, formatter, sign=False, che def os_arch_checker(os, arch): def checker(tag, manifest): try: - image_json = json.loads(manifest.get_parsed_manifest().leaf_layer.raw_v1_metadata) + image_json = json.loads(manifest.leaf_layer.raw_v1_metadata) except ValueError: logger.exception('Could not parse leaf layer JSON for manifest %s', manifest) return False diff --git a/image/appc/__init__.py b/image/appc/__init__.py index 18986aef5..a1045cefa 100644 --- a/image/appc/__init__.py +++ b/image/appc/__init__.py @@ -18,10 +18,9 @@ class AppCImageFormatter(TarImageFormatter): Image formatter which produces an tarball according to the AppC specification. """ - def stream_generator(self, tag, manifest, synthetic_image_id, layer_iterator, + def stream_generator(self, tag, parsed_manifest, synthetic_image_id, layer_iterator, tar_stream_getter_iterator, reporter=None): image_mtime = 0 - parsed_manifest = manifest.get_parsed_manifest() created = parsed_manifest.created_datetime if created is not None: image_mtime = calendar.timegm(created.utctimetuple()) diff --git a/image/docker/schema2/config.py b/image/docker/schema2/config.py index 8d8ab2d61..1fd3a8b49 100644 --- a/image/docker/schema2/config.py +++ b/image/docker/schema2/config.py @@ -170,7 +170,7 @@ class DockerSchema2Config(object): def __init__(self, config_bytes): self._config_bytes = config_bytes - + try: self._parsed = json.loads(config_bytes) except ValueError as ve: @@ -191,6 +191,11 @@ class DockerSchema2Config(object): """ Returns the size of this config object. """ return len(self._config_bytes) + @property + def bytes(self): + """ Returns the bytes of this config object. """ + return self._config_bytes + @property def labels(self): """ Returns a dictionary of all the labels defined in this configuration. """ diff --git a/image/docker/squashed.py b/image/docker/squashed.py index 41c55b62a..ef6299009 100644 --- a/image/docker/squashed.py +++ b/image/docker/squashed.py @@ -28,10 +28,9 @@ class SquashedDockerImageFormatter(TarImageFormatter): # daemon dies when trying to load the entire tar into memory. SIZE_MULTIPLIER = 1.2 - def stream_generator(self, tag, manifest, synthetic_image_id, layer_iterator, + def stream_generator(self, tag, parsed_manifest, synthetic_image_id, layer_iterator, tar_stream_getter_iterator, reporter=None): image_mtime = 0 - parsed_manifest = manifest.get_parsed_manifest() created = parsed_manifest.created_datetime if created is not None: image_mtime = calendar.timegm(created.utctimetuple())