Fixes to ensuring existing code can process schema 2 manifests

2018-11-13 17:13:51 +02:00 · 2018-11-13 17:13:51 +02:00 · 7b9f56eff3
commit 7b9f56eff3
parent 9474fb7833
10 changed files with 91 additions and 21 deletions
--- a/data/registry_model/interface.py
+++ b/data/registry_model/interface.py
@ -282,7 +282,7 @@ class RegistryDataInterface(object):
    """
    Mounts the blob from another repository into the specified target repository, and adds an
    expiration before that blob is automatically GCed. This function is useful during push
-    operations if an existing blob from another repositroy is being pushed. Returns False if
+    operations if an existing blob from another repository is being pushed. Returns False if
    the mounting fails. Note that this function does *not* check security for mounting the blob
    and the caller is responsible for doing this check (an example can be found in
    endpoints/v2/blob.py).
@ -293,3 +293,7 @@ class RegistryDataInterface(object):
    """
    Sets the expiration on all tags that point to the given manifest to that specified.
    """
+
+  @abstractmethod
+  def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage):
+    """ Returns the schema 1 version of this manifest, or None if none. """
--- a/data/registry_model/registry_oci_model.py
+++ b/data/registry_model/registry_oci_model.py
@ -8,9 +8,11 @@ from data import model
 from data.model import oci, DataModelException
 from data.database import db_transaction, Image
 from data.registry_model.interface import RegistryDataInterface
-from data.registry_model.datatypes import Tag, Manifest, LegacyImage, Label, SecurityScanStatus
+from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus,
+                                           RepositoryReference)
 from data.registry_model.shared import SharedModel
 from data.registry_model.label_handlers import apply_label_to_manifest
+from image.docker import ManifestException


 logger = logging.getLogger(__name__)
@ -420,5 +422,27 @@ class OCIModel(SharedModel, RegistryDataInterface):
    """
    oci.tag.set_tag_expiration_sec_for_manifest(manifest._db_id, expiration_sec)

+  def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage):
+    """ Returns the schema 1 manifest for this manifest, or None if none. """
+    try:
+      parsed = manifest.get_parsed_manifest()
+    except ManifestException:
+      return None
+
+    try:
+      manifest_row = database.Manifest.get(id=manifest._db_id)
+    except database.Manifest.DoesNotExist:
+      return None
+
+    repository_ref = RepositoryReference.for_id(manifest_row.repository_id)
+
+    def _lookup_blob(digest):
+      blob = self.get_repo_blob_by_digest(repository_ref, digest, include_placements=True)
+      if blob is None:
+        return None
+
+      return storage.get_content(blob.placements, blob.storage_path)
+
+    return parsed.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, _lookup_blob)

 oci_model = OCIModel()
--- a/data/registry_model/registry_pre_oci_model.py
+++ b/data/registry_model/registry_pre_oci_model.py
@ -527,4 +527,12 @@ class PreOCIModel(SharedModel, RegistryDataInterface):

    model.tag.set_tag_expiration_for_manifest(tag_manifest, expiration_sec)

+  def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage):
+    """ Returns the schema 1 version of this manifest, or None if none. """
+    try:
+      return manifest.get_parsed_manifest()
+    except ManifestException:
+      return None
+
+
 pre_oci_model = PreOCIModel()
--- a/data/registry_model/shared.py
+++ b/data/registry_model/shared.py
@ -317,7 +317,8 @@ class SharedModel:
      logger.exception('Could not parse and validate manifest `%s`', manifest._db_id)
      return None

-    blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id, parsed.checksums)
+    blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id,
+                                                                        parsed.blob_digests)
    storage_map = {blob.content_checksum: blob for blob in blob_query}

    manifest_layers = []
--- a/data/registry_model/test/test_interface.py
+++ b/data/registry_model/test/test_interface.py
@ -95,6 +95,9 @@ def test_lookup_manifests(repo_namespace, repo_name, registry_model):
  assert found.legacy_image
  assert found.legacy_image.parents

+  schema1_parsed = registry_model.get_schema1_parsed_manifest(found, 'foo', 'bar', 'baz', storage)
+  assert schema1_parsed is not None
+

 def test_lookup_unknown_manifest(registry_model):
  repo = model.repository.get_repository('devtable', 'simple')
--- a/endpoints/v2/manifest.py
+++ b/endpoints/v2/manifest.py
@ -56,7 +56,13 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
    # Something went wrong.
    raise ManifestInvalid()

-  manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, manifest_ref, manifest)
+  try:
+    parsed = manifest.get_parsed_manifest()
+  except ManifestException:
+    logger.exception('Got exception when trying to parse manifest `%s`', manifest_ref)
+    raise ManifestInvalid()
+
+  manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, manifest_ref, parsed)
  if manifest is None:
    raise ManifestUnknown()

@ -65,7 +71,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
  metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True])

  return Response(
-    manifest.manifest_bytes,
+    manifest.bytes,
    status=200,
    headers={
      'Content-Type': manifest.media_type,
@ -88,14 +94,20 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
  if manifest is None:
    raise ManifestUnknown()

-  manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, '$digest', manifest)
+  try:
+    parsed = manifest.get_parsed_manifest()
+  except ManifestException:
+    logger.exception('Got exception when trying to parse manifest `%s`', manifest_ref)
+    raise ManifestInvalid()
+
+  manifest = _rewrite_to_schema1_if_necessary(namespace_name, repo_name, '$digest', parsed)
  if manifest is None:
    raise ManifestUnknown()

  track_and_log('pull_repo', repository_ref, manifest_digest=manifest_ref)
  metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True])

-  return Response(manifest.manifest_bytes, status=200, headers={
+  return Response(manifest.bytes, status=200, headers={
    'Content-Type': manifest.media_type,
    'Docker-Content-Digest': manifest.digest,
  })
@ -106,7 +118,8 @@ def _rewrite_to_schema1_if_necessary(namespace_name, repo_name, tag_name, manife
  # media type is not in the Accept header, we return a schema 1 version of the manifest for
  # the amd64+linux platform, if any, or None if none.
  # See: https://docs.docker.com/registry/spec/manifest-v2-2
-  if len(request.accept_mimetypes) != 0 and manifest.media_type in request.accept_mimetypes:
+  mimetypes = [mimetype for mimetype, _ in request.accept_mimetypes]
+  if manifest.media_type in mimetypes:
    return manifest

  def lookup_fn(config_or_manifest_digest):
--- a/endpoints/verbs/init.py
+++ b/endpoints/verbs/init.py
@ -16,6 +16,7 @@ from data.registry_model import registry_model
 from endpoints.decorators import anon_protect, anon_allowed, route_show_if, parse_repository_name
 from endpoints.v2.blob import BLOB_DIGEST_ROUTE
 from image.appc import AppCImageFormatter
+from image.docker import ManifestException
 from image.docker.squashed import SquashedDockerImageFormatter
 from storage import Storage
 from util.audit import track_and_log, wrap_repository
@ -42,7 +43,7 @@ class VerbReporter(TarLayerFormatterReporter):
    metric_queue.verb_action_passes.Inc(labelvalues=[self.kind, pass_count])


-def _open_stream(formatter, tag, manifest, derived_image_id, handlers, reporter):
+def _open_stream(formatter, tag, manifest, schema1_manifest, derived_image_id, handlers, reporter):
  """
  This method generates a stream of data which will be replicated and read from the queue files.
  This method runs in a separate process.
@ -68,7 +69,7 @@ def _open_stream(formatter, tag, manifest, derived_image_id, handlers, reporter)
    for layer in reversed(layers):
      yield image_stream_getter(store, layer.blob)

-  stream = formatter.build_stream(tag, manifest, derived_image_id, layers,
+  stream = formatter.build_stream(tag, schema1_manifest, derived_image_id, layers,
                                  tar_stream_getter_iterator, reporter=reporter)

  for handler_fn in handlers:
@ -220,9 +221,21 @@ def _verify_repo_verb(_, namespace, repo_name, tag_name, verb, checker=None):
    logger.debug('Could not get manifest on %s/%s:%s::%s', namespace, repo_name, tag.name, verb)
    abort(404)

+  # Ensure the manifest is not a list.
+  try:
+    schema1_manifest = registry_model.get_schema1_parsed_manifest(manifest, namespace,
+                                                                  repo_name, tag.name,
+                                                                  storage)
+  except ManifestException:
+    logger.exception('Could not get manifest on %s/%s:%s::%s', namespace, repo_name, tag.name, verb)
+    abort(400)
+
+  if schema1_manifest is None:
+    abort(404)
+
  # If there is a data checker, call it first.
  if checker is not None:
-    if not checker(tag, manifest):
+    if not checker(tag, schema1_manifest):
      logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repo_name, tag.name, verb)
      abort(404)

@ -230,12 +243,12 @@ def _verify_repo_verb(_, namespace, repo_name, tag_name, verb, checker=None):
  assert tag.repository.namespace_name
  assert tag.repository.name

-  return tag, manifest
+  return tag, manifest, schema1_manifest


 def _repo_verb_signature(namespace, repository, tag_name, verb, checker=None, **kwargs):
  # Verify that the tag exists and that we have access to it.
-  tag, manifest = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker)
+  tag, manifest, _ = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker)

  # Find the derived image storage for the verb.
  derived_image = registry_model.lookup_derived_image(manifest, verb,
@ -261,7 +274,8 @@ def _repo_verb(namespace, repository, tag_name, verb, formatter, sign=False, che
  # Verify that the image exists and that we have access to it.
  logger.debug('Verifying repo verb %s for repository %s/%s with user %s with mimetype %s',
               verb, namespace, repository, get_authenticated_user(), request.accept_mimetypes.best)
-  tag, manifest = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker)
+  tag, manifest, schema1_manifest = _verify_repo_verb(storage, namespace, repository,
+                                                      tag_name, verb, checker)

  # Load the repository for later.
  repo = model.repository.get_repository(namespace, repository)
@ -323,7 +337,7 @@ def _repo_verb(namespace, repository, tag_name, verb, formatter, sign=False, che
  # and send the results to the client and storage.
  handlers = [hasher.update]
  reporter = VerbReporter(verb)
-  args = (formatter, tag, manifest, derived_image.unique_id, handlers, reporter)
+  args = (formatter, tag, manifest, schema1_manifest, derived_image.unique_id, handlers, reporter)
  queue_process = QueueProcess(
    _open_stream,
    8 * 1024,
@ -360,7 +374,7 @@ def _repo_verb(namespace, repository, tag_name, verb, formatter, sign=False, che
 def os_arch_checker(os, arch):
  def checker(tag, manifest):
    try:
-      image_json = json.loads(manifest.get_parsed_manifest().leaf_layer.raw_v1_metadata)
+      image_json = json.loads(manifest.leaf_layer.raw_v1_metadata)
    except ValueError:
      logger.exception('Could not parse leaf layer JSON for manifest %s', manifest)
      return False
--- a/image/appc/init.py
+++ b/image/appc/init.py
@ -18,10 +18,9 @@ class AppCImageFormatter(TarImageFormatter):
  Image formatter which produces an tarball according to the AppC specification.
  """

-  def stream_generator(self, tag, manifest, synthetic_image_id, layer_iterator,
+  def stream_generator(self, tag, parsed_manifest, synthetic_image_id, layer_iterator,
                       tar_stream_getter_iterator, reporter=None):
    image_mtime = 0
-    parsed_manifest = manifest.get_parsed_manifest()
    created = parsed_manifest.created_datetime
    if created is not None:
      image_mtime = calendar.timegm(created.utctimetuple())
--- a/image/docker/schema2/config.py
+++ b/image/docker/schema2/config.py
@ -170,7 +170,7 @@ class DockerSchema2Config(object):

  def __init__(self, config_bytes):
    self._config_bytes = config_bytes
-    
+
    try:
      self._parsed = json.loads(config_bytes)
    except ValueError as ve:
@ -191,6 +191,11 @@ class DockerSchema2Config(object):
    """ Returns the size of this config object. """
    return len(self._config_bytes)

+  @property
+  def bytes(self):
+    """ Returns the bytes of this config object. """
+    return self._config_bytes
+
  @property
  def labels(self):
    """ Returns a dictionary of all the labels defined in this configuration. """
--- a/image/docker/squashed.py
+++ b/image/docker/squashed.py
@ -28,10 +28,9 @@ class SquashedDockerImageFormatter(TarImageFormatter):
  # daemon dies when trying to load the entire tar into memory.
  SIZE_MULTIPLIER = 1.2

-  def stream_generator(self, tag, manifest, synthetic_image_id, layer_iterator,
+  def stream_generator(self, tag, parsed_manifest, synthetic_image_id, layer_iterator,
                       tar_stream_getter_iterator, reporter=None):
    image_mtime = 0
-    parsed_manifest = manifest.get_parsed_manifest()
    created = parsed_manifest.created_datetime
    if created is not None:
      image_mtime = calendar.timegm(created.utctimetuple())