From cbfb6054e5ea055e5f2f932c46a6cc349fc4df7c Mon Sep 17 00:00:00 2001
From: Joseph Schorr <joseph.schorr@coreos.com>
Date: Mon, 19 Nov 2018 11:55:52 +0200
Subject: [PATCH] Switch content retrieval in manifests to be behind an
 interface

This allows for easy separation of retrieval of config blobs vs manifests
---
 image/docker/interfaces.py                 |  23 +++--
 image/docker/schema1.py                    |   8 +-
 image/docker/schema2/list.py               |  33 +++----
 image/docker/schema2/manifest.py           |  30 +++---
 image/docker/schema2/test/test_list.py     |  40 +++-----
 image/docker/schema2/test/test_manifest.py | 109 ++++++++++-----------
 image/docker/schemautil.py                 |  24 +++++
 7 files changed, 143 insertions(+), 124 deletions(-)
 create mode 100644 image/docker/schemautil.py

diff --git a/image/docker/interfaces.py b/image/docker/interfaces.py
index d34e1e140..008a7eb23 100644
--- a/image/docker/interfaces.py
+++ b/image/docker/interfaces.py
@@ -71,14 +71,13 @@ class ManifestInterface(object):
     """
 
   @abstractmethod
-  def child_manifests(self, lookup_manifest_fn):
+  def child_manifests(self, content_retriever):
     """ Returns an iterator of all manifests that live under this manifest, if any or None if not
-        applicable. The lookup_manifest_fn is a function that, when given a blob content SHA,
-        returns the contents of that blob in storage if any or None if none.
+        applicable.
     """
 
   @abstractmethod
-  def get_manifest_labels(self, lookup_config_fn):
+  def get_manifest_labels(self, content_retriever):
     """ Returns a dictionary of all the labels defined inside this manifest or None if this kind
         of manifest does not support labels. """
     pass
@@ -88,7 +87,7 @@ class ManifestInterface(object):
     """ Returns an unsigned version of this manifest. """
 
   @abstractmethod
-  def generate_legacy_layers(self, images_map, lookup_config_fn):
+  def generate_legacy_layers(self, images_map, content_retriever):
     """
     Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata.
 
@@ -100,7 +99,19 @@ class ManifestInterface(object):
     """
 
   @abstractmethod
-  def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
+  def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
     """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
         If none, returns None.
     """
+
+
+@add_metaclass(ABCMeta)
+class ContentRetriever(object):
+  """ Defines the interface for retrieval of various content referneced by a manifest. """
+  @abstractmethod
+  def get_manifest_bytes_with_digest(self, digest):
+    """ Returns the bytes of the manifest with the given digest or None if none found. """
+
+  @abstractmethod
+  def get_blob_bytes_with_digest(self, digest):
+    """ Returns the bytes of the blob with the given digest or None if none found. """
diff --git a/image/docker/schema1.py b/image/docker/schema1.py
index e5698151d..23abb3a84 100644
--- a/image/docker/schema1.py
+++ b/image/docker/schema1.py
@@ -314,10 +314,10 @@ class DockerSchema1Manifest(ManifestInterface):
   def local_blob_digests(self):
     return self.blob_digests
 
-  def child_manifests(self, lookup_manifest_fn):
+  def child_manifests(self, content_retriever):
     return None
 
-  def get_manifest_labels(self, lookup_config_fn):
+  def get_manifest_labels(self, content_retriever):
     return self.layers[-1].v1_metadata.labels
 
   def unsigned(self):
@@ -374,10 +374,10 @@ class DockerSchema1Manifest(ManifestInterface):
     signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY]))
     return signed_content_head + signed_content_tail
 
-  def generate_legacy_layers(self, images_map, lookup_config_fn):
+  def generate_legacy_layers(self, images_map, content_retriever):
     return self.rewrite_invalid_image_ids(images_map)
 
-  def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
+  def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
     """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
         If none, returns None.
     """
diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py
index 0b068f0c6..6658f74e9 100644
--- a/image/docker/schema2/list.py
+++ b/image/docker/schema2/list.py
@@ -40,9 +40,9 @@ class MalformedSchema2ManifestList(Exception):
 
 
 class LazyManifestLoader(object):
-  def __init__(self, manifest_data, lookup_manifest_fn):
+  def __init__(self, manifest_data, content_retriever):
     self._manifest_data = manifest_data
-    self._lookup_manifest_fn = lookup_manifest_fn
+    self._content_retriever = content_retriever
     self._loaded_manifest = None
 
   @property
@@ -56,7 +56,7 @@ class LazyManifestLoader(object):
   def _load_manifest(self):
     digest = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY]
     size = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY]
-    manifest_bytes = self._lookup_manifest_fn(digest)
+    manifest_bytes = self._content_retriever.get_manifest_bytes_with_digest(digest)
     if manifest_bytes is None:
       raise MalformedSchema2ManifestList('Could not find child manifest with digest `%s`' % digest)
 
@@ -237,24 +237,23 @@ class DockerSchema2ManifestList(ManifestInterface):
     return None
 
   @lru_cache(maxsize=1)
-  def manifests(self, lookup_manifest_fn):
-    """ Returns the manifests in the list. The `lookup_manifest_fn` is a function
-        that returns the manifest bytes for the specified digest.
+  def manifests(self, content_retriever):
+    """ Returns the manifests in the list.
     """
     manifests = self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY]
-    return [LazyManifestLoader(m, lookup_manifest_fn) for m in manifests]
+    return [LazyManifestLoader(m, content_retriever) for m in manifests]
 
-  def child_manifests(self, lookup_manifest_fn):
-    return self.manifests(lookup_manifest_fn)
+  def child_manifests(self, content_retriever):
+    return self.manifests(content_retriever)
 
-  def get_manifest_labels(self, lookup_config_fn):
+  def get_manifest_labels(self, content_retriever):
     return None
 
-  def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
+  def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
     """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
         If none, returns None.
     """
-    for manifest_ref in self.manifests(lookup_fn):
+    for manifest_ref in self.manifests(content_retriever):
       platform = manifest_ref._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY]
       architecture = platform[DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY]
       os = platform[DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY]
@@ -263,21 +262,19 @@ class DockerSchema2ManifestList(ManifestInterface):
 
       try:
         manifest = manifest_ref.manifest_obj
-      except ManifestException:
-        logger.exception('Could not load child manifest')
-        return None
-      except IOError:
+      except (ManifestException, IOError):
         logger.exception('Could not load child manifest')
         return None
 
-      return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, lookup_fn)
+      return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name,
+                                                 content_retriever)
 
     return None
 
   def unsigned(self):
     return self
 
-  def generate_legacy_layers(self, images_map, lookup_config_fn):
+  def generate_legacy_layers(self, images_map, content_retriever):
     return None
 
 
diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py
index b38f53ac4..9b9ff930a 100644
--- a/image/docker/schema2/manifest.py
+++ b/image/docker/schema2/manifest.py
@@ -214,11 +214,14 @@ class DockerSchema2Manifest(ManifestInterface):
     return ([str(layer.digest) for layer in self.layers if not layer.urls] +
             [str(self.config.digest)])
 
-  def get_manifest_labels(self, lookup_config_fn):
-    return self._get_built_config(lookup_config_fn).labels
+  def get_manifest_labels(self, content_retriever):
+    return self._get_built_config(content_retriever).labels
+
+  def _get_built_config(self, content_retriever):
+    config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest)
+    if config_bytes is None:
+      raise MalformedSchema2Manifest('Could not load config blob for manifest')
 
-  def _get_built_config(self, lookup_config_fn):
-    config_bytes = lookup_config_fn(self.config.digest)
     if len(config_bytes) != self.config.size:
       raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s',
                                      len(config_bytes), self.config.size)
@@ -229,7 +232,7 @@ class DockerSchema2Manifest(ManifestInterface):
   def bytes(self):
     return self._payload
 
-  def child_manifests(self, lookup_manifest_fn):
+  def child_manifests(self, content_retriever):
     return None
 
   def _generate_layers(self):
@@ -269,13 +272,12 @@ class DockerSchema2Manifest(ManifestInterface):
       v1_layer_id = digest_history.hexdigest()
       yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id)
 
-  def populate_schema1_builder(self, v1_builder, lookup_config_fn):
+  def populate_schema1_builder(self, v1_builder, content_retriever):
     """ Populates a DockerSchema1ManifestBuilder with the layers and config from
-        this schema. The `lookup_config_fn` is a function that, when given the config
-        digest SHA, returns the associated configuration JSON bytes for this schema.
+        this schema.
     """
     assert not self.has_remote_layer
-    schema2_config = self._get_built_config(lookup_config_fn)
+    schema2_config = self._get_built_config(content_retriever)
 
     # Build the V1 IDs for the layers.
     layers = list(self.layers_with_v1_ids)
@@ -287,22 +289,22 @@ class DockerSchema2Manifest(ManifestInterface):
 
     return v1_builder
 
-  def generate_legacy_layers(self, images_map, lookup_config_fn):
+  def generate_legacy_layers(self, images_map, content_retriever):
     assert not self.has_remote_layer
 
     # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
     # the logic for generating the DockerV1Metadata. All of this will go away once we get
     # rid of legacy images in the database, so this is a temporary solution.
     v1_builder = DockerSchema1ManifestBuilder('', '', '')
-    self.populate_schema1_builder(v1_builder, lookup_config_fn)
-    return v1_builder.build().generate_legacy_layers(images_map, lookup_config_fn)
+    self.populate_schema1_builder(v1_builder, content_retriever)
+    return v1_builder.build().generate_legacy_layers(images_map, content_retriever)
 
-  def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
+  def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
     if self.has_remote_layer:
       return None
 
     v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
-    self.populate_schema1_builder(v1_builder, lookup_fn)
+    self.populate_schema1_builder(v1_builder, content_retriever)
     return v1_builder.build()
 
   def unsigned(self):
diff --git a/image/docker/schema2/test/test_list.py b/image/docker/schema2/test/test_list.py
index dc5ca5252..c4bfc16ac 100644
--- a/image/docker/schema2/test/test_list.py
+++ b/image/docker/schema2/test/test_list.py
@@ -6,6 +6,7 @@ from image.docker.schema2.manifest import DockerSchema2Manifest
 from image.docker.schema2.list import (MalformedSchema2ManifestList, DockerSchema2ManifestList,
                                        DockerSchema2ManifestListBuilder)
 from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as v22_bytes
+from image.docker.schemautil import ContentRetrieverForTesting
 from image.docker.test.test_schema1 import MANIFEST_BYTES as v21_bytes
 
 @pytest.mark.parametrize('json_data', [
@@ -66,22 +67,21 @@ NO_AMD_MANIFESTLIST_BYTES = json.dumps({
   ]
 })
 
-def test_valid_manifestlist():
-  def _get_manifest(digest):
-    if digest == 'sha256:e6':
-      return v22_bytes
-    else:
-      return v21_bytes
+retriever = ContentRetrieverForTesting({
+  'sha256:e6': v22_bytes,
+  'sha256:5b': v21_bytes,
+})
 
+def test_valid_manifestlist():
   manifestlist = DockerSchema2ManifestList(MANIFESTLIST_BYTES)
-  assert len(manifestlist.manifests(_get_manifest)) == 2
+  assert len(manifestlist.manifests(retriever)) == 2
 
   assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json'
   assert manifestlist.bytes == MANIFESTLIST_BYTES
   assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES)
   assert set(manifestlist.blob_digests) == {'sha256:e6', 'sha256:5b'}
 
-  for index, manifest in enumerate(manifestlist.manifests(_get_manifest)):
+  for index, manifest in enumerate(manifestlist.manifests(retriever)):
     if index == 0:
       assert isinstance(manifest.manifest_obj, DockerSchema2Manifest)
       assert manifest.manifest_obj.schema_version == 2
@@ -89,7 +89,7 @@ def test_valid_manifestlist():
       assert isinstance(manifest.manifest_obj, DockerSchema1Manifest)
       assert manifest.manifest_obj.schema_version == 1
 
-  compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', _get_manifest)
+  compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever)
   assert compatible_manifest.schema_version == 1
 
   assert manifestlist.layers is None
@@ -98,35 +98,21 @@ def test_valid_manifestlist():
 
 
 def test_get_v1_compatible_manifest_no_matching_list():
-  def _get_manifest(digest):
-    if digest == 'sha256:e6':
-      return v22_bytes
-    else:
-      return v21_bytes
-
   manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES)
-  assert len(manifestlist.manifests(_get_manifest)) == 1
+  assert len(manifestlist.manifests(retriever)) == 1
 
   assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json'
   assert manifestlist.bytes == NO_AMD_MANIFESTLIST_BYTES
 
-  compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', _get_manifest)
+  compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever)
   assert compatible_manifest is None
 
 
 def test_builder():
-  def _get_manifest(digest):
-    if digest == 'sha256:e6':
-      return v22_bytes
-    else:
-      return v21_bytes
-
   existing = DockerSchema2ManifestList(MANIFESTLIST_BYTES)
-
   builder = DockerSchema2ManifestListBuilder()
-  for index, manifest in enumerate(existing.manifests(_get_manifest)):
+  for index, manifest in enumerate(existing.manifests(retriever)):
     builder.add_manifest(manifest.manifest_obj, "amd64", "os")
 
   built = builder.build()
-  assert len(built.manifests(_get_manifest)) == 2
-
+  assert len(built.manifests(retriever)) == 2
diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py
index 560c4bb26..1a515893f 100644
--- a/image/docker/schema2/test/test_manifest.py
+++ b/image/docker/schema2/test/test_manifest.py
@@ -8,6 +8,8 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder,
 from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest,
                                            DockerSchema2ManifestBuilder)
 from image.docker.schema2.test.test_config import CONFIG_BYTES
+from image.docker.schemautil import ContentRetrieverForTesting
+
 
 @pytest.mark.parametrize('json_data', [
   '',
@@ -156,27 +158,28 @@ def test_schema2_builder():
 
 def test_get_manifest_labels():
   labels = dict(foo='bar', baz='meh')
-
-  def _lookup_config(digest):
-    config_str = json.dumps({
-      "config": {
-        "Labels": labels,
-      },
-      "rootfs": {"type": "layers", "diff_ids": []},
-      "history": [],
-    })
-    return config_str + ' ' * (1885 - len(config_str))
+  retriever = ContentRetrieverForTesting.for_config({
+    "config": {
+      "Labels": labels,
+    },
+    "rootfs": {"type": "layers", "diff_ids": []},
+    "history": [],
+  }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885)
 
   manifest = DockerSchema2Manifest(MANIFEST_BYTES)
-  assert manifest.get_manifest_labels(_lookup_config) == labels
+  assert manifest.get_manifest_labels(retriever) == labels
 
 
 def test_build_schema1():
   manifest = DockerSchema2Manifest(MANIFEST_BYTES)
   assert not manifest.has_remote_layer
 
+  retriever = ContentRetrieverForTesting({
+    'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7': CONFIG_BYTES,
+  })
+
   builder = DockerSchema1ManifestBuilder('somenamespace', 'somename', 'sometag')
-  manifest.populate_schema1_builder(builder, lambda digest: CONFIG_BYTES)
+  manifest.populate_schema1_builder(builder, retriever)
   schema1 = builder.build(docker_v2_signing_key)
 
   assert schema1.media_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
@@ -196,35 +199,33 @@ def test_build_schema1():
 
 
 def test_get_v1_compatible_manifest():
-  def _get_config(digest):
-    config_str = json.dumps({
-      "config": {
-        "Labels": {},
+  retriever = ContentRetrieverForTesting.for_config({
+    "config": {
+      "Labels": {},
+    },
+    "rootfs": {"type": "layers", "diff_ids": []},
+    "history": [
+      {
+        "created": "2018-04-03T18:37:09.284840891Z",
+        "created_by": "foo"
       },
-      "rootfs": {"type": "layers", "diff_ids": []},
-      "history": [
-        {
-          "created": "2018-04-03T18:37:09.284840891Z",
-          "created_by": "foo"
-        },
-        {
-          "created": "2018-04-12T18:37:09.284840891Z",
-          "created_by": "bar"
-        },
-        {
-          "created": "2018-04-03T18:37:09.284840891Z",
-          "created_by": "foo"
-        },
-        {
-          "created": "2018-04-12T18:37:09.284840891Z",
-          "created_by": "bar"
-        },
-      ],
-    })
-    return config_str + ' ' * (1885 - len(config_str))
+      {
+        "created": "2018-04-12T18:37:09.284840891Z",
+        "created_by": "bar"
+      },
+      {
+        "created": "2018-04-03T18:37:09.284840891Z",
+        "created_by": "foo"
+      },
+      {
+        "created": "2018-04-12T18:37:09.284840891Z",
+        "created_by": "bar"
+      },
+    ],
+  }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885)
 
   manifest = DockerSchema2Manifest(MANIFEST_BYTES)
-  schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', _get_config)
+  schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', retriever)
   assert schema1 is not None
   assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
   assert len(schema1.layers) == len(manifest.layers)
@@ -240,25 +241,23 @@ def test_generate_legacy_layers():
   builder.set_config_digest('sha256:def456', 2000)
   manifest = builder.build()
 
-  def _lookup_config(digest):
-    config_str = json.dumps({
-      "config": {
+  retriever = ContentRetrieverForTesting.for_config({
+    "config": {
+    },
+    "rootfs": {"type": "layers", "diff_ids": []},
+    "history": [
+      {
+        "created": "2018-04-03T18:37:09.284840891Z",
+        "created_by": "foo"
       },
-      "rootfs": {"type": "layers", "diff_ids": []},
-      "history": [
-        {
-          "created": "2018-04-03T18:37:09.284840891Z",
-          "created_by": "foo"
-        },
-        {
-          "created": "2018-04-12T18:37:09.284840891Z",
-          "created_by": "bar"
-        },
-      ],
-    })
-    return config_str + ' ' * (2000 - len(config_str))
+      {
+        "created": "2018-04-12T18:37:09.284840891Z",
+        "created_by": "bar"
+      },
+    ],
+  }, 'sha256:def456', 2000)
 
-  legacy_layers = list(manifest.generate_legacy_layers({}, _lookup_config))
+  legacy_layers = list(manifest.generate_legacy_layers({}, retriever))
   assert len(legacy_layers) == 2
   assert legacy_layers[0].content_checksum == 'sha256:abc123'
   assert legacy_layers[1].content_checksum == 'sha256:def456'
diff --git a/image/docker/schemautil.py b/image/docker/schemautil.py
new file mode 100644
index 000000000..1a231c62c
--- /dev/null
+++ b/image/docker/schemautil.py
@@ -0,0 +1,24 @@
+import json
+
+from image.docker.interfaces import ContentRetriever
+
+class ContentRetrieverForTesting(ContentRetriever):
+  def __init__(self, digests=None):
+    self.digests = digests or {}
+
+  def add_digest(self, digest, content):
+    self.digests[digest] = content
+
+  def get_manifest_bytes_with_digest(self, digest):
+    return self.digests.get(digest)
+
+  def get_blob_bytes_with_digest(self, digest):
+    return self.digests.get(digest)
+
+  @classmethod
+  def for_config(cls, config_obj, digest, size):
+    config_str = json.dumps(config_obj)
+    padded_string = config_str + ' ' * (size - len(config_str))
+    digests = {}
+    digests[digest] = padded_string
+    return ContentRetrieverForTesting(digests)