Switch content retrieval in manifests to be behind an interface

This allows for easy separation of retrieval of config blobs vs manifests
This commit is contained in:
Joseph Schorr 2018-11-19 11:55:52 +02:00
parent 1eaf5b18dd
commit cbfb6054e5
7 changed files with 143 additions and 124 deletions

View file

@ -71,14 +71,13 @@ class ManifestInterface(object):
""" """
@abstractmethod @abstractmethod
def child_manifests(self, lookup_manifest_fn): def child_manifests(self, content_retriever):
""" Returns an iterator of all manifests that live under this manifest, if any or None if not """ Returns an iterator of all manifests that live under this manifest, if any or None if not
applicable. The lookup_manifest_fn is a function that, when given a blob content SHA, applicable.
returns the contents of that blob in storage if any or None if none.
""" """
@abstractmethod @abstractmethod
def get_manifest_labels(self, lookup_config_fn): def get_manifest_labels(self, content_retriever):
""" Returns a dictionary of all the labels defined inside this manifest or None if this kind """ Returns a dictionary of all the labels defined inside this manifest or None if this kind
of manifest does not support labels. """ of manifest does not support labels. """
pass pass
@ -88,7 +87,7 @@ class ManifestInterface(object):
""" Returns an unsigned version of this manifest. """ """ Returns an unsigned version of this manifest. """
@abstractmethod @abstractmethod
def generate_legacy_layers(self, images_map, lookup_config_fn): def generate_legacy_layers(self, images_map, content_retriever):
""" """
Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata.
@ -100,7 +99,19 @@ class ManifestInterface(object):
""" """
@abstractmethod @abstractmethod
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None. If none, returns None.
""" """
@add_metaclass(ABCMeta)
class ContentRetriever(object):
    """ Defines the interface for retrieval of various content referenced by a manifest.

        Manifest lookups and blob lookups are separate methods so that an implementation can
        retrieve manifests and blobs (such as config blobs) independently of one another.
    """

    @abstractmethod
    def get_manifest_bytes_with_digest(self, digest):
        """ Returns the bytes of the manifest with the given digest or None if none found. """

    @abstractmethod
    def get_blob_bytes_with_digest(self, digest):
        """ Returns the bytes of the blob with the given digest or None if none found. """

View file

@ -314,10 +314,10 @@ class DockerSchema1Manifest(ManifestInterface):
def local_blob_digests(self): def local_blob_digests(self):
return self.blob_digests return self.blob_digests
def child_manifests(self, lookup_manifest_fn): def child_manifests(self, content_retriever):
return None return None
def get_manifest_labels(self, lookup_config_fn): def get_manifest_labels(self, content_retriever):
return self.layers[-1].v1_metadata.labels return self.layers[-1].v1_metadata.labels
def unsigned(self): def unsigned(self):
@ -374,10 +374,10 @@ class DockerSchema1Manifest(ManifestInterface):
signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY])) signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY]))
return signed_content_head + signed_content_tail return signed_content_head + signed_content_tail
def generate_legacy_layers(self, images_map, lookup_config_fn): def generate_legacy_layers(self, images_map, content_retriever):
return self.rewrite_invalid_image_ids(images_map) return self.rewrite_invalid_image_ids(images_map)
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None. If none, returns None.
""" """

View file

@ -40,9 +40,9 @@ class MalformedSchema2ManifestList(Exception):
class LazyManifestLoader(object): class LazyManifestLoader(object):
def __init__(self, manifest_data, lookup_manifest_fn): def __init__(self, manifest_data, content_retriever):
self._manifest_data = manifest_data self._manifest_data = manifest_data
self._lookup_manifest_fn = lookup_manifest_fn self._content_retriever = content_retriever
self._loaded_manifest = None self._loaded_manifest = None
@property @property
@ -56,7 +56,7 @@ class LazyManifestLoader(object):
def _load_manifest(self): def _load_manifest(self):
digest = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY] digest = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY]
size = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY] size = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY]
manifest_bytes = self._lookup_manifest_fn(digest) manifest_bytes = self._content_retriever.get_manifest_bytes_with_digest(digest)
if manifest_bytes is None: if manifest_bytes is None:
raise MalformedSchema2ManifestList('Could not find child manifest with digest `%s`' % digest) raise MalformedSchema2ManifestList('Could not find child manifest with digest `%s`' % digest)
@ -237,24 +237,23 @@ class DockerSchema2ManifestList(ManifestInterface):
return None return None
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def manifests(self, lookup_manifest_fn): def manifests(self, content_retriever):
""" Returns the manifests in the list. The `lookup_manifest_fn` is a function """ Returns the manifests in the list.
that returns the manifest bytes for the specified digest.
""" """
manifests = self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY] manifests = self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY]
return [LazyManifestLoader(m, lookup_manifest_fn) for m in manifests] return [LazyManifestLoader(m, content_retriever) for m in manifests]
def child_manifests(self, lookup_manifest_fn): def child_manifests(self, content_retriever):
return self.manifests(lookup_manifest_fn) return self.manifests(content_retriever)
def get_manifest_labels(self, lookup_config_fn): def get_manifest_labels(self, content_retriever):
return None return None
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`. """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None. If none, returns None.
""" """
for manifest_ref in self.manifests(lookup_fn): for manifest_ref in self.manifests(content_retriever):
platform = manifest_ref._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY] platform = manifest_ref._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY]
architecture = platform[DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY] architecture = platform[DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY]
os = platform[DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY] os = platform[DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY]
@ -263,21 +262,19 @@ class DockerSchema2ManifestList(ManifestInterface):
try: try:
manifest = manifest_ref.manifest_obj manifest = manifest_ref.manifest_obj
except ManifestException: except (ManifestException, IOError):
logger.exception('Could not load child manifest')
return None
except IOError:
logger.exception('Could not load child manifest') logger.exception('Could not load child manifest')
return None return None
return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, lookup_fn) return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name,
content_retriever)
return None return None
def unsigned(self): def unsigned(self):
return self return self
def generate_legacy_layers(self, images_map, lookup_config_fn): def generate_legacy_layers(self, images_map, content_retriever):
return None return None

View file

@ -214,11 +214,14 @@ class DockerSchema2Manifest(ManifestInterface):
return ([str(layer.digest) for layer in self.layers if not layer.urls] + return ([str(layer.digest) for layer in self.layers if not layer.urls] +
[str(self.config.digest)]) [str(self.config.digest)])
def get_manifest_labels(self, lookup_config_fn): def get_manifest_labels(self, content_retriever):
return self._get_built_config(lookup_config_fn).labels return self._get_built_config(content_retriever).labels
def _get_built_config(self, content_retriever):
config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest)
if config_bytes is None:
raise MalformedSchema2Manifest('Could not load config blob for manifest')
def _get_built_config(self, lookup_config_fn):
config_bytes = lookup_config_fn(self.config.digest)
if len(config_bytes) != self.config.size: if len(config_bytes) != self.config.size:
raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s', raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s',
len(config_bytes), self.config.size) len(config_bytes), self.config.size)
@ -229,7 +232,7 @@ class DockerSchema2Manifest(ManifestInterface):
def bytes(self): def bytes(self):
return self._payload return self._payload
def child_manifests(self, lookup_manifest_fn): def child_manifests(self, content_retriever):
return None return None
def _generate_layers(self): def _generate_layers(self):
@ -269,13 +272,12 @@ class DockerSchema2Manifest(ManifestInterface):
v1_layer_id = digest_history.hexdigest() v1_layer_id = digest_history.hexdigest()
yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id) yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id)
def populate_schema1_builder(self, v1_builder, lookup_config_fn): def populate_schema1_builder(self, v1_builder, content_retriever):
""" Populates a DockerSchema1ManifestBuilder with the layers and config from """ Populates a DockerSchema1ManifestBuilder with the layers and config from
this schema. The `lookup_config_fn` is a function that, when given the config this schema.
digest SHA, returns the associated configuration JSON bytes for this schema.
""" """
assert not self.has_remote_layer assert not self.has_remote_layer
schema2_config = self._get_built_config(lookup_config_fn) schema2_config = self._get_built_config(content_retriever)
# Build the V1 IDs for the layers. # Build the V1 IDs for the layers.
layers = list(self.layers_with_v1_ids) layers = list(self.layers_with_v1_ids)
@ -287,22 +289,22 @@ class DockerSchema2Manifest(ManifestInterface):
return v1_builder return v1_builder
def generate_legacy_layers(self, images_map, lookup_config_fn): def generate_legacy_layers(self, images_map, content_retriever):
assert not self.has_remote_layer assert not self.has_remote_layer
# NOTE: We use the DockerSchema1ManifestBuilder here because it already contains # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
# the logic for generating the DockerV1Metadata. All of this will go away once we get # the logic for generating the DockerV1Metadata. All of this will go away once we get
# rid of legacy images in the database, so this is a temporary solution. # rid of legacy images in the database, so this is a temporary solution.
v1_builder = DockerSchema1ManifestBuilder('', '', '') v1_builder = DockerSchema1ManifestBuilder('', '', '')
self.populate_schema1_builder(v1_builder, lookup_config_fn) self.populate_schema1_builder(v1_builder, content_retriever)
return v1_builder.build().generate_legacy_layers(images_map, lookup_config_fn) return v1_builder.build().generate_legacy_layers(images_map, content_retriever)
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn): def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
if self.has_remote_layer: if self.has_remote_layer:
return None return None
v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name) v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
self.populate_schema1_builder(v1_builder, lookup_fn) self.populate_schema1_builder(v1_builder, content_retriever)
return v1_builder.build() return v1_builder.build()
def unsigned(self): def unsigned(self):

View file

@ -6,6 +6,7 @@ from image.docker.schema2.manifest import DockerSchema2Manifest
from image.docker.schema2.list import (MalformedSchema2ManifestList, DockerSchema2ManifestList, from image.docker.schema2.list import (MalformedSchema2ManifestList, DockerSchema2ManifestList,
DockerSchema2ManifestListBuilder) DockerSchema2ManifestListBuilder)
from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as v22_bytes from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as v22_bytes
from image.docker.schemautil import ContentRetrieverForTesting
from image.docker.test.test_schema1 import MANIFEST_BYTES as v21_bytes from image.docker.test.test_schema1 import MANIFEST_BYTES as v21_bytes
@pytest.mark.parametrize('json_data', [ @pytest.mark.parametrize('json_data', [
@ -66,22 +67,21 @@ NO_AMD_MANIFESTLIST_BYTES = json.dumps({
] ]
}) })
def test_valid_manifestlist(): retriever = ContentRetrieverForTesting({
def _get_manifest(digest): 'sha256:e6': v22_bytes,
if digest == 'sha256:e6': 'sha256:5b': v21_bytes,
return v22_bytes })
else:
return v21_bytes
def test_valid_manifestlist():
manifestlist = DockerSchema2ManifestList(MANIFESTLIST_BYTES) manifestlist = DockerSchema2ManifestList(MANIFESTLIST_BYTES)
assert len(manifestlist.manifests(_get_manifest)) == 2 assert len(manifestlist.manifests(retriever)) == 2
assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json'
assert manifestlist.bytes == MANIFESTLIST_BYTES assert manifestlist.bytes == MANIFESTLIST_BYTES
assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES) assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES)
assert set(manifestlist.blob_digests) == {'sha256:e6', 'sha256:5b'} assert set(manifestlist.blob_digests) == {'sha256:e6', 'sha256:5b'}
for index, manifest in enumerate(manifestlist.manifests(_get_manifest)): for index, manifest in enumerate(manifestlist.manifests(retriever)):
if index == 0: if index == 0:
assert isinstance(manifest.manifest_obj, DockerSchema2Manifest) assert isinstance(manifest.manifest_obj, DockerSchema2Manifest)
assert manifest.manifest_obj.schema_version == 2 assert manifest.manifest_obj.schema_version == 2
@ -89,7 +89,7 @@ def test_valid_manifestlist():
assert isinstance(manifest.manifest_obj, DockerSchema1Manifest) assert isinstance(manifest.manifest_obj, DockerSchema1Manifest)
assert manifest.manifest_obj.schema_version == 1 assert manifest.manifest_obj.schema_version == 1
compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', _get_manifest) compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever)
assert compatible_manifest.schema_version == 1 assert compatible_manifest.schema_version == 1
assert manifestlist.layers is None assert manifestlist.layers is None
@ -98,35 +98,21 @@ def test_valid_manifestlist():
def test_get_v1_compatible_manifest_no_matching_list(): def test_get_v1_compatible_manifest_no_matching_list():
def _get_manifest(digest):
if digest == 'sha256:e6':
return v22_bytes
else:
return v21_bytes
manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES) manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES)
assert len(manifestlist.manifests(_get_manifest)) == 1 assert len(manifestlist.manifests(retriever)) == 1
assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json'
assert manifestlist.bytes == NO_AMD_MANIFESTLIST_BYTES assert manifestlist.bytes == NO_AMD_MANIFESTLIST_BYTES
compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', _get_manifest) compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever)
assert compatible_manifest is None assert compatible_manifest is None
def test_builder(): def test_builder():
def _get_manifest(digest):
if digest == 'sha256:e6':
return v22_bytes
else:
return v21_bytes
existing = DockerSchema2ManifestList(MANIFESTLIST_BYTES) existing = DockerSchema2ManifestList(MANIFESTLIST_BYTES)
builder = DockerSchema2ManifestListBuilder() builder = DockerSchema2ManifestListBuilder()
for index, manifest in enumerate(existing.manifests(_get_manifest)): for index, manifest in enumerate(existing.manifests(retriever)):
builder.add_manifest(manifest.manifest_obj, "amd64", "os") builder.add_manifest(manifest.manifest_obj, "amd64", "os")
built = builder.build() built = builder.build()
assert len(built.manifests(_get_manifest)) == 2 assert len(built.manifests(retriever)) == 2

View file

@ -8,6 +8,8 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder,
from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest, from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest,
DockerSchema2ManifestBuilder) DockerSchema2ManifestBuilder)
from image.docker.schema2.test.test_config import CONFIG_BYTES from image.docker.schema2.test.test_config import CONFIG_BYTES
from image.docker.schemautil import ContentRetrieverForTesting
@pytest.mark.parametrize('json_data', [ @pytest.mark.parametrize('json_data', [
'', '',
@ -156,27 +158,28 @@ def test_schema2_builder():
def test_get_manifest_labels(): def test_get_manifest_labels():
labels = dict(foo='bar', baz='meh') labels = dict(foo='bar', baz='meh')
retriever = ContentRetrieverForTesting.for_config({
def _lookup_config(digest):
config_str = json.dumps({
"config": { "config": {
"Labels": labels, "Labels": labels,
}, },
"rootfs": {"type": "layers", "diff_ids": []}, "rootfs": {"type": "layers", "diff_ids": []},
"history": [], "history": [],
}) }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885)
return config_str + ' ' * (1885 - len(config_str))
manifest = DockerSchema2Manifest(MANIFEST_BYTES) manifest = DockerSchema2Manifest(MANIFEST_BYTES)
assert manifest.get_manifest_labels(_lookup_config) == labels assert manifest.get_manifest_labels(retriever) == labels
def test_build_schema1(): def test_build_schema1():
manifest = DockerSchema2Manifest(MANIFEST_BYTES) manifest = DockerSchema2Manifest(MANIFEST_BYTES)
assert not manifest.has_remote_layer assert not manifest.has_remote_layer
retriever = ContentRetrieverForTesting({
'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7': CONFIG_BYTES,
})
builder = DockerSchema1ManifestBuilder('somenamespace', 'somename', 'sometag') builder = DockerSchema1ManifestBuilder('somenamespace', 'somename', 'sometag')
manifest.populate_schema1_builder(builder, lambda digest: CONFIG_BYTES) manifest.populate_schema1_builder(builder, retriever)
schema1 = builder.build(docker_v2_signing_key) schema1 = builder.build(docker_v2_signing_key)
assert schema1.media_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE assert schema1.media_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
@ -196,8 +199,7 @@ def test_build_schema1():
def test_get_v1_compatible_manifest(): def test_get_v1_compatible_manifest():
def _get_config(digest): retriever = ContentRetrieverForTesting.for_config({
config_str = json.dumps({
"config": { "config": {
"Labels": {}, "Labels": {},
}, },
@ -220,11 +222,10 @@ def test_get_v1_compatible_manifest():
"created_by": "bar" "created_by": "bar"
}, },
], ],
}) }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885)
return config_str + ' ' * (1885 - len(config_str))
manifest = DockerSchema2Manifest(MANIFEST_BYTES) manifest = DockerSchema2Manifest(MANIFEST_BYTES)
schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', _get_config) schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', retriever)
assert schema1 is not None assert schema1 is not None
assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
assert len(schema1.layers) == len(manifest.layers) assert len(schema1.layers) == len(manifest.layers)
@ -240,8 +241,7 @@ def test_generate_legacy_layers():
builder.set_config_digest('sha256:def456', 2000) builder.set_config_digest('sha256:def456', 2000)
manifest = builder.build() manifest = builder.build()
def _lookup_config(digest): retriever = ContentRetrieverForTesting.for_config({
config_str = json.dumps({
"config": { "config": {
}, },
"rootfs": {"type": "layers", "diff_ids": []}, "rootfs": {"type": "layers", "diff_ids": []},
@ -255,10 +255,9 @@ def test_generate_legacy_layers():
"created_by": "bar" "created_by": "bar"
}, },
], ],
}) }, 'sha256:def456', 2000)
return config_str + ' ' * (2000 - len(config_str))
legacy_layers = list(manifest.generate_legacy_layers({}, _lookup_config)) legacy_layers = list(manifest.generate_legacy_layers({}, retriever))
assert len(legacy_layers) == 2 assert len(legacy_layers) == 2
assert legacy_layers[0].content_checksum == 'sha256:abc123' assert legacy_layers[0].content_checksum == 'sha256:abc123'
assert legacy_layers[1].content_checksum == 'sha256:def456' assert legacy_layers[1].content_checksum == 'sha256:def456'

View file

@ -0,0 +1,24 @@
import json
from image.docker.interfaces import ContentRetriever
class ContentRetrieverForTesting(ContentRetriever):
    """ In-memory ContentRetriever for use in tests.

        Content is held in a single dict keyed by digest. Manifests and blobs share that
        mapping, so either lookup method returns whatever was registered for the digest.
    """

    def __init__(self, digests=None):
        """ `digests` is an optional dict of digest -> content used to seed the retriever. """
        # NOTE: a falsy `digests` (None or an empty dict) is replaced by a fresh dict, while a
        # non-empty dict is stored by reference and is therefore mutated by add_digest().
        self.digests = digests or {}

    def add_digest(self, digest, content):
        """ Registers (or overwrites) the content returned for the given digest. """
        self.digests[digest] = content

    def get_manifest_bytes_with_digest(self, digest):
        """ Returns the content registered under the given digest or None if none found. """
        return self.digests.get(digest)

    def get_blob_bytes_with_digest(self, digest):
        """ Returns the content registered under the given digest or None if none found. """
        return self.digests.get(digest)

    @classmethod
    def for_config(cls, config_obj, digest, size):
        """ Returns a retriever holding a single JSON config blob registered under `digest`.

            `config_obj` is serialized with json.dumps and right-padded with spaces up to `size`
            characters, so callers can make the blob length match a declared config size. If the
            serialized JSON is already `size` characters or longer, no padding is added.
        """
        config_str = json.dumps(config_obj)
        padded_string = config_str + ' ' * (size - len(config_str))
        # Build through `cls` rather than the hard-coded class name so that subclasses
        # calling for_config() receive an instance of their own type.
        return cls({digest: padded_string})