Switch content retrieval in manifests to be behind an interface

This allows for easy separation of retrieval of config blobs vs manifests
This commit is contained in:
Joseph Schorr 2018-11-19 11:55:52 +02:00
parent 1eaf5b18dd
commit cbfb6054e5
7 changed files with 143 additions and 124 deletions

View file

@ -71,14 +71,13 @@ class ManifestInterface(object):
"""
@abstractmethod
def child_manifests(self, lookup_manifest_fn):
def child_manifests(self, content_retriever):
""" Returns an iterator of all manifests that live under this manifest, if any or None if not
applicable. The lookup_manifest_fn is a function that, when given a blob content SHA,
returns the contents of that blob in storage if any or None if none.
applicable.
"""
@abstractmethod
def get_manifest_labels(self, lookup_config_fn):
def get_manifest_labels(self, content_retriever):
""" Returns a dictionary of all the labels defined inside this manifest or None if this kind
of manifest does not support labels. """
pass
@ -88,7 +87,7 @@ class ManifestInterface(object):
""" Returns an unsigned version of this manifest. """
@abstractmethod
def generate_legacy_layers(self, images_map, lookup_config_fn):
def generate_legacy_layers(self, images_map, content_retriever):
"""
Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata.
@ -100,7 +99,19 @@ class ManifestInterface(object):
"""
@abstractmethod
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None.
"""
@add_metaclass(ABCMeta)
class ContentRetriever(object):
""" Defines the interface for retrieval of various content referneced by a manifest. """
@abstractmethod
def get_manifest_bytes_with_digest(self, digest):
""" Returns the bytes of the manifest with the given digest or None if none found. """
@abstractmethod
def get_blob_bytes_with_digest(self, digest):
""" Returns the bytes of the blob with the given digest or None if none found. """

View file

@ -314,10 +314,10 @@ class DockerSchema1Manifest(ManifestInterface):
def local_blob_digests(self):
return self.blob_digests
def child_manifests(self, lookup_manifest_fn):
def child_manifests(self, content_retriever):
return None
def get_manifest_labels(self, lookup_config_fn):
def get_manifest_labels(self, content_retriever):
return self.layers[-1].v1_metadata.labels
def unsigned(self):
@ -374,10 +374,10 @@ class DockerSchema1Manifest(ManifestInterface):
signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY]))
return signed_content_head + signed_content_tail
def generate_legacy_layers(self, images_map, lookup_config_fn):
def generate_legacy_layers(self, images_map, content_retriever):
return self.rewrite_invalid_image_ids(images_map)
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None.
"""

View file

@ -40,9 +40,9 @@ class MalformedSchema2ManifestList(Exception):
class LazyManifestLoader(object):
def __init__(self, manifest_data, lookup_manifest_fn):
def __init__(self, manifest_data, content_retriever):
self._manifest_data = manifest_data
self._lookup_manifest_fn = lookup_manifest_fn
self._content_retriever = content_retriever
self._loaded_manifest = None
@property
@ -56,7 +56,7 @@ class LazyManifestLoader(object):
def _load_manifest(self):
digest = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY]
size = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY]
manifest_bytes = self._lookup_manifest_fn(digest)
manifest_bytes = self._content_retriever.get_manifest_bytes_with_digest(digest)
if manifest_bytes is None:
raise MalformedSchema2ManifestList('Could not find child manifest with digest `%s`' % digest)
@ -237,24 +237,23 @@ class DockerSchema2ManifestList(ManifestInterface):
return None
@lru_cache(maxsize=1)
def manifests(self, lookup_manifest_fn):
""" Returns the manifests in the list. The `lookup_manifest_fn` is a function
that returns the manifest bytes for the specified digest.
def manifests(self, content_retriever):
""" Returns the manifests in the list.
"""
manifests = self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY]
return [LazyManifestLoader(m, lookup_manifest_fn) for m in manifests]
return [LazyManifestLoader(m, content_retriever) for m in manifests]
def child_manifests(self, lookup_manifest_fn):
return self.manifests(lookup_manifest_fn)
def child_manifests(self, content_retriever):
return self.manifests(content_retriever)
def get_manifest_labels(self, lookup_config_fn):
def get_manifest_labels(self, content_retriever):
return None
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None.
"""
for manifest_ref in self.manifests(lookup_fn):
for manifest_ref in self.manifests(content_retriever):
platform = manifest_ref._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY]
architecture = platform[DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY]
os = platform[DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY]
@ -263,21 +262,19 @@ class DockerSchema2ManifestList(ManifestInterface):
try:
manifest = manifest_ref.manifest_obj
except ManifestException:
logger.exception('Could not load child manifest')
return None
except IOError:
except (ManifestException, IOError):
logger.exception('Could not load child manifest')
return None
return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name, lookup_fn)
return manifest.get_v1_compatible_manifest(namespace_name, repo_name, tag_name,
content_retriever)
return None
def unsigned(self):
return self
def generate_legacy_layers(self, images_map, lookup_config_fn):
def generate_legacy_layers(self, images_map, content_retriever):
return None

View file

@ -214,11 +214,14 @@ class DockerSchema2Manifest(ManifestInterface):
return ([str(layer.digest) for layer in self.layers if not layer.urls] +
[str(self.config.digest)])
def get_manifest_labels(self, lookup_config_fn):
return self._get_built_config(lookup_config_fn).labels
def get_manifest_labels(self, content_retriever):
return self._get_built_config(content_retriever).labels
def _get_built_config(self, content_retriever):
config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest)
if config_bytes is None:
raise MalformedSchema2Manifest('Could not load config blob for manifest')
def _get_built_config(self, lookup_config_fn):
config_bytes = lookup_config_fn(self.config.digest)
if len(config_bytes) != self.config.size:
raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s',
len(config_bytes), self.config.size)
@ -229,7 +232,7 @@ class DockerSchema2Manifest(ManifestInterface):
def bytes(self):
return self._payload
def child_manifests(self, lookup_manifest_fn):
def child_manifests(self, content_retriever):
return None
def _generate_layers(self):
@ -269,13 +272,12 @@ class DockerSchema2Manifest(ManifestInterface):
v1_layer_id = digest_history.hexdigest()
yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id)
def populate_schema1_builder(self, v1_builder, lookup_config_fn):
def populate_schema1_builder(self, v1_builder, content_retriever):
""" Populates a DockerSchema1ManifestBuilder with the layers and config from
this schema. The `lookup_config_fn` is a function that, when given the config
digest SHA, returns the associated configuration JSON bytes for this schema.
this schema.
"""
assert not self.has_remote_layer
schema2_config = self._get_built_config(lookup_config_fn)
schema2_config = self._get_built_config(content_retriever)
# Build the V1 IDs for the layers.
layers = list(self.layers_with_v1_ids)
@ -287,22 +289,22 @@ class DockerSchema2Manifest(ManifestInterface):
return v1_builder
def generate_legacy_layers(self, images_map, lookup_config_fn):
def generate_legacy_layers(self, images_map, content_retriever):
assert not self.has_remote_layer
# NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
# the logic for generating the DockerV1Metadata. All of this will go away once we get
# rid of legacy images in the database, so this is a temporary solution.
v1_builder = DockerSchema1ManifestBuilder('', '', '')
self.populate_schema1_builder(v1_builder, lookup_config_fn)
return v1_builder.build().generate_legacy_layers(images_map, lookup_config_fn)
self.populate_schema1_builder(v1_builder, content_retriever)
return v1_builder.build().generate_legacy_layers(images_map, content_retriever)
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, lookup_fn):
def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
if self.has_remote_layer:
return None
v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
self.populate_schema1_builder(v1_builder, lookup_fn)
self.populate_schema1_builder(v1_builder, content_retriever)
return v1_builder.build()
def unsigned(self):

View file

@ -6,6 +6,7 @@ from image.docker.schema2.manifest import DockerSchema2Manifest
from image.docker.schema2.list import (MalformedSchema2ManifestList, DockerSchema2ManifestList,
DockerSchema2ManifestListBuilder)
from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as v22_bytes
from image.docker.schemautil import ContentRetrieverForTesting
from image.docker.test.test_schema1 import MANIFEST_BYTES as v21_bytes
@pytest.mark.parametrize('json_data', [
@ -66,22 +67,21 @@ NO_AMD_MANIFESTLIST_BYTES = json.dumps({
]
})
def test_valid_manifestlist():
def _get_manifest(digest):
if digest == 'sha256:e6':
return v22_bytes
else:
return v21_bytes
retriever = ContentRetrieverForTesting({
'sha256:e6': v22_bytes,
'sha256:5b': v21_bytes,
})
def test_valid_manifestlist():
manifestlist = DockerSchema2ManifestList(MANIFESTLIST_BYTES)
assert len(manifestlist.manifests(_get_manifest)) == 2
assert len(manifestlist.manifests(retriever)) == 2
assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json'
assert manifestlist.bytes == MANIFESTLIST_BYTES
assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES)
assert set(manifestlist.blob_digests) == {'sha256:e6', 'sha256:5b'}
for index, manifest in enumerate(manifestlist.manifests(_get_manifest)):
for index, manifest in enumerate(manifestlist.manifests(retriever)):
if index == 0:
assert isinstance(manifest.manifest_obj, DockerSchema2Manifest)
assert manifest.manifest_obj.schema_version == 2
@ -89,7 +89,7 @@ def test_valid_manifestlist():
assert isinstance(manifest.manifest_obj, DockerSchema1Manifest)
assert manifest.manifest_obj.schema_version == 1
compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', _get_manifest)
compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever)
assert compatible_manifest.schema_version == 1
assert manifestlist.layers is None
@ -98,35 +98,21 @@ def test_valid_manifestlist():
def test_get_v1_compatible_manifest_no_matching_list():
def _get_manifest(digest):
if digest == 'sha256:e6':
return v22_bytes
else:
return v21_bytes
manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES)
assert len(manifestlist.manifests(_get_manifest)) == 1
assert len(manifestlist.manifests(retriever)) == 1
assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json'
assert manifestlist.bytes == NO_AMD_MANIFESTLIST_BYTES
compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', _get_manifest)
compatible_manifest = manifestlist.get_v1_compatible_manifest('foo', 'bar', 'baz', retriever)
assert compatible_manifest is None
def test_builder():
def _get_manifest(digest):
if digest == 'sha256:e6':
return v22_bytes
else:
return v21_bytes
existing = DockerSchema2ManifestList(MANIFESTLIST_BYTES)
builder = DockerSchema2ManifestListBuilder()
for index, manifest in enumerate(existing.manifests(_get_manifest)):
for index, manifest in enumerate(existing.manifests(retriever)):
builder.add_manifest(manifest.manifest_obj, "amd64", "os")
built = builder.build()
assert len(built.manifests(_get_manifest)) == 2
assert len(built.manifests(retriever)) == 2

View file

@ -8,6 +8,8 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder,
from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest,
DockerSchema2ManifestBuilder)
from image.docker.schema2.test.test_config import CONFIG_BYTES
from image.docker.schemautil import ContentRetrieverForTesting
@pytest.mark.parametrize('json_data', [
'',
@ -156,27 +158,28 @@ def test_schema2_builder():
def test_get_manifest_labels():
labels = dict(foo='bar', baz='meh')
def _lookup_config(digest):
config_str = json.dumps({
"config": {
"Labels": labels,
},
"rootfs": {"type": "layers", "diff_ids": []},
"history": [],
})
return config_str + ' ' * (1885 - len(config_str))
retriever = ContentRetrieverForTesting.for_config({
"config": {
"Labels": labels,
},
"rootfs": {"type": "layers", "diff_ids": []},
"history": [],
}, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885)
manifest = DockerSchema2Manifest(MANIFEST_BYTES)
assert manifest.get_manifest_labels(_lookup_config) == labels
assert manifest.get_manifest_labels(retriever) == labels
def test_build_schema1():
manifest = DockerSchema2Manifest(MANIFEST_BYTES)
assert not manifest.has_remote_layer
retriever = ContentRetrieverForTesting({
'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7': CONFIG_BYTES,
})
builder = DockerSchema1ManifestBuilder('somenamespace', 'somename', 'sometag')
manifest.populate_schema1_builder(builder, lambda digest: CONFIG_BYTES)
manifest.populate_schema1_builder(builder, retriever)
schema1 = builder.build(docker_v2_signing_key)
assert schema1.media_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
@ -196,35 +199,33 @@ def test_build_schema1():
def test_get_v1_compatible_manifest():
def _get_config(digest):
config_str = json.dumps({
"config": {
"Labels": {},
retriever = ContentRetrieverForTesting.for_config({
"config": {
"Labels": {},
},
"rootfs": {"type": "layers", "diff_ids": []},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "foo"
},
"rootfs": {"type": "layers", "diff_ids": []},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "foo"
},
{
"created": "2018-04-12T18:37:09.284840891Z",
"created_by": "bar"
},
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "foo"
},
{
"created": "2018-04-12T18:37:09.284840891Z",
"created_by": "bar"
},
],
})
return config_str + ' ' * (1885 - len(config_str))
{
"created": "2018-04-12T18:37:09.284840891Z",
"created_by": "bar"
},
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "foo"
},
{
"created": "2018-04-12T18:37:09.284840891Z",
"created_by": "bar"
},
],
}, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885)
manifest = DockerSchema2Manifest(MANIFEST_BYTES)
schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', _get_config)
schema1 = manifest.get_v1_compatible_manifest('somenamespace', 'somename', 'sometag', retriever)
assert schema1 is not None
assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
assert len(schema1.layers) == len(manifest.layers)
@ -240,25 +241,23 @@ def test_generate_legacy_layers():
builder.set_config_digest('sha256:def456', 2000)
manifest = builder.build()
def _lookup_config(digest):
config_str = json.dumps({
"config": {
retriever = ContentRetrieverForTesting.for_config({
"config": {
},
"rootfs": {"type": "layers", "diff_ids": []},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "foo"
},
"rootfs": {"type": "layers", "diff_ids": []},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "foo"
},
{
"created": "2018-04-12T18:37:09.284840891Z",
"created_by": "bar"
},
],
})
return config_str + ' ' * (2000 - len(config_str))
{
"created": "2018-04-12T18:37:09.284840891Z",
"created_by": "bar"
},
],
}, 'sha256:def456', 2000)
legacy_layers = list(manifest.generate_legacy_layers({}, _lookup_config))
legacy_layers = list(manifest.generate_legacy_layers({}, retriever))
assert len(legacy_layers) == 2
assert legacy_layers[0].content_checksum == 'sha256:abc123'
assert legacy_layers[1].content_checksum == 'sha256:def456'

View file

@ -0,0 +1,24 @@
import json
from image.docker.interfaces import ContentRetriever
class ContentRetrieverForTesting(ContentRetriever):
def __init__(self, digests=None):
self.digests = digests or {}
def add_digest(self, digest, content):
self.digests[digest] = content
def get_manifest_bytes_with_digest(self, digest):
return self.digests.get(digest)
def get_blob_bytes_with_digest(self, digest):
return self.digests.get(digest)
@classmethod
def for_config(cls, config_obj, digest, size):
config_str = json.dumps(config_obj)
padded_string = config_str + ' ' * (size - len(config_str))
digests = {}
digests[digest] = padded_string
return ContentRetrieverForTesting(digests)