This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/image/docker/schema2/manifest.py
Joseph Schorr cbfb6054e5 Switch content retrieval in manifests to be behind an interface
This allows for easy separation of retrieval of config blobs vs manifests
2018-11-19 11:55:52 +02:00

374 lines
14 KiB
Python

import json
import logging
import hashlib
from collections import namedtuple
from jsonschema import validate as validate_schema, ValidationError
from digest import digest_tools
from image.docker import ManifestException
from image.docker.interfaces import ManifestInterface
from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from image.docker.schema2.config import DockerSchema2Config
# JSON keys that appear in a Docker V2 Schema 2 manifest document.
DOCKER_SCHEMA2_MANIFEST_VERSION_KEY = 'schemaVersion'
DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY = 'mediaType'
DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY = 'config'
DOCKER_SCHEMA2_MANIFEST_SIZE_KEY = 'size'
DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY = 'digest'
DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY = 'layers'
DOCKER_SCHEMA2_MANIFEST_URLS_KEY = 'urls'

# Lightweight record types used to describe the pieces of a manifest.

# Reference to the config blob: its size and digest.
DockerV2ManifestConfig = namedtuple('DockerV2ManifestConfig', ('size', 'digest'))

# A single layer entry: position in the manifest, digest, remote flag, optional URLs and
# compressed size.
DockerV2ManifestLayer = namedtuple(
  'DockerV2ManifestLayer',
  ('index', 'digest', 'is_remote', 'urls', 'compressed_size'),
)

# A layer paired with its synthesized V1 image ID and the V1 ID of its parent.
LayerWithV1ID = namedtuple('LayerWithV1ID', ('layer', 'v1_id', 'v1_parent_id'))

logger = logging.getLogger(__name__)
class MalformedSchema2Manifest(ManifestException):
  """
  Signals that manifest data violated an assertion which the Docker Manifest v2.2
  Specification requires to hold.
  """
class DockerSchema2Manifest(ManifestInterface):
  """
  Parsed and validated representation of a Docker V2 Schema 2 manifest.

  The raw payload is kept untouched (so the digest is computed over the exact bytes given),
  while the decoded JSON is validated against METASCHEMA. Layers are exposed from base to
  leaf, which is the opposite order of schema1.
  """

  # JSON schema that every manifest payload is validated against on construction.
  METASCHEMA = {
    'type': 'object',
    'properties': {
      DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: {
        'type': 'number',
        'description': 'The version of the schema. Must always be `2`.',
        'minimum': 2,
        'maximum': 2,
      },
      DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
        'type': 'string',
        'description': 'The media type of the schema.',
        'enum': [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE],
      },
      DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: {
        'type': 'object',
        'description': 'The config field references a configuration object for a container, ' +
                       'by digest. This configuration item is a JSON blob that the runtime ' +
                       'uses to set up the container.',
        'properties': {
          DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
            'type': 'string',
            'description': 'The MIME type of the referenced object. This should generally be ' +
                           'application/vnd.docker.container.image.v1+json',
            'enum': [DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE],
          },
          DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
            'type': 'number',
            'description': 'The size in bytes of the object. This field exists so that a ' +
                           'client will have an expected size for the content before ' +
                           'validating. If the length of the retrieved content does not ' +
                           'match the specified length, the content should not be trusted.',
          },
          DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
            'type': 'string',
            'description': 'The content addressable digest of the config in the blob store',
          },
        },
        'required': [DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY, DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
                     DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY],
      },
      DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: {
        'type': 'array',
        'description': 'The layer list is ordered starting from the base ' +
                       'image (opposite order of schema1).',
        'items': {
          'type': 'object',
          'properties': {
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
              'type': 'string',
              'description': 'The MIME type of the referenced object. This should generally be ' +
                             'application/vnd.docker.image.rootfs.diff.tar.gzip. Layers of type ' +
                             'application/vnd.docker.image.rootfs.foreign.diff.tar.gzip may be ' +
                             'pulled from a remote location but they should never be pushed.',
              'enum': [DOCKER_SCHEMA2_LAYER_CONTENT_TYPE, DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE],
            },
            DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
              'type': 'number',
              'description': 'The size in bytes of the object. This field exists so that a ' +
                             'client will have an expected size for the content before ' +
                             'validating. If the length of the retrieved content does not ' +
                             'match the specified length, the content should not be trusted.',
            },
            DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
              'type': 'string',
              'description': 'The content addressable digest of the layer in the blob store',
            },
          },
          'required': [
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY, DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
            DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY,
          ],
        },
      },
    },
    'required': [DOCKER_SCHEMA2_MANIFEST_VERSION_KEY, DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY,
                 DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY, DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY],
  }

  def __init__(self, manifest_bytes):
    """ Parses and validates the given serialized manifest.

        Raises MalformedSchema2Manifest if the payload is not valid JSON, does not match
        METASCHEMA, contains a layer digest that cannot be parsed, or contains a remote layer
        without any `urls`.
    """
    self._layers = None
    self._payload = manifest_bytes

    try:
      self._parsed = json.loads(manifest_bytes)
    except ValueError as ve:
      raise MalformedSchema2Manifest('malformed manifest data: %s' % ve)

    try:
      validate_schema(self._parsed, DockerSchema2Manifest.METASCHEMA)
    except ValidationError as ve:
      raise MalformedSchema2Manifest('manifest data does not match schema: %s' % ve)

    # Accessing `self.layers` here also forces every layer digest to be parsed, so digest
    # problems surface at construction time.
    for layer in self.layers:
      if layer.is_remote and not layer.urls:
        raise MalformedSchema2Manifest('missing `urls` for remote layer')

  @property
  def is_manifest_list(self):
    """ Always False: this class represents a single manifest. """
    return False

  @property
  def schema_version(self):
    """ The schema version implemented by this class (always 2). """
    return 2

  @property
  def manifest_dict(self):
    """ Returns the decoded JSON document of the manifest. """
    return self._parsed

  @property
  def media_type(self):
    """ Returns the `mediaType` declared in the manifest. """
    return self._parsed[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]

  @property
  def digest(self):
    """ Returns the SHA256 digest computed over the original payload. """
    return digest_tools.sha256_digest(self._payload)

  @property
  def config(self):
    """ Returns a DockerV2ManifestConfig with the size and digest of the config blob. """
    config = self._parsed[DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY]
    return DockerV2ManifestConfig(size=config[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                  digest=config[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

  @property
  def layers(self):
    """ Returns the layers of this manifest, from base to leaf. """
    # Built lazily on first access and cached afterwards.
    if self._layers is None:
      self._layers = list(self._generate_layers())
    return self._layers

  @property
  def layers_compressed_size(self):
    """ Returns the sum of the compressed sizes of all layers. """
    return sum(layer.compressed_size for layer in self.layers)

  @property
  def leaf_layer(self):
    """ Returns the last (leaf) layer of the manifest. """
    return self.layers[-1]

  @property
  def has_remote_layer(self):
    """ Returns True if at least one layer is marked as remote. """
    return any(layer.is_remote for layer in self.layers)

  @property
  def leaf_layer_v1_image_id(self):
    """ Returns the synthesized V1 image ID of the leaf layer, or None if the manifest
        contains a remote layer.
    """
    # NOTE: If there exists a layer with remote content, then we consider this manifest
    # to not support legacy images.
    if self.has_remote_layer:
      return None

    return list(self.layers_with_v1_ids)[-1].v1_id

  @property
  def legacy_image_ids(self):
    """ Returns the synthesized V1 image IDs of all layers (base to leaf), or None if the
        manifest contains a remote layer.
    """
    if self.has_remote_layer:
      return None

    return [l.v1_id for l in self.layers_with_v1_ids]

  @property
  def blob_digests(self):
    """ Returns the digests (as strings) of every layer followed by the config digest. """
    return [str(layer.digest) for layer in self.layers] + [str(self.config.digest)]

  @property
  def local_blob_digests(self):
    """ Returns the digests (as strings) of every layer that has no `urls`, followed by the
        config digest.
    """
    return ([str(layer.digest) for layer in self.layers if not layer.urls] +
            [str(self.config.digest)])

  def get_manifest_labels(self, content_retriever):
    """ Returns the labels found in the config blob loaded via the content retriever. """
    return self._get_built_config(content_retriever).labels

  def _get_built_config(self, content_retriever):
    """ Loads the config blob through the content retriever and returns it wrapped in a
        DockerSchema2Config.

        Raises MalformedSchema2Manifest if the blob cannot be loaded or if its length does
        not match the size declared in the manifest.
    """
    config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest)
    if config_bytes is None:
      raise MalformedSchema2Manifest('Could not load config blob for manifest')

    if len(config_bytes) != self.config.size:
      # Interpolate before raising: exception constructors do not apply %-formatting to
      # their arguments, so passing the values separately leaves the placeholders unfilled.
      msg = 'Size of config does not match that retrieved: %s vs %s' % (len(config_bytes),
                                                                        self.config.size)
      raise MalformedSchema2Manifest(msg)

    return DockerSchema2Config(config_bytes)

  @property
  def bytes(self):
    """ Returns the original serialized payload. """
    return self._payload

  def child_manifests(self, content_retriever):
    """ Always returns None. """
    return None

  def _generate_layers(self):
    """ Yields a DockerV2ManifestLayer for each entry of the manifest's layer list, in
        manifest order.

        Raises MalformedSchema2Manifest if a layer digest cannot be parsed.
    """
    for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]):
      content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]
      is_remote = content_type == DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE

      try:
        digest = digest_tools.Digest.parse_digest(layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])
      except digest_tools.InvalidDigestException:
        raise MalformedSchema2Manifest('could not parse manifest digest: %s' %
                                       layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

      yield DockerV2ManifestLayer(index=index,
                                  compressed_size=layer[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                  digest=digest,
                                  is_remote=is_remote,
                                  urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY))

  @property
  def layers_with_v1_ids(self):
    """ Yields a LayerWithV1ID for each layer, from base to leaf, pairing the layer with a
        synthesized V1 ID and the V1 ID of its parent (None for the base layer).

        Must not be used on manifests with remote layers. As this is a generator, the
        assertion is only evaluated once iteration starts.
    """
    assert not self.has_remote_layer

    # A single running hash is used, so each ID depends on all the layers before it.
    digest_history = hashlib.sha256()
    v1_layer_parent_id = None
    v1_layer_id = None

    for layer in self.layers:
      v1_layer_parent_id = v1_layer_id

      # Create a new synthesized V1 ID for the layer by adding its digest and index to the
      # existing digest history hash builder. This will ensure unique V1s across *all* schemas in
      # a repository.
      digest_history.update(str(layer.digest))
      digest_history.update("#")
      digest_history.update(str(layer.index))
      digest_history.update("|")
      v1_layer_id = digest_history.hexdigest()
      yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id)

  def populate_schema1_builder(self, v1_builder, content_retriever):
    """ Populates a DockerSchema1ManifestBuilder with the layers and config from
        this schema.

        Returns the same builder that was passed in. Must not be used on manifests with
        remote layers.
    """
    assert not self.has_remote_layer
    schema2_config = self._get_built_config(content_retriever)

    # Build the V1 IDs for the layers.
    layers = list(self.layers_with_v1_ids)
    for layer_with_ids in reversed(layers):  # Schema1 has layers in reverse order
      v1_compatibility = schema2_config.build_v1_compatibility(layer_with_ids.layer.index,
                                                               layer_with_ids.v1_id,
                                                               layer_with_ids.v1_parent_id)
      v1_builder.add_layer(str(layer_with_ids.layer.digest), json.dumps(v1_compatibility))

    return v1_builder

  def generate_legacy_layers(self, images_map, content_retriever):
    """ Builds a throwaway schema1 manifest from this manifest and returns the result of
        its `generate_legacy_layers`. Must not be used on manifests with remote layers.
    """
    assert not self.has_remote_layer

    # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
    # the logic for generating the DockerV1Metadata. All of this will go away once we get
    # rid of legacy images in the database, so this is a temporary solution.
    v1_builder = DockerSchema1ManifestBuilder('', '', '')
    self.populate_schema1_builder(v1_builder, content_retriever)
    return v1_builder.build().generate_legacy_layers(images_map, content_retriever)

  def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
    """ Returns a schema1 manifest built from this manifest for the given namespace,
        repository and tag, or None if the manifest contains a remote layer.
    """
    if self.has_remote_layer:
      return None

    v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
    self.populate_schema1_builder(v1_builder, content_retriever)
    return v1_builder.build()

  def unsigned(self):
    """ Returns this manifest itself. """
    return self
class DockerSchema2ManifestBuilder(object):
  """
  Collects a config reference and an ordered list of layers, and turns them into a new
  DockerSchema2Manifest.
  """

  def __init__(self):
    # Both are filled in by the setter/adder methods before `build` is called.
    self.config = None
    self.layers = []

  def set_config(self, schema2_config):
    """ Uses the digest and size of the given config as the manifest's configuration. """
    self.set_config_digest(schema2_config.digest, schema2_config.size)

  def set_config_digest(self, config_digest, config_size):
    """ Records the digest and size of the configuration blob. """
    self.config = DockerV2ManifestConfig(size=config_size, digest=config_digest)

  def add_layer(self, digest, size, urls=None):
    """ Appends a layer to the manifest; a layer given any URLs is marked as remote. """
    next_index = len(self.layers)
    new_layer = DockerV2ManifestLayer(index=next_index,
                                      digest=digest,
                                      is_remote=bool(urls),
                                      urls=urls,
                                      compressed_size=size)
    self.layers.append(new_layer)

  def build(self):
    """ Serializes the collected config and layers and returns the resulting
        DockerSchema2Manifest.
    """
    assert self.layers
    assert self.config

    # Keys are inserted in the same order for every entry so the serialized output is stable.
    layer_entries = []
    for layer in self.layers:
      if layer.urls:
        layer_media_type = DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE
      else:
        layer_media_type = DOCKER_SCHEMA2_LAYER_CONTENT_TYPE

      entry = {
        DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: layer_media_type,
        DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: layer.compressed_size,
        DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(layer.digest),
      }

      # Only layers with URLs carry the `urls` key.
      if layer.urls:
        entry[DOCKER_SCHEMA2_MANIFEST_URLS_KEY] = layer.urls

      layer_entries.append(entry)

    config_entry = {
      DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
      DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: self.config.size,
      DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(self.config.digest),
    }

    manifest_dict = {
      DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: 2,
      DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
      DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: config_entry,
      DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: layer_entries,
    }

    serialized = json.dumps(manifest_dict, indent=3)
    return DockerSchema2Manifest(serialized)