import json
import logging
import hashlib

from collections import namedtuple
from jsonschema import validate as validate_schema, ValidationError

from digest import digest_tools
from image.docker import ManifestException
from image.docker.interfaces import ManifestInterface
from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
                                  DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
                                  DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
                                  DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from image.docker.schema2.config import DockerSchema2Config

# Keys.
DOCKER_SCHEMA2_MANIFEST_VERSION_KEY = 'schemaVersion'
DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY = 'mediaType'
DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY = 'config'
DOCKER_SCHEMA2_MANIFEST_SIZE_KEY = 'size'
DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY = 'digest'
DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY = 'layers'
DOCKER_SCHEMA2_MANIFEST_URLS_KEY = 'urls'

# Named tuples.
DockerV2ManifestConfig = namedtuple('DockerV2ManifestConfig', ['size', 'digest'])
DockerV2ManifestLayer = namedtuple('DockerV2ManifestLayer',
                                   ['index', 'digest', 'is_remote', 'urls', 'compressed_size'])

LayerWithV1ID = namedtuple('LayerWithV1ID', ['layer', 'v1_id', 'v1_parent_id'])

logger = logging.getLogger(__name__)


class MalformedSchema2Manifest(ManifestException):
    """
    Raised when a manifest fails an assertion that should be true according to the Docker Manifest
    v2.2 Specification.
    """
    pass


class DockerSchema2Manifest(ManifestInterface):
    """
    Parsed, validated view over the raw bytes of a Docker schema 2 manifest.

    The payload is JSON-decoded and checked against METASCHEMA at construction time; the
    accessors below read from the decoded document.
    """

    # JSON schema the decoded manifest must satisfy.
    METASCHEMA = {
        'type': 'object',
        'properties': {
            DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: {
                'type': 'number',
                'description': 'The version of the schema. Must always be `2`.',
                'minimum': 2,
                'maximum': 2,
            },
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                'type': 'string',
                'description': 'The media type of the schema.',
                'enum': [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE],
            },
            DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: {
                'type': 'object',
                'description': (
                    'The config field references a configuration object for a container, '
                    'by digest. This configuration item is a JSON blob that the runtime '
                    'uses to set up the container.'
                ),
                'properties': {
                    DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                        'type': 'string',
                        'description': (
                            'The MIME type of the referenced object. This should generally be '
                            'application/vnd.docker.container.image.v1+json'
                        ),
                        'enum': [DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE],
                    },
                    DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
                        'type': 'number',
                        'description': (
                            'The size in bytes of the object. This field exists so that a '
                            'client will have an expected size for the content before '
                            'validating. If the length of the retrieved content does not '
                            'match the specified length, the content should not be trusted.'
                        ),
                    },
                    DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
                        'type': 'string',
                        'description': (
                            'The content addressable digest of the config in the blob store'
                        ),
                    },
                },
                'required': [
                    DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY,
                    DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
                    DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY,
                ],
            },
            DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: {
                'type': 'array',
                'description': (
                    'The layer list is ordered starting from the base '
                    'image (opposite order of schema1).'
                ),
                'items': {
                    'type': 'object',
                    'properties': {
                        DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                            'type': 'string',
                            'description': (
                                'The MIME type of the referenced object. This should generally be '
                                'application/vnd.docker.image.rootfs.diff.tar.gzip. Layers of type '
                                'application/vnd.docker.image.rootfs.foreign.diff.tar.gzip may be '
                                'pulled from a remote location but they should never be pushed.'
                            ),
                            'enum': [
                                DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
                                DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE,
                            ],
                        },
                        DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
                            'type': 'number',
                            'description': (
                                'The size in bytes of the object. This field exists so that a '
                                'client will have an expected size for the content before '
                                'validating. If the length of the retrieved content does not '
                                'match the specified length, the content should not be trusted.'
                            ),
                        },
                        DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
                            'type': 'string',
                            'description': (
                                'The content addressable digest of the layer in the blob store'
                            ),
                        },
                    },
                    'required': [
                        DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY,
                        DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
                        DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY,
                    ],
                },
            },
        },
        'required': [
            DOCKER_SCHEMA2_MANIFEST_VERSION_KEY,
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY,
            DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY,
            DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY,
        ],
    }

    def __init__(self, manifest_bytes):
        """
        Decodes and validates the given manifest payload.

        Raises MalformedSchema2Manifest if the payload is not valid JSON, does not match
        METASCHEMA, contains a layer digest that cannot be parsed, or contains a remote layer
        without any `urls`.
        """
        self._layers = None
        self._payload = manifest_bytes

        try:
            self._parsed = json.loads(manifest_bytes)
        except ValueError as ve:
            raise MalformedSchema2Manifest('malformed manifest data: %s' % ve)

        try:
            validate_schema(self._parsed, DockerSchema2Manifest.METASCHEMA)
        except ValidationError as ve:
            raise MalformedSchema2Manifest('manifest data does not match schema: %s' % ve)

        # Accessing `layers` here also forces every layer digest to be parsed eagerly.
        for layer in self.layers:
            if layer.is_remote and not layer.urls:
                raise MalformedSchema2Manifest('missing `urls` for remote layer')

    @property
    def is_manifest_list(self):
        """ Always False for this manifest type. """
        return False

    @property
    def schema_version(self):
        """ Always 2 for this manifest type. """
        return 2

    @property
    def manifest_dict(self):
        """ Returns the JSON-decoded manifest document. """
        return self._parsed

    @property
    def media_type(self):
        """ Returns the `mediaType` value found in the manifest. """
        return self._parsed[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]

    @property
    def digest(self):
        """ Returns the result of digest_tools.sha256_digest over the raw payload. """
        return digest_tools.sha256_digest(self._payload)

    @property
    def config(self):
        """ Returns a DockerV2ManifestConfig holding the size and digest of the config entry. """
        config = self._parsed[DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY]
        return DockerV2ManifestConfig(size=config[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                      digest=config[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

    @property
    def layers(self):
        """ Returns the layers of this manifest, from base to leaf. """
        # Built once on first access and cached for subsequent calls.
        if self._layers is None:
            self._layers = list(self._generate_layers())
        return self._layers

    @property
    def layers_compressed_size(self):
        """ Returns the sum of the `size` values of all layers. """
        return sum(layer.compressed_size for layer in self.layers)

    @property
    def leaf_layer(self):
        """ Returns the last layer listed in the manifest. """
        return self.layers[-1]

    @property
    def has_remote_layer(self):
        """ Returns True if any layer has the remote layer media type. """
        return any(layer.is_remote for layer in self.layers)

    @property
    def leaf_layer_v1_image_id(self):
        """ Returns the synthesized V1 ID of the last layer, or None if any layer is remote. """
        # NOTE: If there exists a layer with remote content, then we consider this manifest
        # to not support legacy images.
        if self.has_remote_layer:
            return None

        return list(self.layers_with_v1_ids)[-1].v1_id

    @property
    def legacy_image_ids(self):
        """ Returns the synthesized V1 IDs of all layers, or None if any layer is remote. """
        if self.has_remote_layer:
            return None

        return [l.v1_id for l in self.layers_with_v1_ids]

    @property
    def blob_digests(self):
        """ Returns the digest strings of every layer followed by that of the config. """
        return [str(layer.digest) for layer in self.layers] + [str(self.config.digest)]

    @property
    def local_blob_digests(self):
        """ Returns the digest strings of every layer without `urls`, followed by the config's. """
        return ([str(layer.digest) for layer in self.layers if not layer.urls] +
                [str(self.config.digest)])

    def get_manifest_labels(self, content_retriever):
        """ Returns the `labels` of the config loaded through the given content retriever. """
        return self._get_built_config(content_retriever).labels

    def _get_built_config(self, content_retriever):
        """
        Loads the config blob via the content retriever and wraps it in a DockerSchema2Config.

        Raises MalformedSchema2Manifest if the blob cannot be loaded or if its length differs
        from the size declared in the manifest.
        """
        config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest)
        if config_bytes is None:
            raise MalformedSchema2Manifest('Could not load config blob for manifest')

        if len(config_bytes) != self.config.size:
            # The values must be interpolated with `%`; passing them as extra exception
            # arguments would leave the `%s` placeholders unformatted in the message.
            raise MalformedSchema2Manifest(
                'Size of config does not match that retrieved: %s vs %s' %
                (len(config_bytes), self.config.size))

        return DockerSchema2Config(config_bytes)

    @property
    def bytes(self):
        """ Returns the raw payload this manifest was constructed from. """
        return self._payload

    def child_manifests(self, content_retriever):
        """ Always returns None for this manifest type. """
        return None

    def _generate_layers(self):
        """
        Yields a DockerV2ManifestLayer for each entry of the manifest's layer list, in order.

        Raises MalformedSchema2Manifest if a layer digest cannot be parsed.
        """
        for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]):
            content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]
            is_remote = content_type == DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE

            try:
                digest = digest_tools.Digest.parse_digest(layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])
            except digest_tools.InvalidDigestException:
                raise MalformedSchema2Manifest('could not parse manifest digest: %s' %
                                               layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

            yield DockerV2ManifestLayer(index=index,
                                        compressed_size=layer[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                        digest=digest,
                                        is_remote=is_remote,
                                        urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY))

    @property
    def layers_with_v1_ids(self):
        """
        Yields a LayerWithV1ID for each layer, from base to leaf.

        Each V1 ID is the hex SHA-256 of the running history of `<digest>#<index>|` entries up
        to and including that layer; the parent ID is the previous layer's V1 ID (None for the
        first layer). Asserts that the manifest has no remote layers.
        """
        assert not self.has_remote_layer
        digest_history = hashlib.sha256()
        v1_layer_parent_id = None
        v1_layer_id = None

        for layer in self.layers:
            v1_layer_parent_id = v1_layer_id

            # Create a new synthesized V1 ID for the layer by adding its digest and index to the
            # existing digest history hash builder. This will ensure unique V1s across *all*
            # schemas in a repository.
            # The hash builder is fed explicitly encoded bytes so that the same digest is
            # produced regardless of whether `str` is a byte or a text type.
            digest_history.update(str(layer.digest).encode('utf-8'))
            digest_history.update(b'#')
            digest_history.update(str(layer.index).encode('utf-8'))
            digest_history.update(b'|')
            v1_layer_id = digest_history.hexdigest()
            yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id)

    def populate_schema1_builder(self, v1_builder, content_retriever):
        """
        Populates a DockerSchema1ManifestBuilder with the layers and config from this schema.

        Asserts that the manifest has no remote layers. Returns the given builder.
        """
        assert not self.has_remote_layer
        schema2_config = self._get_built_config(content_retriever)

        # Build the V1 IDs for the layers.
        layers = list(self.layers_with_v1_ids)
        for layer_with_ids in reversed(layers):  # Schema1 has layers in reverse order
            v1_compatibility = schema2_config.build_v1_compatibility(layer_with_ids.layer.index,
                                                                     layer_with_ids.v1_id,
                                                                     layer_with_ids.v1_parent_id)
            v1_builder.add_layer(str(layer_with_ids.layer.digest), json.dumps(v1_compatibility))

        return v1_builder

    def generate_legacy_layers(self, images_map, content_retriever):
        """
        Returns the result of `generate_legacy_layers` on a schema 1 manifest built from this
        manifest. Asserts that the manifest has no remote layers.
        """
        assert not self.has_remote_layer

        # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
        # the logic for generating the DockerV1Metadata. All of this will go away once we get
        # rid of legacy images in the database, so this is a temporary solution.
        v1_builder = DockerSchema1ManifestBuilder('', '', '')
        self.populate_schema1_builder(v1_builder, content_retriever)
        return v1_builder.build().generate_legacy_layers(images_map, content_retriever)

    def get_v1_compatible_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
        """
        Returns a schema 1 manifest built from this manifest for the given namespace, repository
        and tag, or None if any layer is remote.
        """
        if self.has_remote_layer:
            return None

        v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
        self.populate_schema1_builder(v1_builder, content_retriever)
        return v1_builder.build()

    def unsigned(self):
        """ Returns this manifest itself. """
        return self


class DockerSchema2ManifestBuilder(object):
    """
    A convenient abstraction around creating new DockerSchema2Manifests.
    """
    def __init__(self):
        self.config = None
        self.layers = []

    def set_config(self, schema2_config):
        """ Sets the configuration for the manifest being built. """
        self.set_config_digest(schema2_config.digest, schema2_config.size)

    def set_config_digest(self, config_digest, config_size):
        """ Sets the digest and size of the configuration layer. """
        self.config = DockerV2ManifestConfig(size=config_size, digest=config_digest)

    def add_layer(self, digest, size, urls=None):
        """
        Adds a layer to the manifest. The layer is marked as remote when `urls` is non-empty.
        """
        self.layers.append(DockerV2ManifestLayer(index=len(self.layers),
                                                 digest=digest,
                                                 compressed_size=size,
                                                 urls=urls,
                                                 is_remote=bool(urls)))

    def build(self):
        """
        Builds and returns the DockerSchema2Manifest.

        Asserts that at least one layer has been added and that a config has been set.
        """
        assert self.layers
        assert self.config

        def _build_layer(layer):
            """ Returns the manifest dictionary entry for a single layer. """
            if layer.urls:
                return {
                    DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE,
                    DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: layer.compressed_size,
                    DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(layer.digest),
                    DOCKER_SCHEMA2_MANIFEST_URLS_KEY: layer.urls,
                }

            return {
                DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
                DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: layer.compressed_size,
                DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(layer.digest),
            }

        manifest_dict = {
            DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: 2,
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,

            # Config
            DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: {
                DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
                DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: self.config.size,
                DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(self.config.digest),
            },

            # Layers
            DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: [
                _build_layer(layer) for layer in self.layers
            ],
        }
        return DockerSchema2Manifest(json.dumps(manifest_dict, indent=3))