import json
import logging
import hashlib

from collections import namedtuple
from jsonschema import validate as validate_schema, ValidationError

from digest import digest_tools
from image.docker import ManifestException
from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
                                  DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
                                  DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
                                  DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE)
from image.docker.schema2.config import DockerSchema2Config

# Keys.
DOCKER_SCHEMA2_MANIFEST_VERSION_KEY = 'schemaVersion'
DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY = 'mediaType'
DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY = 'config'
DOCKER_SCHEMA2_MANIFEST_SIZE_KEY = 'size'
DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY = 'digest'
DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY = 'layers'
DOCKER_SCHEMA2_MANIFEST_URLS_KEY = 'urls'

# Named tuples.
# Size and (unparsed) digest string of the manifest's config entry.
DockerV2ManifestConfig = namedtuple('DockerV2ManifestConfig', ['size', 'digest'])
# One entry of the manifest's layer list; `index` is its position in that list.
DockerV2ManifestLayer = namedtuple('DockerV2ManifestLayer', ['index', 'size', 'digest',
                                                             'is_remote', 'urls'])
# A layer paired with its synthesized V1 ID and the V1 ID of the layer before it.
LayerWithV1ID = namedtuple('LayerWithV1ID', ['layer', 'v1_id', 'v1_parent_id'])

logger = logging.getLogger(__name__)


class MalformedSchema2Manifest(ManifestException):
    """
    Raised when a manifest fails an assertion that should be true according to the Docker Manifest
    v2.2 Specification.
    """
    pass


class DockerSchema2Manifest(object):
    """
    Parsed and schema-validated view over the JSON of a Docker schema 2 manifest.

    Construction parses the given bytes as JSON and validates the result against METASCHEMA;
    either failure is reported as MalformedSchema2Manifest.
    """

    # JSON schema that every manifest handed to the constructor must satisfy.
    METASCHEMA = {
        'type': 'object',
        'properties': {
            DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: {
                'type': 'number',
                'description': 'The version of the schema. Must always be `2`.',
                'minimum': 2,
                'maximum': 2,
            },
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                'type': 'string',
                'description': 'The media type of the schema.',
                'enum': [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE],
            },
            DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: {
                'type': 'object',
                'description': 'The config field references a configuration object for a container, ' +
                               'by digest. This configuration item is a JSON blob that the runtime ' +
                               'uses to set up the container.',
                'properties': {
                    DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                        'type': 'string',
                        'description': 'The MIME type of the referenced object. This should generally be ' +
                                       'application/vnd.docker.container.image.v1+json',
                        'enum': [DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE],
                    },
                    DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
                        'type': 'number',
                        'description': 'The size in bytes of the object. This field exists so that a ' +
                                       'client will have an expected size for the content before ' +
                                       'validating. If the length of the retrieved content does not ' +
                                       'match the specified length, the content should not be trusted.',
                    },
                    DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
                        'type': 'string',
                        'description': 'The content addressable digest of the config in the blob store',
                    },
                },
                'required': [DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY, DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
                             DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY],
            },
            DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: {
                'type': 'array',
                'description': 'The layer list is ordered starting from the base ' +
                               'image (opposite order of schema1).',
                'items': {
                    'type': 'object',
                    'properties': {
                        DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                            'type': 'string',
                            'description': 'The MIME type of the referenced object. This should generally be ' +
                                           'application/vnd.docker.image.rootfs.diff.tar.gzip. Layers of type ' +
                                           'application/vnd.docker.image.rootfs.foreign.diff.tar.gzip may be ' +
                                           'pulled from a remote location but they should never be pushed.',
                            'enum': [DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
                                     DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE],
                        },
                        DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
                            'type': 'number',
                            'description': 'The size in bytes of the object. This field exists so that a ' +
                                           'client will have an expected size for the content before ' +
                                           'validating. If the length of the retrieved content does not ' +
                                           'match the specified length, the content should not be trusted.',
                        },
                        DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
                            'type': 'string',
                            'description': 'The content addressable digest of the layer in the blob store',
                        },
                    },
                    'required': [
                        DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY,
                        DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
                        DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY,
                    ],
                },
            },
        },
        'required': [DOCKER_SCHEMA2_MANIFEST_VERSION_KEY, DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY,
                     DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY, DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY],
    }

    def __init__(self, manifest_bytes):
        """
        Parses `manifest_bytes` as JSON and validates it against METASCHEMA.

        Raises MalformedSchema2Manifest if the data is not valid JSON or does not match the schema.
        """
        # Cache for the `layers` property; filled in on first access.
        self._layers = None

        try:
            self._parsed = json.loads(manifest_bytes)
        except ValueError as ve:
            raise MalformedSchema2Manifest('malformed manifest data: %s' % ve)

        try:
            validate_schema(self._parsed, DockerSchema2Manifest.METASCHEMA)
        except ValidationError as ve:
            raise MalformedSchema2Manifest('manifest data does not match schema: %s' % ve)

    @property
    def schema_version(self):
        """ Returns the schema version of this manifest, which is always 2. """
        return 2

    @property
    def config(self):
        """
        Returns a DockerV2ManifestConfig holding the size and digest recorded in the manifest's
        config section. The digest is returned exactly as it appears in the manifest.
        """
        config = self._parsed[DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY]
        return DockerV2ManifestConfig(size=config[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                      digest=config[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

    @property
    def layers(self):
        """ Returns the layers of this manifest, from base to leaf. """
        if self._layers is None:
            self._layers = list(self._generate_layers())
        return self._layers

    @property
    def leaf_layer(self):
        """
        Returns the last entry of `layers`. Raises IndexError if the manifest has no layers.
        """
        return self.layers[-1]

    def _generate_layers(self):
        """
        Yields a DockerV2ManifestLayer for each entry of the manifest's layer list, in manifest
        order.

        Raises MalformedSchema2Manifest if a layer's digest cannot be parsed.
        """
        for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]):
            content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]
            is_remote = content_type == DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE

            try:
                digest = digest_tools.Digest.parse_digest(layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])
            except digest_tools.InvalidDigestException:
                raise MalformedSchema2Manifest('could not parse manifest digest: %s' %
                                               layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

            # `urls` is not part of the validated schema, so it is None when the entry omits it.
            yield DockerV2ManifestLayer(index=index,
                                        size=layer[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                        digest=digest,
                                        is_remote=is_remote,
                                        urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY))

    @property
    def layers_with_v1_ids(self):
        """
        Yields a LayerWithV1ID for each layer, from base to leaf. Each V1 ID is the hex SHA-256 of
        the digests and indexes of all layers up to and including that layer; the parent ID is the
        V1 ID of the preceding layer (None for the first layer).
        """
        digest_history = hashlib.sha256()
        v1_layer_parent_id = None
        v1_layer_id = None

        for layer in self.layers:
            v1_layer_parent_id = v1_layer_id

            # Create a new synthesized V1 ID for the layer by adding its digest and index to the
            # existing digest history hash builder. This will ensure unique V1s across *all* schemas in
            # a repository.
            # hashlib only accepts bytes on Python 3, so every piece is encoded before hashing.
            digest_history.update(str(layer.digest).encode('utf-8'))
            digest_history.update(b"#")
            digest_history.update(str(layer.index).encode('utf-8'))
            digest_history.update(b"|")
            v1_layer_id = digest_history.hexdigest()
            yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id)

    def populate_schema1_builder(self, v1_builder, lookup_config_fn):
        """
        Populates a DockerSchema1ManifestBuilder with the layers and config from this schema. The
        `lookup_config_fn` is a function that, when given the config digest SHA, returns the
        associated configuration JSON bytes for this schema.

        Returns the given `v1_builder`. Raises MalformedSchema2Manifest if the length of the
        retrieved config does not equal the size recorded in the manifest.
        """
        config_bytes = lookup_config_fn(self.config.digest)
        if len(config_bytes) != self.config.size:
            # Interpolate the sizes into the message rather than passing them as extra
            # exception arguments, which would leave the placeholders unformatted.
            raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s' %
                                           (len(config_bytes), self.config.size))

        schema2_config = DockerSchema2Config(config_bytes)

        # Build the V1 IDs for the layers.
        layers = list(self.layers_with_v1_ids)
        for layer_with_ids in reversed(layers):  # Schema1 has layers in reverse order
            v1_compatibility = schema2_config.build_v1_compatibility(layer_with_ids.layer.index,
                                                                     layer_with_ids.v1_id,
                                                                     layer_with_ids.v1_parent_id)
            v1_builder.add_layer(str(layer_with_ids.layer.digest), json.dumps(v1_compatibility))

        return v1_builder