import json
import logging
import hashlib

from collections import namedtuple

from jsonschema import validate as validate_schema, ValidationError

from digest import digest_tools
from image.docker import ManifestException
from image.docker.interfaces import ManifestInterface
from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
                                  DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
                                  DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
                                  DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from image.docker.schema2.config import DockerSchema2Config

# Keys.
DOCKER_SCHEMA2_MANIFEST_VERSION_KEY = 'schemaVersion'
DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY = 'mediaType'
DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY = 'config'
DOCKER_SCHEMA2_MANIFEST_SIZE_KEY = 'size'
DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY = 'digest'
DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY = 'layers'
DOCKER_SCHEMA2_MANIFEST_URLS_KEY = 'urls'

# Named tuples.
DockerV2ManifestConfig = namedtuple('DockerV2ManifestConfig', ['size', 'digest'])
DockerV2ManifestLayer = namedtuple('DockerV2ManifestLayer', ['index', 'digest',
                                                             'is_remote',
                                                             'urls',
                                                             'compressed_size'])

ManifestImageLayer = namedtuple('ManifestImageLayer', ['history', 'blob_layer', 'v1_id',
                                                       'v1_parent_id', 'compressed_size',
                                                       'blob_digest'])

# Digest used as the blob digest for history entries flagged as empty (no layer blob).
EMPTY_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4'

logger = logging.getLogger(__name__)


class MalformedSchema2Manifest(ManifestException):
    """
    Raised when a manifest fails an assertion that should be true according to the Docker Manifest
    v2.2 Specification.
    """
    pass


class DockerSchema2Manifest(ManifestInterface):
    """
    Parsed and schema-validated representation of a Docker schema 2 manifest.

    The raw payload is kept as given, so `bytes` and `digest` always refer to the original
    serialized form rather than a re-serialization of the parsed JSON.
    """

    # JSON schema that every manifest payload is validated against in __init__.
    METASCHEMA = {
        'type': 'object',
        'properties': {
            DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: {
                'type': 'number',
                'description': 'The version of the schema. Must always be `2`.',
                'minimum': 2,
                'maximum': 2,
            },
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                'type': 'string',
                'description': 'The media type of the schema.',
                'enum': [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE],
            },
            DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: {
                'type': 'object',
                'description': 'The config field references a configuration object for a container, ' +
                               'by digest. This configuration item is a JSON blob that the runtime ' +
                               'uses to set up the container.',
                'properties': {
                    DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                        'type': 'string',
                        'description': 'The MIME type of the referenced object. This should generally be ' +
                                       'application/vnd.docker.container.image.v1+json',
                        'enum': [DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE],
                    },
                    DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
                        'type': 'number',
                        'description': 'The size in bytes of the object. This field exists so that a ' +
                                       'client will have an expected size for the content before ' +
                                       'validating. If the length of the retrieved content does not ' +
                                       'match the specified length, the content should not be trusted.',
                    },
                    DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
                        'type': 'string',
                        'description': 'The content addressable digest of the config in the blob store',
                    },
                },
                'required': [DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY, DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
                             DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY],
            },
            DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: {
                'type': 'array',
                'description': 'The layer list is ordered starting from the base ' +
                               'image (opposite order of schema1).',
                'items': {
                    'type': 'object',
                    'properties': {
                        DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                            'type': 'string',
                            'description': 'The MIME type of the referenced object. This should generally be ' +
                                           'application/vnd.docker.image.rootfs.diff.tar.gzip. Layers of type ' +
                                           'application/vnd.docker.image.rootfs.foreign.diff.tar.gzip may be ' +
                                           'pulled from a remote location but they should never be pushed.',
                            'enum': [DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
                                     DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE],
                        },
                        DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
                            'type': 'number',
                            'description': 'The size in bytes of the object. This field exists so that a ' +
                                           'client will have an expected size for the content before ' +
                                           'validating. If the length of the retrieved content does not ' +
                                           'match the specified length, the content should not be trusted.',
                        },
                        DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
                            'type': 'string',
                            'description': 'The content addressable digest of the layer in the blob store',
                        },
                    },
                    'required': [
                        DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY,
                        DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
                        DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY,
                    ],
                },
            },
        },
        'required': [DOCKER_SCHEMA2_MANIFEST_VERSION_KEY, DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY,
                     DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY, DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY],
    }

    def __init__(self, manifest_bytes):
        """
        Parses and validates the serialized manifest.

        Raises MalformedSchema2Manifest if the payload is not valid JSON, does not match
        METASCHEMA, contains an unparseable layer digest, or declares a remote layer without
        any `urls`.
        """
        self._layers = None
        self._payload = manifest_bytes

        try:
            self._parsed = json.loads(manifest_bytes)
        except ValueError as ve:
            raise MalformedSchema2Manifest('malformed manifest data: %s' % ve)

        try:
            validate_schema(self._parsed, DockerSchema2Manifest.METASCHEMA)
        except ValidationError as ve:
            raise MalformedSchema2Manifest('manifest data does not match schema: %s' % ve)

        # Accessing `layers` also parses every layer digest, so malformed digests surface here.
        for layer in self.layers:
            if layer.is_remote and not layer.urls:
                raise MalformedSchema2Manifest('missing `urls` for remote layer')

    @property
    def is_manifest_list(self):
        """ Always False for this manifest type. """
        return False

    @property
    def schema_version(self):
        """ Always 2 for this manifest type. """
        return 2

    @property
    def manifest_dict(self):
        """ Returns the parsed JSON manifest. """
        return self._parsed

    @property
    def media_type(self):
        """ Returns the media type declared in the manifest. """
        return self._parsed[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]

    @property
    def digest(self):
        """ Returns the SHA256 digest computed over the original manifest payload. """
        return digest_tools.sha256_digest(self._payload)

    @property
    def config(self):
        """ Returns the size and digest of the config blob as a DockerV2ManifestConfig. """
        config = self._parsed[DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY]
        return DockerV2ManifestConfig(size=config[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                      digest=config[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

    @property
    def layers(self):
        """ Returns the layers of this manifest, from base to leaf. """
        # Parsed lazily on first access and cached afterwards.
        if self._layers is None:
            self._layers = list(self._generate_layers())
        return self._layers

    @property
    def leaf_layer(self):
        """ Returns the leaf layer for this manifest. """
        return self.layers[-1]

    @property
    def layers_compressed_size(self):
        """ Returns the sum of the declared sizes of all layers. """
        return sum(layer.compressed_size for layer in self.layers)

    @property
    def has_remote_layer(self):
        """ Returns whether any layer of this manifest has the remote layer media type. """
        return any(layer.is_remote for layer in self.layers)

    @property
    def blob_digests(self):
        """ Returns the digests of all layers, followed by the digest of the config blob. """
        return [str(layer.digest) for layer in self.layers] + [str(self.config.digest)]

    @property
    def local_blob_digests(self):
        """
        Returns the digests of all layers without `urls`, followed by the digest of the
        config blob.
        """
        return ([str(layer.digest) for layer in self.layers if not layer.urls] +
                [str(self.config.digest)])

    def get_manifest_labels(self, content_retriever):
        """ Returns the labels found in the config blob loaded via the content retriever. """
        return self._get_built_config(content_retriever).labels

    @property
    def bytes(self):
        """ Returns the original serialized manifest payload. """
        return self._payload

    def child_manifests(self, content_retriever):
        """ Always None: this manifest type has no child manifests. """
        return None

    def _manifest_image_layers(self, content_retriever, schema2_config=None):
        """
        Yields a ManifestImageLayer for each history entry in the config, from base to leaf.

        History entries flagged as empty are paired with EMPTY_BLOB_DIGEST and a size of 0;
        all other entries consume the next layer blob of the manifest, in order. Must not be
        called on a manifest with remote layers.

        Raises MalformedSchema2Manifest if the history and the layer list do not line up.
        """
        assert not self.has_remote_layer

        # Retrieve the configuration for the manifest.
        config = schema2_config or self._get_built_config(content_retriever)
        history = list(config.history)
        if len(history) < len(self.layers):
            raise MalformedSchema2Manifest('Found less history than layer blobs')

        # NOTE: The hash object is shared across all iterations, so each synthesized ID depends
        # on every history entry that came before it.
        digest_history = hashlib.sha256()
        v1_layer_parent_id = None
        v1_layer_id = None
        blob_index = 0

        for history_index, history_entry in enumerate(history):
            if not history_entry.is_empty and blob_index >= len(self.layers):
                raise MalformedSchema2Manifest('Missing history entry #%s' % blob_index)

            v1_layer_parent_id = v1_layer_id
            blob_layer = None if history_entry.is_empty else self.layers[blob_index]
            blob_digest = EMPTY_BLOB_DIGEST if blob_layer is None else str(blob_layer.digest)
            compressed_size = 0 if blob_layer is None else blob_layer.compressed_size

            # Create a new synthesized V1 ID for the history layer by hashing its content and
            # the blob associated with it.
            digest_history.update(json.dumps(history_entry.raw_entry))
            digest_history.update("|")
            digest_history.update(str(history_index))
            digest_history.update("|")
            digest_history.update(blob_digest)
            digest_history.update("||")

            v1_layer_id = digest_history.hexdigest()
            yield ManifestImageLayer(history=history_entry,
                                     blob_layer=blob_layer,
                                     blob_digest=blob_digest,
                                     v1_id=v1_layer_id,
                                     v1_parent_id=v1_layer_parent_id,
                                     compressed_size=compressed_size)

            # Only non-empty history entries consume a layer blob.
            if not history_entry.is_empty:
                blob_index += 1

    @property
    def has_legacy_image(self):
        """ Returns True only if the manifest has no remote layers. """
        return not self.has_remote_layer

    def generate_legacy_layers(self, images_map, content_retriever):
        """
        Generates the legacy layers by converting this manifest into a schema 1 manifest and
        delegating to it. Must not be called on a manifest with remote layers.
        """
        assert not self.has_remote_layer

        # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
        # the logic for generating the DockerV1Metadata. All of this will go away once we get
        # rid of legacy images in the database, so this is a temporary solution.
        v1_builder = DockerSchema1ManifestBuilder('', '', '')
        self._populate_schema1_builder(v1_builder, content_retriever)
        return v1_builder.build().generate_legacy_layers(images_map, content_retriever)

    def get_leaf_layer_v1_image_id(self, content_retriever):
        """
        Returns the synthesized V1 ID of the last history entry, or None if the manifest has a
        remote layer.
        """
        # NOTE: If there exists a layer with remote content, then we consider this manifest
        # to not support legacy images.
        if self.has_remote_layer:
            return None

        return list(self._manifest_image_layers(content_retriever))[-1].v1_id

    def get_legacy_image_ids(self, content_retriever):
        """
        Returns the synthesized V1 IDs for all history entries, from base to leaf, or None if
        the manifest has a remote layer.
        """
        if self.has_remote_layer:
            return None

        return [l.v1_id for l in self._manifest_image_layers(content_retriever)]

    def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
        """
        Returns this manifest converted to a schema 1 manifest for the given namespace,
        repository and tag, or None if the manifest has a remote layer.
        """
        if self.has_remote_layer:
            return None

        v1_builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
        self._populate_schema1_builder(v1_builder, content_retriever)
        return v1_builder.build()

    def unsigned(self):
        """ Returns this manifest itself. """
        return self

    def _populate_schema1_builder(self, v1_builder, content_retriever):
        """
        Populates a DockerSchema1ManifestBuilder with the layers and config from this schema.
        Returns the builder that was passed in.
        """
        assert not self.has_remote_layer
        schema2_config = self._get_built_config(content_retriever)
        layers = list(self._manifest_image_layers(content_retriever, schema2_config))

        for index, layer in enumerate(reversed(layers)):  # Schema 1 layers are in reverse order
            # After reversal, index 0 is the leaf layer.
            v1_compatibility = schema2_config.build_v1_compatibility(layer.history,
                                                                     layer.v1_id,
                                                                     layer.v1_parent_id,
                                                                     index == 0,
                                                                     layer.compressed_size)
            v1_builder.add_layer(str(layer.blob_digest), json.dumps(v1_compatibility))

        return v1_builder

    def _get_built_config(self, content_retriever):
        """
        Loads the config blob through the content retriever and returns it as a
        DockerSchema2Config.

        Raises MalformedSchema2Manifest if the blob cannot be loaded or if its length does not
        match the size declared in the manifest.
        """
        config_bytes = content_retriever.get_blob_bytes_with_digest(self.config.digest)
        if config_bytes is None:
            raise MalformedSchema2Manifest('Could not load config blob for manifest')

        if len(config_bytes) != self.config.size:
            # Interpolate the sizes into the message; passing them as extra exception
            # arguments would leave the `%s` placeholders unformatted.
            raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s' %
                                           (len(config_bytes), self.config.size))

        return DockerSchema2Config(config_bytes)

    def _generate_layers(self):
        """
        Yields a DockerV2ManifestLayer for each layer entry of the parsed manifest, in order.

        Raises MalformedSchema2Manifest if a layer digest cannot be parsed.
        """
        for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]):
            content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]
            is_remote = content_type == DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE

            try:
                digest = digest_tools.Digest.parse_digest(layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])
            except digest_tools.InvalidDigestException:
                raise MalformedSchema2Manifest('could not parse manifest digest: %s' %
                                               layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

            yield DockerV2ManifestLayer(index=index,
                                        compressed_size=layer[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                        digest=digest,
                                        is_remote=is_remote,
                                        urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY))


class DockerSchema2ManifestBuilder(object):
    """
    A convenient abstraction around creating new DockerSchema2Manifests.
    """
    def __init__(self):
        self.config = None
        self.layers = []

    def set_config(self, schema2_config):
        """ Sets the configuration for the manifest being built. """
        self.set_config_digest(schema2_config.digest, schema2_config.size)

    def set_config_digest(self, config_digest, config_size):
        """ Sets the digest and size of the configuration layer. """
        self.config = DockerV2ManifestConfig(size=config_size, digest=config_digest)

    def add_layer(self, digest, size, urls=None):
        """
        Adds a layer to the manifest. A layer given a non-empty `urls` is marked as remote.
        """
        self.layers.append(DockerV2ManifestLayer(index=len(self.layers),
                                                 digest=digest,
                                                 compressed_size=size,
                                                 urls=urls,
                                                 is_remote=bool(urls)))

    def build(self):
        """
        Builds and returns the DockerSchema2Manifest. Requires at least one layer and a config
        to have been set.
        """
        assert self.layers
        assert self.config

        def _build_layer(layer):
            # Layers with URLs are emitted with the remote layer media type and their URLs.
            if layer.urls:
                return {
                    DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE,
                    DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: layer.compressed_size,
                    DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(layer.digest),
                    DOCKER_SCHEMA2_MANIFEST_URLS_KEY: layer.urls,
                }

            return {
                DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
                DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: layer.compressed_size,
                DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(layer.digest),
            }

        manifest_dict = {
            DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: 2,
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,

            # Config
            DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: {
                DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
                DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: self.config.size,
                DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(self.config.digest),
            },

            # Layers
            DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: [
                _build_layer(layer) for layer in self.layers
            ],
        }
        return DockerSchema2Manifest(json.dumps(manifest_dict, indent=3))