""" schema1 implements pure data transformations according to the Docker Manifest v2.1 Specification. https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md """ import hashlib import json import logging from collections import namedtuple, OrderedDict from datetime import datetime import dateutil.parser from jsonschema import validate as validate_schema, ValidationError from jwkest.jws import SIGNER_ALGS, keyrep, BadSignature from jwt.utils import base64url_encode, base64url_decode from digest import digest_tools from image.docker import ManifestException from image.docker.types import ManifestImageLayer from image.docker.interfaces import ManifestInterface from image.docker.v1 import DockerV1Metadata from image.docker.schemautil import to_canonical_json from util.bytes import Bytes logger = logging.getLogger(__name__) # Content Types DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+json' DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws' DOCKER_SCHEMA1_CONTENT_TYPES = {DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE, DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE} # Keys for signature-related data DOCKER_SCHEMA1_SIGNATURES_KEY = 'signatures' DOCKER_SCHEMA1_HEADER_KEY = 'header' DOCKER_SCHEMA1_SIGNATURE_KEY = 'signature' DOCKER_SCHEMA1_PROTECTED_KEY = 'protected' DOCKER_SCHEMA1_FORMAT_LENGTH_KEY = 'formatLength' DOCKER_SCHEMA1_FORMAT_TAIL_KEY = 'formatTail' # Keys for manifest-related data DOCKER_SCHEMA1_REPO_NAME_KEY = 'name' DOCKER_SCHEMA1_REPO_TAG_KEY = 'tag' DOCKER_SCHEMA1_ARCH_KEY = 'architecture' DOCKER_SCHEMA1_FS_LAYERS_KEY = 'fsLayers' DOCKER_SCHEMA1_BLOB_SUM_KEY = 'blobSum' DOCKER_SCHEMA1_HISTORY_KEY = 'history' DOCKER_SCHEMA1_V1_COMPAT_KEY = 'v1Compatibility' DOCKER_SCHEMA1_SCHEMA_VER_KEY = 'schemaVersion' # Format for time used in the protected payload. _ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ' # The algorithm we use to sign the JWS. 
_JWS_SIGNING_ALGORITHM = 'RS256'


class MalformedSchema1Manifest(ManifestException):
    """
    Raised when a manifest fails an assertion that should be true according to the Docker Manifest
    v2.1 Specification.
    """
    pass


class InvalidSchema1Signature(ManifestException):
    """
    Raised when there is a failure verifying the signature of a signed Docker 2.1 Manifest.
    """
    pass


class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_metadata',
                                               'compressed_size', 'is_remote', 'urls'])):
    """
    Represents all of the data about an individual layer in a given Manifest.

    This is the union of the fsLayers (digest) and the history entries (v1_compatibility).
    """


class Schema1V1Metadata(namedtuple('Schema1V1Metadata', ['image_id', 'parent_image_id', 'created',
                                                         'comment', 'command', 'author',
                                                         'labels'])):
    """
    Represents the necessary data extracted from the v1 compatibility string in a given layer of a
    Manifest.
    """


def _parse_created_datetime(created_str):
    """
    Parses a layer's `created` string into a naive datetime (tzinfo stripped).

    Returns None when the value is missing or cannot be parsed.
    """
    if created_str is None:
        return None

    try:
        return dateutil.parser.parse(created_str).replace(tzinfo=None)
    except Exception:
        # parse raises several different exception types depending on the input, so a single
        # specific handler is not possible; Exception still lets KeyboardInterrupt/SystemExit
        # propagate, unlike a bare except.
        return None


class DockerSchema1Manifest(ManifestInterface):
    """
    A parsed (and optionally signature-verified) Docker schema1 manifest.

    Layers are exposed from the base image to the leaf, i.e. in the reverse of the order in which
    they are listed in the manifest's `fsLayers` and `history` arrays.
    """

    METASCHEMA = {
        'type': 'object',
        'properties': {
            DOCKER_SCHEMA1_SIGNATURES_KEY: {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {
                        DOCKER_SCHEMA1_PROTECTED_KEY: {
                            'type': 'string',
                        },
                        DOCKER_SCHEMA1_HEADER_KEY: {
                            'type': 'object',
                            'properties': {
                                'alg': {
                                    'type': 'string',
                                },
                                'jwk': {
                                    'type': 'object',
                                },
                            },
                            'required': ['alg', 'jwk'],
                        },
                        DOCKER_SCHEMA1_SIGNATURE_KEY: {
                            'type': 'string',
                        },
                    },
                    'required': [DOCKER_SCHEMA1_PROTECTED_KEY, DOCKER_SCHEMA1_HEADER_KEY,
                                 DOCKER_SCHEMA1_SIGNATURE_KEY],
                },
            },
            DOCKER_SCHEMA1_REPO_TAG_KEY: {
                'type': 'string',
            },
            DOCKER_SCHEMA1_REPO_NAME_KEY: {
                'type': 'string',
            },
            DOCKER_SCHEMA1_HISTORY_KEY: {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {
                        DOCKER_SCHEMA1_V1_COMPAT_KEY: {
                            'type': 'string',
                        },
                    },
                    'required': [DOCKER_SCHEMA1_V1_COMPAT_KEY],
                },
            },
            DOCKER_SCHEMA1_FS_LAYERS_KEY: {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {
                        DOCKER_SCHEMA1_BLOB_SUM_KEY: {
                            'type': 'string',
                        },
                    },
                    'required': [DOCKER_SCHEMA1_BLOB_SUM_KEY],
                },
            },
        },
        'required': [DOCKER_SCHEMA1_REPO_TAG_KEY, DOCKER_SCHEMA1_REPO_NAME_KEY,
                     DOCKER_SCHEMA1_FS_LAYERS_KEY, DOCKER_SCHEMA1_HISTORY_KEY],
    }

    def __init__(self, manifest_bytes, validate=True):
        """
        Parses the manifest contained in `manifest_bytes` (a Bytes instance).

        Raises MalformedSchema1Manifest if the data is not valid JSON or does not match METASCHEMA.
        When `validate` is true, any signatures are verified and InvalidSchema1Signature is raised
        on failure.
        """
        assert isinstance(manifest_bytes, Bytes)

        self._layers = None
        self._bytes = manifest_bytes

        try:
            self._parsed = json.loads(manifest_bytes.as_encoded_str())
        except ValueError as ve:
            raise MalformedSchema1Manifest('malformed manifest data: %s' % ve)

        try:
            validate_schema(self._parsed, DockerSchema1Manifest.METASCHEMA)
        except ValidationError as ve:
            raise MalformedSchema1Manifest('manifest data does not match schema: %s' % ve)

        self._signatures = self._parsed.get(DOCKER_SCHEMA1_SIGNATURES_KEY)
        self._architecture = self._parsed.get(DOCKER_SCHEMA1_ARCH_KEY)

        self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY]

        # Split on the first slash only: a name with several slashes used to blow up with a bare
        # ValueError from tuple unpacking. Everything after the first slash is the repository
        # name, which is also how DockerSchema1ManifestBuilder joins the two parts back together.
        repo_name = self._parsed[DOCKER_SCHEMA1_REPO_NAME_KEY]
        repo_name_tuple = repo_name.split('/', 1)
        if len(repo_name_tuple) > 1:
            self._namespace, self._repo_name = repo_name_tuple
        else:
            self._namespace = ''
            self._repo_name = repo_name_tuple[0]

        if validate:
            self._validate()

    def _validate(self):
        """
        Verifies every signature attached to the manifest; a no-op for unsigned manifests.

        Raises InvalidSchema1Signature if a signature does not verify or names an algorithm that
        is not present in SIGNER_ALGS.
        """
        if not self._signatures:
            return

        # The signed payload is the same for every signature, so encode it once.
        encoded_payload = base64url_encode(self._payload)
        for signature in self._signatures:
            header = signature[DOCKER_SCHEMA1_HEADER_KEY]
            bytes_to_verify = '{0}.{1}'.format(signature[DOCKER_SCHEMA1_PROTECTED_KEY],
                                               encoded_payload)

            try:
                signer = SIGNER_ALGS[header['alg']]
            except KeyError:
                raise InvalidSchema1Signature('unsupported signature algorithm: %s' % header['alg'])

            key = keyrep(header['jwk'])
            gk = key.get_key()
            sig = base64url_decode(signature[DOCKER_SCHEMA1_SIGNATURE_KEY].encode('utf-8'))

            try:
                verified = signer.verify(bytes_to_verify, sig, gk)
            except BadSignature:
                raise InvalidSchema1Signature()

            if not verified:
                raise InvalidSchema1Signature()

    @property
    def is_manifest_list(self):
        return False

    @property
    def schema_version(self):
        return 1

    @property
    def content_type(self):
        """ The signed content type if the manifest carries signatures, else the unsigned one. """
        return (DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
                if self._signatures else DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE)

    @property
    def media_type(self):
        return self.content_type

    @property
    def signatures(self):
        return self._signatures

    @property
    def namespace(self):
        return self._namespace

    @property
    def repo_name(self):
        return self._repo_name

    @property
    def tag(self):
        return self._tag

    @property
    def bytes(self):
        return self._bytes

    @property
    def manifest_json(self):
        return self._parsed

    @property
    def manifest_dict(self):
        return self._parsed

    @property
    def layers_compressed_size(self):
        return None

    @property
    def digest(self):
        """ The SHA256 digest of the manifest payload (the signed portion, when signed). """
        return digest_tools.sha256_digest(self._payload)

    @property
    def image_ids(self):
        return {mdata.v1_metadata.image_id for mdata in self.layers}

    @property
    def parent_image_ids(self):
        return {mdata.v1_metadata.parent_image_id for mdata in self.layers
                if mdata.v1_metadata.parent_image_id}

    @property
    def checksums(self):
        return list({str(mdata.digest) for mdata in self.layers})

    @property
    def leaf_layer(self):
        return self.layers[-1]

    @property
    def created_datetime(self):
        """ The leaf layer's creation time as a naive datetime, or None if absent/unparsable. """
        return _parse_created_datetime(self.leaf_layer.v1_metadata.created)

    @property
    def layers(self):
        """ The list of Schema1Layer tuples, base first; computed on first access and cached. """
        if self._layers is None:
            self._layers = list(self._generate_layers())
        return self._layers

    def get_layers(self, content_retriever):
        """
        Returns the layers of this manifest, from base to leaf or None if this kind of manifest
        does not support layers.
        """
        for layer in self.layers:
            created_datetime = _parse_created_datetime(layer.v1_metadata.created)
            yield ManifestImageLayer(layer_id=layer.v1_metadata.image_id,
                                     compressed_size=layer.compressed_size,
                                     is_remote=False,
                                     urls=None,
                                     command=layer.v1_metadata.command,
                                     comment=layer.v1_metadata.comment,
                                     author=layer.v1_metadata.author,
                                     blob_digest=layer.digest,
                                     created_datetime=created_datetime,
                                     internal_layer=layer)

    @property
    def blob_digests(self):
        return [str(layer.digest) for layer in self.layers]

    @property
    def local_blob_digests(self):
        return self.blob_digests

    def child_manifests(self, content_retriever):
        return None

    def get_manifest_labels(self, content_retriever):
        """ Returns the labels found in the leaf layer's v1 metadata. """
        return self.leaf_layer.v1_metadata.labels

    def get_requires_empty_layer_blob(self, content_retriever):
        return False

    def unsigned(self):
        """ Returns this manifest if it is already unsigned, else a rebuilt unsigned copy. """
        if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
            return self

        # Create an unsigned version of the manifest.
        builder = DockerSchema1ManifestBuilder(self._namespace, self._repo_name, self._tag,
                                               self._architecture)
        # self.layers is base-first; the manifest lists layers leaf-first.
        for layer in reversed(self.layers):
            builder.add_layer(str(layer.digest), layer.raw_v1_metadata)

        return builder.build()

    def with_tag_name(self, tag_name, json_web_key=None):
        """
        Returns a copy of this manifest, with the tag changed to the given tag name.

        The copy is signed with `json_web_key` when one is given and is unsigned otherwise.
        """
        builder = DockerSchema1ManifestBuilder(self._namespace, self._repo_name, tag_name,
                                               self._architecture)
        # self.layers is base-first; the manifest lists layers leaf-first.
        for layer in reversed(self.layers):
            builder.add_layer(str(layer.digest), layer.raw_v1_metadata)

        return builder.build(json_web_key)

    def _generate_layers(self):
        """
        Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
        starting from the base image and working toward the leaf node.

        Raises MalformedSchema1Manifest if a blob digest cannot be parsed, or if a v1Compatibility
        entry is not a JSON object or lacks an `id`.
        """
        fs_layers = self._parsed[DOCKER_SCHEMA1_FS_LAYERS_KEY]
        history = self._parsed[DOCKER_SCHEMA1_HISTORY_KEY]

        # list() keeps reversed() working even where zip() returns an iterator instead of a list.
        for blob_sum_obj, history_obj in reversed(list(zip(fs_layers, history))):
            blob_sum = blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY]
            try:
                image_digest = digest_tools.Digest.parse_digest(blob_sum)
            except digest_tools.InvalidDigestException:
                raise MalformedSchema1Manifest('could not parse manifest digest: %s' % blob_sum)

            metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY]
            try:
                v1_metadata = json.loads(metadata_string)
            except (ValueError, TypeError):
                raise MalformedSchema1Manifest('could not parse v1Compatibility JSON: %s' %
                                               metadata_string)

            if not isinstance(v1_metadata, dict):
                raise MalformedSchema1Manifest('v1Compatibility JSON is not an object')

            if 'id' not in v1_metadata:
                raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON')

            # `or {}` also covers keys that are present but explicitly null.
            command_list = (v1_metadata.get('container_config') or {}).get('Cmd', None)
            command = to_canonical_json(command_list) if command_list else None

            labels = (v1_metadata.get('config') or {}).get('Labels', {}) or {}
            extracted = Schema1V1Metadata(image_id=v1_metadata['id'],
                                          parent_image_id=v1_metadata.get('parent'),
                                          created=v1_metadata.get('created'),
                                          comment=v1_metadata.get('comment'),
                                          author=v1_metadata.get('author'),
                                          command=command,
                                          labels=labels)

            compressed_size = v1_metadata.get('Size')
            yield Schema1Layer(image_digest, extracted, metadata_string, compressed_size, False,
                               None)

    @property
    def _payload(self):
        """
        The manifest content that is hashed and signed.

        For an unsigned manifest this is the raw manifest data. For a signed manifest it is
        rebuilt from the first signature's protected header: the first `formatLength` bytes of the
        raw data followed by the decoded `formatTail`.
        """
        # Truthiness (not `is None`) so that an empty `signatures` array is treated as unsigned,
        # matching _validate and content_type, instead of failing on self._signatures[0].
        if not self._signatures:
            return self._bytes.as_encoded_str()

        byte_data = self._bytes.as_encoded_str()
        protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY])
        parsed_protected = json.loads(base64url_decode(protected))
        signed_content_head = byte_data[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]]
        signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY]))
        return signed_content_head + signed_content_tail

    def generate_legacy_layers(self, images_map, content_retriever):
        return self.rewrite_invalid_image_ids(images_map)

    def get_legacy_image_ids(self, content_retriever):
        return self.legacy_image_ids

    @property
    def legacy_image_ids(self):
        return {mdata.v1_metadata.image_id for mdata in self.layers}

    @property
    def has_legacy_image(self):
        return True

    @property
    def leaf_layer_v1_image_id(self):
        return self.leaf_layer.v1_metadata.image_id

    def get_leaf_layer_v1_image_id(self, content_retriever):
        return self.leaf_layer.v1_metadata.image_id

    def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
        """
        Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.

        If none, returns None.
        """
        # Note: schema1 *technically* supports non-amd64 architectures, but in practice these were
        # never used, so to ensure full backwards compatibility, we just always return the schema.
        return self

    def convert_manifest(self, allowed_mediatypes, namespace_name, repo_name, tag_name,
                         content_retriever):
        """
        Returns this manifest, or its unsigned form, whichever has a media type found in
        `allowed_mediatypes`; returns None if neither does.
        """
        if self.media_type in allowed_mediatypes:
            return self

        unsigned = self.unsigned()
        if unsigned.media_type in allowed_mediatypes:
            return unsigned

        return None

    def rewrite_invalid_image_ids(self, images_map):
        """
        Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata.

        If Docker gives us a layer with a v1 image ID that already points to existing content, but
        the checksums don't match, then we need to rewrite the image ID to something new in order
        to ensure consistency.
        """
        # Used to synthesize a new "content addressable" image id
        digest_history = hashlib.sha256()
        has_rewritten_ids = False
        updated_id_map = {}

        for layer in self.layers:
            digest_str = str(layer.digest)
            extracted_v1_metadata = layer.v1_metadata
            working_image_id = extracted_v1_metadata.image_id

            # Update our digest_history hash for the new layer data.
            digest_history.update(digest_str)
            digest_history.update("@")
            digest_history.update(layer.raw_v1_metadata.encode('utf-8'))
            digest_history.update("|")

            # Ensure that the v1 image's storage matches the V2 blob. If not, we've
            # found a data inconsistency and need to create a new layer ID for the V1
            # image, and all images that follow it in the ancestry chain.
            digest_mismatch = (extracted_v1_metadata.image_id in images_map and
                               images_map[extracted_v1_metadata.image_id].content_checksum !=
                               digest_str)
            if digest_mismatch or has_rewritten_ids:
                working_image_id = digest_history.hexdigest()
                has_rewritten_ids = True

            # Store the new docker id in the map
            updated_id_map[extracted_v1_metadata.image_id] = working_image_id

            # Lookup the parent image for the layer, if any.
            parent_image_id = extracted_v1_metadata.parent_image_id
            if parent_image_id is not None:
                parent_image_id = updated_id_map.get(parent_image_id, parent_image_id)

            # Synthesize and store the v1 metadata in the db.
            v1_metadata_json = layer.raw_v1_metadata
            if has_rewritten_ids:
                v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map)

            updated_image = DockerV1Metadata(
                namespace_name=self.namespace,
                repo_name=self.repo_name,
                image_id=working_image_id,
                created=extracted_v1_metadata.created,
                comment=extracted_v1_metadata.comment,
                author=extracted_v1_metadata.author,
                command=extracted_v1_metadata.command,
                compat_json=v1_metadata_json,
                parent_image_id=parent_image_id,
                checksum=None,  # TODO: Check if we need this.
                content_checksum=digest_str,
            )

            yield updated_image


class DockerSchema1ManifestBuilder(object):
    """
    A convenient abstraction around creating new DockerSchema1Manifests.
    """

    def __init__(self, namespace_name, repo_name, tag, architecture='amd64'):
        """
        Starts a manifest for `namespace_name/repo_name:tag`.

        An empty namespace produces a manifest name made of the repository name alone.
        """
        repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)
        if namespace_name == '':
            repo_name_key = repo_name

        self._base_payload = {
            DOCKER_SCHEMA1_REPO_TAG_KEY: tag,
            DOCKER_SCHEMA1_REPO_NAME_KEY: repo_name_key,
            DOCKER_SCHEMA1_ARCH_KEY: architecture,
            DOCKER_SCHEMA1_SCHEMA_VER_KEY: 1,
        }

        self._fs_layer_digests = []
        self._history = []

    def add_layer(self, layer_digest, v1_json_metadata):
        """
        Appends a layer (blob digest plus raw v1Compatibility JSON string) and returns the builder
        so that calls can be chained. Layers are emitted in the order in which they are added.
        """
        self._fs_layer_digests.append({
            DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest,
        })
        self._history.append({
            DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata,
        })
        return self

    def build(self, json_web_key=None, ensure_ascii=True):
        """
        Builds a DockerSchema1Manifest object, with optional signature.

        Without `json_web_key` the manifest is unsigned. With it, a single signature block is
        computed over the unsigned payload and added under the `signatures` key.
        """
        payload = OrderedDict(self._base_payload)
        payload.update({
            DOCKER_SCHEMA1_HISTORY_KEY: self._history,
            DOCKER_SCHEMA1_FS_LAYERS_KEY: self._fs_layer_digests,
        })

        payload_str = json.dumps(payload, indent=3, ensure_ascii=ensure_ascii)
        if json_web_key is None:
            return DockerSchema1Manifest(Bytes.for_string_or_unicode(payload_str))

        payload_str = Bytes.for_string_or_unicode(payload_str).as_encoded_str()

        # The protected header records where the closing brace of the unsigned payload starts
        # (formatLength) and what follows from there (formatTail); DockerSchema1Manifest uses the
        # two values to rebuild the signed payload from the signed document.
        split_point = payload_str.rfind('\n}')

        protected_payload = {
            DOCKER_SCHEMA1_FORMAT_TAIL_KEY: base64url_encode(payload_str[split_point:]),
            DOCKER_SCHEMA1_FORMAT_LENGTH_KEY: split_point,
            'time': datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU),
        }
        protected = base64url_encode(json.dumps(protected_payload, ensure_ascii=ensure_ascii))
        logger.debug('Generated protected block: %s', protected)

        bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str))

        signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM]
        signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key()))
        logger.debug('Generated signature: %s', signature)

        # Only the public members of the key are embedded in the signature header.
        public_members = set(json_web_key.public_members)
        public_key = {comp: value for comp, value in json_web_key.to_dict().items()
                      if comp in public_members}

        signature_block = {
            DOCKER_SCHEMA1_HEADER_KEY: {'jwk': public_key, 'alg': _JWS_SIGNING_ALGORITHM},
            DOCKER_SCHEMA1_SIGNATURE_KEY: signature,
            DOCKER_SCHEMA1_PROTECTED_KEY: protected,
        }

        logger.debug('Encoded signature block: %s', json.dumps(signature_block))

        payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]})

        json_str = json.dumps(payload, indent=3, ensure_ascii=ensure_ascii)
        return DockerSchema1Manifest(Bytes.for_string_or_unicode(json_str))


def _updated_v1_metadata(v1_metadata_json, updated_id_map):
    """
    Updates v1_metadata with new image IDs.

    Rewrites `id`, `parent` and `container_config.Image` through `updated_id_map` and returns the
    result as canonical JSON. The `id` must be present in the map; the other two are rewritten
    only when the map has an entry for them.
    """
    parsed = json.loads(v1_metadata_json)
    parsed['id'] = updated_id_map[parsed['id']]

    if parsed.get('parent') and parsed['parent'] in updated_id_map:
        parsed['parent'] = updated_id_map[parsed['parent']]

    # `or {}` also covers a container_config that is present but null.
    container_config = parsed.get('container_config') or {}
    existing_image = container_config.get('Image')
    if existing_image and existing_image in updated_id_map:
        # Write back to the same (capitalized) key that was read; writing to 'image' left the
        # stale ID in place and added a stray key.
        container_config['Image'] = updated_id_map[existing_image]

    return to_canonical_json(parsed)