"""
Parsing, signature verification and construction of Docker Image Manifests,
Schema Version 1 (the signed 'prettyjws' manifest format of the Docker V2
registry protocol).
"""

import hashlib
import json
import logging

from collections import namedtuple, OrderedDict
from datetime import datetime

from jwkest.jws import SIGNER_ALGS, keyrep
from jwt.utils import base64url_encode, base64url_decode

from digest import digest_tools

logger = logging.getLogger(__name__)

# Media types for the supported Docker manifest flavors.
DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json'
DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json'
DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
                                DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE]

# These are used to extract backwards compatibility data from Docker Manifest Schema 1.
ExtractedLayerMetadata = namedtuple(
    'ExtractedLayerMetadata',
    ['digest', 'v1_metadata', 'v1_metadata_str']
)
ExtractedDockerV1Metadata = namedtuple(
    'ExtractedDockerV1Metadata',
    ['image_id', 'parent_image_id', 'created', 'comment', 'command']
)

# JSON key constants used for Docker Manifest Schema 2.1.
_DOCKER_SCHEMA_1_SIGNATURES_KEY = 'signatures'
_DOCKER_SCHEMA_1_PROTECTED_KEY = 'protected'
_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY = 'formatLength'
_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY = 'formatTail'
_DOCKER_SCHEMA_1_REPO_NAME_KEY = 'name'
_DOCKER_SCHEMA_1_REPO_TAG_KEY = 'tag'
_DOCKER_SCHEMA_1_FS_LAYERS_KEY = 'fsLayers'
_DOCKER_SCHEMA_1_HISTORY_KEY = 'history'
_DOCKER_SCHEMA_1_BLOB_SUM_KEY = 'blobSum'
_DOCKER_SCHEMA_1_V1_COMPAT_KEY = 'v1Compatibility'
_DOCKER_SCHEMA_1_ARCH_KEY = 'architecture'
_DOCKER_SCHEMA_1_SCHEMA_VER_KEY = 'schemaVersion'

_ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
_JWS_ALGORITHM = 'RS256'


class ManifestException(Exception):
    """Base class for all manifest parsing/verification errors."""
    pass


class ManifestMalformed(ManifestException):
    """Raised when the manifest JSON is structurally invalid."""
    pass


class ManifestSignatureFailure(ManifestException):
    """Raised when a JWS signature on the manifest fails verification."""
    pass


def _updated_v1_metadata(v1_metadata_json, updated_id_map):
    """Rewrite image IDs inside a v1-compatibility JSON blob.

    Replaces the image's own 'id', its 'parent' reference (when present in
    the map) and the container_config 'Image' reference according to
    ``updated_id_map``, returning the re-serialized JSON string.
    """
    parsed = json.loads(v1_metadata_json)
    parsed['id'] = updated_id_map[parsed['id']]

    if parsed.get('parent') and parsed['parent'] in updated_id_map:
        parsed['parent'] = updated_id_map[parsed['parent']]

    if parsed.get('container_config', {}).get('Image'):
        existing_image = parsed['container_config']['Image']
        if existing_image in updated_id_map:
            # Bug fix: this previously wrote to a new lowercase 'image' key,
            # leaving the actual 'Image' reference stale.
            parsed['container_config']['Image'] = updated_id_map[existing_image]

    return json.dumps(parsed)


class DockerSchema1Manifest(object):
    """A parsed (and optionally signature-verified) schema 1 manifest."""

    def __init__(self, manifest_bytes, validate=True):
        """Parse ``manifest_bytes`` as a signed schema 1 manifest.

        Raises ManifestMalformed for a malformed repository name and
        ManifestSignatureFailure (via _validate) when a signature does not
        verify. KeyError propagates for missing required keys.
        """
        self._layers = None
        self._bytes = manifest_bytes
        self._parsed = json.loads(manifest_bytes)

        self._signatures = self._parsed[_DOCKER_SCHEMA_1_SIGNATURES_KEY]
        self._tag = self._parsed[_DOCKER_SCHEMA_1_REPO_TAG_KEY]

        # The manifest 'name' is either 'namespace/repo' or a bare repo name.
        repo_name_tuple = self._parsed[_DOCKER_SCHEMA_1_REPO_NAME_KEY].split('/')
        if len(repo_name_tuple) > 1:
            self._namespace, self._repo_name = repo_name_tuple
        elif len(repo_name_tuple) == 1:
            self._namespace = ''
            self._repo_name = repo_name_tuple[0]
        else:
            raise ManifestMalformed('malformed repository name')

        if validate:
            self._validate()

    def _validate(self):
        """Verify every JWS signature block against the manifest payload."""
        for signature in self._signatures:
            bytes_to_verify = '{0}.{1}'.format(signature['protected'],
                                               base64url_encode(self.payload))
            signer = SIGNER_ALGS[signature['header']['alg']]
            key = keyrep(signature['header']['jwk'])
            gk = key.get_key()
            sig = base64url_decode(signature['signature'].encode('utf-8'))
            verified = signer.verify(bytes_to_verify, sig, gk)
            if not verified:
                raise ManifestSignatureFailure()

    @property
    def signatures(self):
        return self._signatures

    @property
    def namespace(self):
        return self._namespace

    @property
    def repo_name(self):
        return self._repo_name

    @property
    def tag(self):
        return self._tag

    @property
    def bytes(self):
        return self._bytes

    @property
    def manifest_json(self):
        return self._parsed

    @property
    def digest(self):
        """The sha256 content digest of the signed payload."""
        return digest_tools.sha256_digest(self.payload)

    @property
    def image_ids(self):
        """Set of all v1 image IDs referenced by the manifest's layers."""
        return {mdata.v1_metadata.image_id for mdata in self.layers}

    @property
    def parent_image_ids(self):
        """Set of all (non-empty) parent v1 image IDs referenced by the layers."""
        return {mdata.v1_metadata.parent_image_id for mdata in self.layers
                if mdata.v1_metadata.parent_image_id}

    @property
    def checksums(self):
        """De-duplicated list of layer blob digests (as strings)."""
        return list({str(mdata.digest) for mdata in self.layers})

    @property
    def layers(self):
        """Lazily-computed list of ExtractedLayerMetadata, base image first."""
        if self._layers is None:
            self._layers = list(self._generate_layers())
        return self._layers

    def _generate_layers(self):
        """
        Returns a generator of objects that have the blobSum and v1Compatibility
        keys in them, starting from the base image and working toward the leaf
        node.
        """
        # fsLayers/history are stored leaf-first; reverse to walk base-first.
        # list() is required so this also works where zip returns an iterator
        # (Python 3); reversed() rejects plain iterators.
        for blob_sum_obj, history_obj in reversed(list(zip(
                self._parsed[_DOCKER_SCHEMA_1_FS_LAYERS_KEY],
                self._parsed[_DOCKER_SCHEMA_1_HISTORY_KEY]))):
            try:
                image_digest = digest_tools.Digest.parse_digest(
                    blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY])
            except digest_tools.InvalidDigestException:
                raise ManifestMalformed('could not parse manifest digest: %s' %
                                        blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY])

            metadata_string = history_obj[_DOCKER_SCHEMA_1_V1_COMPAT_KEY]

            v1_metadata = json.loads(metadata_string)
            command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
            command = json.dumps(command_list) if command_list else None

            if 'id' not in v1_metadata:
                raise ManifestMalformed('invalid manifest v1 history')

            extracted = ExtractedDockerV1Metadata(v1_metadata['id'],
                                                  v1_metadata.get('parent'),
                                                  v1_metadata.get('created'),
                                                  v1_metadata.get('comment'),
                                                  command)
            yield ExtractedLayerMetadata(image_digest, extracted, metadata_string)

    @property
    def payload(self):
        """The exact byte span covered by the JWS signatures.

        Per the schema 1 'prettyjws' format, this is the first formatLength
        bytes of the raw manifest plus the base64url-decoded formatTail from
        the first signature's protected header.
        """
        protected = str(self._signatures[0][_DOCKER_SCHEMA_1_PROTECTED_KEY])
        parsed_protected = json.loads(base64url_decode(protected))
        signed_content_head = self._bytes[:parsed_protected[_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY]]
        signed_content_tail = base64url_decode(
            str(parsed_protected[_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY]))
        return signed_content_head + signed_content_tail

    def rewrite_invalid_image_ids(self, images_map):
        """
        Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata.

        If Docker gives us a layer with a v1 image ID that already points to
        existing content, but the checksums don't match, then we need to rewrite
        the image ID to something new in order to ensure consistency.
        """
        # Used to synthesize a new "content addressable" image id.
        digest_history = hashlib.sha256()

        has_rewritten_ids = False
        updated_id_map = {}

        for extracted_layer_metadata in self.layers:
            digest_str = str(extracted_layer_metadata.digest)
            extracted_v1_metadata = extracted_layer_metadata.v1_metadata
            working_image_id = extracted_v1_metadata.image_id

            # Update our digest_history hash for the new layer data.
            digest_history.update(digest_str)
            digest_history.update("@")
            digest_history.update(extracted_layer_metadata.v1_metadata_str.encode('utf-8'))
            digest_history.update("|")

            # Ensure that the v1 image's storage matches the V2 blob. If not, we've
            # found a data inconsistency and need to create a new layer ID for the V1
            # image, and all images that follow it in the ancestry chain.
            digest_mismatch = (extracted_v1_metadata.image_id in images_map and
                               images_map[extracted_v1_metadata.image_id].content_checksum != digest_str)
            if digest_mismatch or has_rewritten_ids:
                working_image_id = digest_history.hexdigest()
                has_rewritten_ids = True

            # Store the new docker id in the map.
            updated_id_map[extracted_v1_metadata.image_id] = working_image_id

            # Lookup the parent image for the layer, if any.
            parent_image_id = None
            if extracted_v1_metadata.parent_image_id is not None:
                # NOTE(review): this stores the images_map *entry* (not an ID
                # string) into parent_image_id, preserving the original
                # behavior — confirm whether the rewritten parent ID
                # (updated_id_map lookup) was intended here instead.
                parent_image_id = images_map.get(extracted_v1_metadata.parent_image_id, None)
                if parent_image_id is None:
                    raise ManifestMalformed(
                        'Parent not found with image ID: {0}'.format(extracted_v1_metadata.parent_image_id)
                    )

            # Synthesize and store the v1 metadata in the db.
            v1_metadata_json = extracted_layer_metadata.v1_metadata_str
            if has_rewritten_ids:
                v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map)

            # Bug fix: DockerV1Metadata is a namedtuple with ten fields and no
            # defaults; the original yield omitted namespace_name, repo_name,
            # checksum and content_checksum, which raises TypeError at runtime.
            yield DockerV1Metadata(
                namespace_name=self.namespace,
                repo_name=self.repo_name,
                image_id=working_image_id,
                checksum=None,  # legacy v1 checksum is not known at this point
                content_checksum=digest_str,
                created=extracted_v1_metadata.created,
                comment=extracted_v1_metadata.comment,
                command=extracted_v1_metadata.command,
                compat_json=v1_metadata_json,
                parent_image_id=parent_image_id,
            )


class DockerSchema1ManifestBuilder(object):
    """
    Class which represents a manifest which is currently being built.
    """

    def __init__(self, namespace_name, repo_name, tag, architecture='amd64'):
        repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)
        if namespace_name == '':
            repo_name_key = repo_name

        self._base_payload = {
            _DOCKER_SCHEMA_1_REPO_TAG_KEY: tag,
            _DOCKER_SCHEMA_1_REPO_NAME_KEY: repo_name_key,
            _DOCKER_SCHEMA_1_ARCH_KEY: architecture,
            _DOCKER_SCHEMA_1_SCHEMA_VER_KEY: 1,
        }

        self._fs_layer_digests = []
        self._history = []

    def add_layer(self, layer_digest, v1_json_metadata):
        """Append a layer (blob digest + its v1 compatibility JSON).

        Layers are expected leaf-first, matching the schema 1 wire order.
        Returns self so calls can be chained.
        """
        self._fs_layer_digests.append({
            _DOCKER_SCHEMA_1_BLOB_SUM_KEY: layer_digest,
        })
        self._history.append({
            _DOCKER_SCHEMA_1_V1_COMPAT_KEY: v1_json_metadata,
        })
        return self

    def build(self, json_web_key):
        """ Build the payload and sign it, returning a SignedManifest object.
        """
        payload = OrderedDict(self._base_payload)
        payload.update({
            _DOCKER_SCHEMA_1_HISTORY_KEY: self._history,
            _DOCKER_SCHEMA_1_FS_LAYERS_KEY: self._fs_layer_digests,
        })

        payload_str = json.dumps(payload, indent=3)

        # The signature covers everything up to (but excluding) the final
        # closing brace; the remainder is carried in the protected header as
        # formatTail so verifiers can reconstruct the exact signed bytes.
        split_point = payload_str.rfind('\n}')
        protected_payload = {
            'formatTail': base64url_encode(payload_str[split_point:]),
            'formatLength': split_point,
            'time': datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU),
        }
        protected = base64url_encode(json.dumps(protected_payload))
        logger.debug('Generated protected block: %s', protected)

        bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str))

        signer = SIGNER_ALGS[_JWS_ALGORITHM]
        signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key()))
        logger.debug('Generated signature: %s', signature)

        # Embed only the public components of the signing key in the manifest.
        public_members = set(json_web_key.public_members)
        public_key = {comp: value for comp, value in json_web_key.to_dict().items()
                      if comp in public_members}

        signature_block = {
            'header': {
                'jwk': public_key,
                'alg': _JWS_ALGORITHM,
            },
            'signature': signature,
            _DOCKER_SCHEMA_1_PROTECTED_KEY: protected,
        }

        logger.debug('Encoded signature block: %s', json.dumps(signature_block))

        payload.update({
            _DOCKER_SCHEMA_1_SIGNATURES_KEY: [signature_block],
        })

        return DockerSchema1Manifest(json.dumps(payload, indent=3))


# Lightweight records shared with callers of this module.
Repository = namedtuple('Repository', ['id', 'name', 'namespace_name'])

Tag = namedtuple('Tag', ['name', 'repository'])

ManifestJSON = namedtuple('ManifestJSON', ['digest', 'json'])

DockerV1Metadata = namedtuple('DockerV1Metadata',
                              ['namespace_name', 'repo_name', 'image_id', 'checksum',
                               'content_checksum', 'created', 'comment', 'command',
                               'parent_image_id', 'compat_json'])