import logging
import json
import hashlib

from collections import namedtuple, OrderedDict
from datetime import datetime
from functools import wraps

import jwt.utils

from peewee import IntegrityError
from flask import make_response, request, url_for
from jwkest.jws import SIGNER_ALGS, keyrep

import features

from app import docker_v2_signing_key, app
from auth.registry_jwt_auth import process_registry_jwt_auth
from endpoints.common import parse_repository_name
from endpoints.decorators import anon_protect
from endpoints.v2 import v2_bp, require_repo_read, require_repo_write
from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown, TagInvalid,
                                 NameInvalid)
from endpoints.trackhelper import track_and_log
from endpoints.notificationhelper import spawn_notification
from util.registry.replication import queue_storage_replication
from digest import digest_tools
from data import model
from data.database import RepositoryTag


logger = logging.getLogger(__name__)


VALID_TAG_PATTERN = r'[\w][\w.-]{0,127}'

# NOTE(review): the route needs a `{0}` placeholder for the .format() calls below, plus
# `repository` and `manifest_ref` URL variables for the views in this module. The converter
# names (`repopath`, `regex`) are assumed to be the ones registered on the app -- confirm.
BASE_MANIFEST_ROUTE = '/<repopath:repository>/manifests/<regex("{0}"):manifest_ref>'
MANIFEST_DIGEST_ROUTE = BASE_MANIFEST_ROUTE.format(digest_tools.DIGEST_PATTERN)
MANIFEST_TAGNAME_ROUTE = BASE_MANIFEST_ROUTE.format(VALID_TAG_PATTERN)

# From: https://github.com/docker/distribution/blob/47a064d4195a9b56133891bbb13620c3ac83a827/manifest/schema1/manifest.go#L18
MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
MANIFEST2_SCHEMA2_CONTENT_TYPES = ['application/vnd.docker.distribution.manifest.v2+json',
                                   'application/vnd.docker.distribution.manifest.list.v2+json']

ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
JWS_ALGORITHM = 'RS256'


ImageMetadata = namedtuple('ImageMetadata', ['digest', 'v1_metadata', 'v1_metadata_str'])
ExtractedV1Metadata = namedtuple('ExtractedV1Metadata', ['docker_id', 'parent', 'created',
                                                         'comment', 'command'])


# Keys used inside the manifest JSON document.
_SIGNATURES_KEY = 'signatures'
_PROTECTED_KEY = 'protected'
_FORMAT_LENGTH_KEY = 'formatLength'
_FORMAT_TAIL_KEY = 'formatTail'
_REPO_NAME_KEY = 'name'
_REPO_TAG_KEY = 'tag'
_FS_LAYERS_KEY = 'fsLayers'
_HISTORY_KEY = 'history'
_BLOB_SUM_KEY = 'blobSum'
_V1_COMPAT_KEY = 'v1Compatibility'
_ARCH_KEY = 'architecture'
_SCHEMA_VER = 'schemaVersion'


class SignedManifest(object):
    """ A parsed signed manifest, exposing its repository name, tag, signatures, digest and
        layers. The raw bytes are kept so that the signed payload can be reconstructed.
    """

    def __init__(self, manifest_bytes, validate=True):
        """ Parses `manifest_bytes` as JSON and extracts the signatures, tag and repository name.
            If `validate` is true, every signature is verified as well.

            Raises ValueError if the bytes are not valid JSON, a required top-level key is
            missing, the repository name has more than two '/'-separated pieces, or (when
            validating) the manifest is unsigned or a signature does not verify.
        """
        self._bytes = manifest_bytes
        self._parsed = json.loads(manifest_bytes)

        try:
            self._signatures = self._parsed[_SIGNATURES_KEY]
            self._tag = self._parsed[_REPO_TAG_KEY]
            full_repo_name = self._parsed[_REPO_NAME_KEY]
        except (KeyError, TypeError):
            # Callers only handle ValueError for unparseable manifests; surface structural
            # problems the same way instead of leaking a KeyError/TypeError.
            raise ValueError('manifest is missing a required field')

        repo_name_pieces = full_repo_name.split('/')
        if len(repo_name_pieces) == 2:
            self._namespace, self._repo_name = repo_name_pieces
        elif len(repo_name_pieces) == 1:
            # A bare repository name carries an empty namespace.
            self._namespace = ''
            self._repo_name = repo_name_pieces[0]
        else:
            raise ValueError('repo_name has too many or too few pieces')

        if validate:
            self._validate()

    def _validate(self):
        """ Verifies every signature block against the signed payload.

            Raises ValueError if there are no signatures or if any signature fails to verify.
        """
        if not self._signatures:
            # Without this check an unsigned manifest would pass validation vacuously.
            raise ValueError('manifest is not signed')

        # The payload is the same for all signatures, so encode it once.
        encoded_payload = jwt.utils.base64url_encode(self.payload)

        for signature in self._signatures:
            bytes_to_verify = '{0}.{1}'.format(signature[_PROTECTED_KEY], encoded_payload)
            signer = SIGNER_ALGS[signature['header']['alg']]
            key = keyrep(signature['header']['jwk'])
            gk = key.get_key()
            sig = jwt.utils.base64url_decode(signature['signature'].encode('utf-8'))
            verified = signer.verify(bytes_to_verify, sig, gk)
            if not verified:
                raise ValueError('manifest file failed signature verification')

    @property
    def signatures(self):
        """ The list of signature blocks found in the manifest. """
        return self._signatures

    @property
    def namespace(self):
        """ The namespace portion of the manifest's name; empty if the name had no '/'. """
        return self._namespace

    @property
    def repo_name(self):
        """ The repository portion of the manifest's name. """
        return self._repo_name

    @property
    def tag(self):
        """ The tag named inside the manifest. """
        return self._tag

    @property
    def bytes(self):
        """ The raw manifest bytes this object was constructed from. """
        return self._bytes

    @property
    def digest(self):
        """ The sha256 digest of the signed payload. """
        return digest_tools.sha256_digest(self.payload)

    @property
    def layers(self):
        """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
            starting from the base image and working toward the leaf node.

            Raises ManifestInvalid if a blobSum cannot be parsed as a digest or if a history
            entry has no 'id'.
        """
        # Materialize the pairs before reversing; reversed() requires a sequence.
        layer_pairs = list(zip(self._parsed[_FS_LAYERS_KEY], self._parsed[_HISTORY_KEY]))

        for blob_sum_obj, history_obj in reversed(layer_pairs):
            try:
                image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_BLOB_SUM_KEY])
            except digest_tools.InvalidDigestException:
                err_message = 'could not parse manifest digest: %s' % blob_sum_obj[_BLOB_SUM_KEY]
                raise ManifestInvalid(detail={'message': err_message})

            metadata_string = history_obj[_V1_COMPAT_KEY]

            v1_metadata = json.loads(metadata_string)
            command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
            command = json.dumps(command_list) if command_list else None

            if 'id' not in v1_metadata:
                raise ManifestInvalid(detail={'message': 'invalid manifest v1 history'})

            extracted = ExtractedV1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
                                            v1_metadata.get('created'),
                                            v1_metadata.get('comment'), command)
            yield ImageMetadata(image_digest, extracted, metadata_string)

    @property
    def payload(self):
        """ Reconstructs the signed content: the first `formatLength` bytes of the raw manifest
            followed by the decoded `formatTail`, both taken from the protected header of the
            first signature.
        """
        protected = str(self._signatures[0][_PROTECTED_KEY])
        parsed_protected = json.loads(jwt.utils.base64url_decode(protected))
        signed_content_head = self._bytes[:parsed_protected[_FORMAT_LENGTH_KEY]]
        signed_content_tail = jwt.utils.base64url_decode(str(parsed_protected[_FORMAT_TAIL_KEY]))
        return signed_content_head + signed_content_tail


class SignedManifestBuilder(object):
    """ Class which represents a manifest which is currently being built.
    """

    def __init__(self, namespace_name, repo_name, tag, architecture='amd64', schema_ver=1):
        """ Starts a manifest for `namespace_name/repo_name:tag`. An empty namespace produces a
            name consisting of the repository name alone.
        """
        if namespace_name == '':
            repo_name_key = repo_name
        else:
            repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)

        self._base_payload = {
            _REPO_TAG_KEY: tag,
            _REPO_NAME_KEY: repo_name_key,
            _ARCH_KEY: architecture,
            _SCHEMA_VER: schema_ver,
        }

        self._fs_layer_digests = []
        self._history = []

    def add_layer(self, layer_digest, v1_json_metadata):
        """ Appends a layer (its blob digest and v1 compatibility JSON) and returns the builder
            so that calls can be chained.
        """
        self._fs_layer_digests.append({
            _BLOB_SUM_KEY: layer_digest,
        })
        self._history.append({
            _V1_COMPAT_KEY: v1_json_metadata,
        })
        return self

    def build(self, json_web_key):
        """ Build the payload and sign it, returning a SignedManifest object.
        """
        payload = OrderedDict(self._base_payload)
        payload.update({
            _HISTORY_KEY: self._history,
            _FS_LAYERS_KEY: self._fs_layer_digests,
        })

        payload_str = json.dumps(payload, indent=3)

        # Everything from the final closing brace onwards is the "tail"; the signature block is
        # later inserted ahead of it, so the head length and the tail are recorded in the
        # protected header to allow the signed payload to be reconstructed.
        split_point = payload_str.rfind('\n}')

        protected_payload = {
            _FORMAT_TAIL_KEY: jwt.utils.base64url_encode(payload_str[split_point:]),
            _FORMAT_LENGTH_KEY: split_point,
            'time': datetime.utcnow().strftime(ISO_DATETIME_FORMAT_ZULU),
        }
        protected = jwt.utils.base64url_encode(json.dumps(protected_payload))
        logger.debug('Generated protected block: %s', protected)

        bytes_to_sign = '{0}.{1}'.format(protected, jwt.utils.base64url_encode(payload_str))

        signer = SIGNER_ALGS[JWS_ALGORITHM]
        signature = jwt.utils.base64url_encode(signer.sign(bytes_to_sign,
                                                           json_web_key.get_key()))
        logger.debug('Generated signature: %s', signature)

        # Only the public members of the key are embedded in the signature header.
        public_members = set(json_web_key.public_members)
        public_key = {comp: value for comp, value in json_web_key.to_dict().items()
                      if comp in public_members}

        signature_block = {
            'header': {
                'jwk': public_key,
                'alg': JWS_ALGORITHM,
            },
            'signature': signature,
            _PROTECTED_KEY: protected,
        }

        logger.debug('Encoded signature block: %s', json.dumps(signature_block))

        payload.update({
            _SIGNATURES_KEY: [signature_block],
        })

        return SignedManifest(json.dumps(payload, indent=3))


@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['GET'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull'])
@require_repo_read
@anon_protect
def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
    """ Returns the manifest stored for the given tag. If the tag is active but has no stored
        manifest, one is generated and stored first. Raises ManifestUnknown if the tag does not
        exist or a manifest cannot be generated.
    """
    try:
        manifest = model.tag.load_tag_manifest(namespace_name, repo_name, manifest_ref)
    except model.InvalidManifestException:
        try:
            model.tag.get_active_tag(namespace_name, repo_name, manifest_ref)
        except RepositoryTag.DoesNotExist:
            raise ManifestUnknown()

        try:
            manifest = _generate_and_store_manifest(namespace_name, repo_name, manifest_ref)
        except model.DataModelException:
            logger.exception('Exception when generating manifest for %s/%s:%s', namespace_name,
                             repo_name, manifest_ref)
            raise ManifestUnknown()

    repo = model.repository.get_repository(namespace_name, repo_name)
    if repo is not None:
        track_and_log('pull_repo', repo, analytics_name='pull_repo_100x', analytics_sample=0.01)

    response = make_response(manifest.json_data, 200)
    response.headers['Content-Type'] = MANIFEST_CONTENT_TYPE
    response.headers['Docker-Content-Digest'] = manifest.digest
    return response


@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['GET'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull'])
@require_repo_read
@anon_protect
def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
    """ Returns the manifest stored under the given digest, or raises ManifestUnknown if none
        exists.
    """
    try:
        manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref)
    except model.InvalidManifestException:
        # Without a tag name to reference, we can't make an attempt to generate the manifest
        raise ManifestUnknown()

    repo = model.repository.get_repository(namespace_name, repo_name)
    if repo is not None:
        track_and_log('pull_repo', repo)

    response = make_response(manifest.json_data, 200)
    response.headers['Content-Type'] = MANIFEST_CONTENT_TYPE
    response.headers['Docker-Content-Digest'] = manifest.digest
    return response


def _reject_manifest2_schema2(func):
    """ Decorator which rejects requests whose content type is one of the schema 2 manifest
        types with a ManifestInvalid error carrying HTTP status 415.
    """
    @wraps(func)
    def wrapped(*args, **kwargs):
        if request.content_type in MANIFEST2_SCHEMA2_CONTENT_TYPES:
            raise ManifestInvalid(detail={'message': 'manifest schema version not supported'},
                                  http_status_code=415)
        return func(*args, **kwargs)
    return wrapped


@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['PUT'])
@_reject_manifest2_schema2
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
    """ Stores the manifest in the request body under the tag in the URL. Raises
        ManifestInvalid if the body cannot be parsed and TagInvalid if the manifest's own tag
        differs from the URL's.
    """
    try:
        manifest = SignedManifest(request.data)
    except ValueError:
        raise ManifestInvalid(detail={'message': 'could not parse manifest'})

    if manifest.tag != manifest_ref:
        raise TagInvalid()

    return _write_manifest(namespace_name, repo_name, manifest)


@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['PUT'])
@_reject_manifest2_schema2
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
def write_manifest_by_digest(namespace_name, repo_name, manifest_ref):
    """ Stores the manifest in the request body, requiring its digest to match the digest in
        the URL. Raises ManifestInvalid if the body cannot be parsed or the digests differ.
    """
    try:
        manifest = SignedManifest(request.data)
    except ValueError:
        raise ManifestInvalid(detail={'message': 'could not parse manifest'})

    if manifest.digest != manifest_ref:
        raise ManifestInvalid(detail={'message': 'manifest digest mismatch'})

    return _write_manifest(namespace_name, repo_name, manifest)


def _updated_v1_metadata(v1_metadata_json, updated_id_map):
    """ Returns the given V1 metadata JSON with its 'id', and (where present in the map) its
        'parent' and container_config 'Image', replaced by the IDs in `updated_id_map`.
    """
    parsed = json.loads(v1_metadata_json)
    parsed['id'] = updated_id_map[parsed['id']]

    if parsed.get('parent') and parsed['parent'] in updated_id_map:
        parsed['parent'] = updated_id_map[parsed['parent']]

    if parsed.get('container_config', {}).get('Image'):
        existing_image = parsed['container_config']['Image']
        if existing_image in updated_id_map:
            # Write back under the same 'Image' key that was read, so the stale ID is replaced
            # rather than left in place next to a differently-cased key.
            parsed['container_config']['Image'] = updated_id_map[existing_image]

    return json.dumps(parsed)


def _write_manifest_itself(namespace_name, repo_name, manifest):
    """ Validates the manifest against the repository, synthesizes a V1 image for each of its
        layers and stores the manifest under its tag.

        Returns a (repo, tag_name, manifest_digest) tuple.

        Raises NameInvalid if the manifest names another repository or the repository does not
        exist, ManifestInvalid if it has no layers or references an unknown parent image, and
        BlobUnknown if a layer's blob is not found in the repository.
    """
    # Ensure that the manifest is for this repository. If the manifest's namespace is empty, then
    # it is for the library namespace and we need an extra check.
    if (manifest.namespace == '' and features.LIBRARY_SUPPORT and
            namespace_name == app.config['LIBRARY_NAMESPACE']):
        # This is a library manifest. All good.
        pass
    elif manifest.namespace != namespace_name:
        raise NameInvalid()

    if manifest.repo_name != repo_name:
        raise NameInvalid()

    # Ensure that the repository exists.
    repo = model.repository.get_repository(namespace_name, repo_name)
    if repo is None:
        raise NameInvalid()

    # Parse the layers once; `manifest.layers` re-parses the manifest on every access.
    layers = list(manifest.layers)
    if not layers:
        # The manifest doesn't actually reference any layers! Reject it before issuing any
        # lookups with empty ID lists.
        raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})

    # Lookup all the images and their parent images (if any) inside the manifest. This will let us
    # know which V1 images we need to synthesize and which ones are invalid.
    docker_image_ids = {mdata.v1_metadata.docker_id for mdata in layers}
    parent_image_ids = {mdata.v1_metadata.parent for mdata in layers
                        if mdata.v1_metadata.parent}
    all_image_ids = list(docker_image_ids | parent_image_ids)

    images_query = model.image.lookup_repository_images(repo, all_image_ids)
    images_map = {image.docker_image_id: image for image in images_query}

    # Lookup the storages associated with each blob in the manifest.
    checksums = list({str(mdata.digest) for mdata in layers})
    storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)
    storage_map = {storage.content_checksum: storage for storage in storage_query}

    # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to
    # a storage with a content checksum different from the existing, then we need to rewrite
    # the Docker ID to ensure consistency.
    tag_name = manifest.tag
    has_rewritten_ids = False
    updated_id_map = {}

    # Synthesized image id hash. Can be used to pull a "content addressable" image id out of thin
    # air.
    digest_history = hashlib.sha256()

    for mdata in layers:
        digest_str = str(mdata.digest)
        v1_mdata = mdata.v1_metadata
        working_docker_id = v1_mdata.docker_id

        # Update our digest_history hash for the new layer data.
        digest_history.update(digest_str)
        digest_history.update("@")
        digest_history.update(mdata.v1_metadata_str.encode('utf-8'))
        digest_history.update("|")

        # Ensure that all blobs exist.
        blob_storage = storage_map.get(digest_str)
        if blob_storage is None:
            raise BlobUnknown(detail={'digest': digest_str})

        # Ensure that the V1 image's storage matches the V2 blob. If not, we've found
        # a data inconsistency and need to create a new layer ID for the V1 image, and all images
        # that follow it in the ancestry chain.
        if ((v1_mdata.docker_id in images_map and
             images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or
                has_rewritten_ids):
            working_docker_id = digest_history.hexdigest()
            logger.warning('Rewriting docker_id %s/%s %s -> %s', namespace_name, repo_name,
                           v1_mdata.docker_id, working_docker_id)
            has_rewritten_ids = True

        # Store the new docker id in the map
        updated_id_map[v1_mdata.docker_id] = working_docker_id

        # Lookup the parent image for the layer, if any.
        parent_image = None
        if v1_mdata.parent is not None:
            parent_image = images_map.get(v1_mdata.parent)
            if parent_image is None:
                msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent)
                raise ManifestInvalid(detail={'message': msg})

        # Synthesize and store the v1 metadata in the db.
        v1_metadata_json = mdata.v1_metadata_str
        if has_rewritten_ids:
            v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)

        image = model.image.synthesize_v1_image(repo, blob_storage, working_docker_id,
                                                v1_mdata.created, v1_mdata.comment,
                                                v1_mdata.command, v1_metadata_json, parent_image)

        # Key by the original ID so that later layers can find this image as their parent.
        images_map[v1_mdata.docker_id] = image

    # Store the manifest pointing to the tag.
    manifest_digest = manifest.digest
    leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id
    model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_id,
                                 manifest_digest, manifest.bytes)

    # Queue all blob manifests for replication.
    # TODO(jschorr): Find a way to optimize this insertion.
    if features.STORAGE_REPLICATION:
        for mdata in layers:
            digest_str = str(mdata.digest)
            blob_storage = storage_map.get(digest_str)
            queue_storage_replication(namespace_name, blob_storage)

    return (repo, tag_name, manifest_digest)


def _write_manifest(namespace_name, repo_name, manifest):
    """ Writes the manifest, logs the push, spawns the repo_push notification and returns a 202
        response carrying the manifest digest and the location of the stored manifest.
    """
    (repo, tag_name, manifest_digest) = _write_manifest_itself(namespace_name, repo_name,
                                                               manifest)

    # Spawn the repo_push event.
    event_data = {
        'updated_tags': [tag_name],
    }

    track_and_log('push_repo', repo, tag=tag_name)
    spawn_notification(repo, 'repo_push', event_data)

    response = make_response('OK', 202)
    response.headers['Docker-Content-Digest'] = manifest_digest
    response.headers['Location'] = url_for('v2.fetch_manifest_by_digest',
                                           repository='%s/%s' % (namespace_name, repo_name),
                                           manifest_ref=manifest_digest)
    return response


@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['DELETE'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref):
    """ Delete the manifest specified by the digest. Note: there is no equivalent
        method for deleting by tag name because it is forbidden by the spec.
    """
    try:
        manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref)
    except model.InvalidManifestException:
        # Without a tag name to reference, we can't make an attempt to generate the manifest
        raise ManifestUnknown()

    # Mark the tag as no longer alive.
    try:
        model.tag.delete_tag(namespace_name, repo_name, manifest.tag.name)
    except model.DataModelException:
        # Tag is not alive.
        raise ManifestUnknown()

    track_and_log('delete_tag', manifest.tag.repository, tag=manifest.tag.name,
                  digest=manifest_ref)

    return make_response('', 202)


def _generate_and_store_manifest(namespace_name, repo_name, tag_name):
    """ Builds and signs a manifest for the tag from its image and parent images, then stores
        it. If storing hits an IntegrityError, the already-stored manifest is loaded instead.

        Raises model.DataModelException if the manifest can be neither stored nor loaded.
    """
    # First look up the tag object and its ancestors
    image = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True)
    parents = model.image.get_parent_images(namespace_name, repo_name, image)

    # If the manifest is being generated under the library namespace, then we make its namespace
    # empty.
    manifest_namespace = namespace_name
    if features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']:
        manifest_namespace = ''

    # Create and populate the manifest builder
    builder = SignedManifestBuilder(manifest_namespace, repo_name, tag_name)

    # Add the leaf layer
    builder.add_layer(image.storage.content_checksum, image.v1_json_metadata)

    for parent in parents:
        builder.add_layer(parent.storage.content_checksum, parent.v1_json_metadata)

    # Sign the manifest with our signing key.
    manifest = builder.build(docker_v2_signing_key)

    # Write the manifest to the DB. If an existing manifest already exists, return the
    # one found.
    try:
        return model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name,
                                                          manifest.digest, manifest.bytes)
    except IntegrityError as ie:
        logger.debug('Got integrity error: %s', ie)
        try:
            return model.tag.load_tag_manifest(namespace_name, repo_name, tag_name)
        except model.InvalidManifestException:
            logger.exception('Exception when generating manifest')
            raise model.DataModelException('Could not load or generate manifest')