This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/endpoints/v2/manifest.py
2016-02-16 11:42:19 -05:00

532 lines
19 KiB
Python

import logging
import jwt.utils
import json
import features
import hashlib
from peewee import IntegrityError
from flask import make_response, request, url_for
from collections import namedtuple, OrderedDict
from jwkest.jws import SIGNER_ALGS, keyrep
from datetime import datetime
from functools import wraps
from app import docker_v2_signing_key, app
from auth.registry_jwt_auth import process_registry_jwt_auth
from endpoints.decorators import anon_protect
from endpoints.v2 import v2_bp, require_repo_read, require_repo_write
from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown, TagInvalid,
NameInvalid)
from endpoints.trackhelper import track_and_log
from endpoints.notificationhelper import spawn_notification
from digest import digest_tools
from data import model
from data.database import RepositoryTag
from endpoints.common import parse_repository_name
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Regex fragment for a tag name: one word character followed by up to 127 word characters,
# dots or dashes.
VALID_TAG_PATTERN = r'[\w][\w.-]{0,127}'
# Route template for the manifest endpoints. The {0} placeholder is filled with the regex that
# the trailing manifest reference must match (presumably via a custom `regex` URL converter
# registered elsewhere -- confirm).
BASE_MANIFEST_ROUTE = '/<repopath:repository>/manifests/<regex("{0}"):manifest_ref>'
# Route for manifests addressed by content digest.
MANIFEST_DIGEST_ROUTE = BASE_MANIFEST_ROUTE.format(digest_tools.DIGEST_PATTERN)
# Route for manifests addressed by tag name.
MANIFEST_TAGNAME_ROUTE = BASE_MANIFEST_ROUTE.format(VALID_TAG_PATTERN)
# Content type set on the manifest responses returned by the GET handlers below.
# From: https://github.com/docker/distribution/blob/47a064d4195a9b56133891bbb13620c3ac83a827/manifest/schema1/manifest.go#L18
MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
# Request content types that _reject_manifest2_schema2 refuses with an HTTP 415.
MANIFEST2_SCHEMA2_CONTENT_TYPES = ['application/vnd.docker.distribution.manifest.v2+json',
'application/vnd.docker.distribution.manifest.list.v2+json']
# strftime format used for the 'time' field of the protected header written by
# SignedManifestBuilder.build.
ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
# Key into SIGNER_ALGS selecting the signer used by SignedManifestBuilder.build.
JWS_ALGORITHM = 'RS256'
# One manifest layer as yielded by SignedManifest.layers: the parsed blob digest, the extracted
# V1 fields (an ExtractedV1Metadata) and the raw v1Compatibility JSON string.
ImageMetadata = namedtuple('ImageMetadata', ['digest', 'v1_metadata', 'v1_metadata_str'])
# The subset of the V1 compatibility JSON that is used when synthesizing V1 images.
ExtractedV1Metadata = namedtuple('ExtractedV1Metadata', ['docker_id', 'parent', 'created',
'comment', 'command'])
# Keys of the signed manifest document and of each entry in its signatures list.
_SIGNATURES_KEY = 'signatures'
_PROTECTED_KEY = 'protected'
# Keys read from the decoded protected header in SignedManifest.payload.
_FORMAT_LENGTH_KEY = 'formatLength'
_FORMAT_TAIL_KEY = 'formatTail'
# Top-level manifest keys holding the repository name and the tag.
_REPO_NAME_KEY = 'name'
_REPO_TAG_KEY = 'tag'
# Top-level manifest keys holding the two parallel per-layer lists.
_FS_LAYERS_KEY = 'fsLayers'
_HISTORY_KEY = 'history'
# Per-entry keys inside the fsLayers and history lists respectively.
_BLOB_SUM_KEY = 'blobSum'
_V1_COMPAT_KEY = 'v1Compatibility'
# Top-level manifest keys written by SignedManifestBuilder.
_ARCH_KEY = 'architecture'
_SCHEMA_VER = 'schemaVersion'
class SignedManifest(object):
    """ A parsed, signed Docker V2 manifest together with the exact bytes it was parsed from.

    The raw bytes are retained untouched because the signed payload (and therefore the digest)
    is reconstructed from a prefix of those bytes.
    """

    def __init__(self, manifest_bytes, validate=True):
        """ Parses `manifest_bytes` and, when `validate` is true, verifies every signature.

        Raises ValueError if the bytes are not valid JSON, if a required top-level field is
        missing or has the wrong type, if the repository name does not consist of exactly one
        or two '/'-separated pieces, or if validation fails. Callers rely on ValueError being
        the only failure type for malformed input.
        """
        self._bytes = manifest_bytes
        self._parsed = json.loads(manifest_bytes)

        # Untrusted input: surface structural problems as ValueError rather than letting a
        # KeyError/TypeError/AttributeError escape to callers that only handle ValueError.
        try:
            self._signatures = self._parsed[_SIGNATURES_KEY]
            self._tag = self._parsed[_REPO_TAG_KEY]
            repo_name_tuple = self._parsed[_REPO_NAME_KEY].split('/')
        except (KeyError, TypeError, AttributeError) as ex:
            raise ValueError('could not read required manifest field: %r' % (ex,))

        if len(repo_name_tuple) == 2:
            self._namespace, self._repo_name = repo_name_tuple
        elif len(repo_name_tuple) == 1:
            # A name without a namespace; callers treat the empty namespace specially.
            self._namespace = ''
            self._repo_name = repo_name_tuple[0]
        else:
            raise ValueError('repo_name has too many or too few pieces')

        if validate:
            self._validate()

    def _validate(self):
        """ Verifies each signature block against the signed payload.

        Raises ValueError if there are no signatures, if a signature block or the protected
        header is malformed, or if any signature fails verification.
        """
        # An empty list would otherwise pass the loop below without verifying anything.
        if not self._signatures:
            raise ValueError('manifest contains no signatures')

        # The payload does not depend on the signature being checked, so compute it once.
        try:
            encoded_payload = jwt.utils.base64url_encode(self.payload)
        except (KeyError, TypeError) as ex:
            raise ValueError('manifest has an invalid protected header: %r' % (ex,))

        for signature in self._signatures:
            try:
                protected = signature[_PROTECTED_KEY]
                header = signature['header']
                signer = SIGNER_ALGS[header['alg']]
                jwk = header['jwk']
                encoded_signature = signature['signature']
            except (KeyError, TypeError) as ex:
                raise ValueError('manifest signature block is malformed: %r' % (ex,))

            bytes_to_verify = '{0}.{1}'.format(protected, encoded_payload)
            gk = keyrep(jwk).get_key()
            sig = jwt.utils.base64url_decode(encoded_signature.encode('utf-8'))
            if not signer.verify(bytes_to_verify, sig, gk):
                raise ValueError('manifest file failed signature verification')

    @property
    def signatures(self):
        """ The value of the manifest's 'signatures' field. """
        return self._signatures

    @property
    def namespace(self):
        """ The namespace piece of the manifest's name, or '' when the name has none. """
        return self._namespace

    @property
    def repo_name(self):
        """ The repository piece of the manifest's name. """
        return self._repo_name

    @property
    def tag(self):
        """ The value of the manifest's 'tag' field. """
        return self._tag

    @property
    def bytes(self):
        """ The exact bytes this manifest was constructed from. """
        return self._bytes

    @property
    def digest(self):
        """ The SHA256 digest of the signed payload, as computed by digest_tools. """
        return digest_tools.sha256_digest(self.payload)

    @property
    def layers(self):
        """ Returns a generator of ImageMetadata tuples built from the paired fsLayers and
            history entries. The entries are walked in reverse of their order in the manifest,
            so for a manifest that lists its leaf layer first the base layer is yielded first
            and the leaf layer last.

            Raises ManifestInvalid (on iteration) if an entry is malformed, its digest cannot
            be parsed, or its V1 metadata is not a JSON object containing an 'id'.
        """
        try:
            fs_layers = self._parsed[_FS_LAYERS_KEY]
            history = self._parsed[_HISTORY_KEY]
        except KeyError as ke:
            raise ManifestInvalid(detail={'message': 'manifest is missing key %s' % ke})

        # list() keeps this working where zip() returns an iterator rather than a list.
        for blob_sum_obj, history_obj in reversed(list(zip(fs_layers, history))):
            try:
                blob_sum = blob_sum_obj[_BLOB_SUM_KEY]
                metadata_string = history_obj[_V1_COMPAT_KEY]
            except (KeyError, TypeError):
                raise ManifestInvalid(detail={'message': 'invalid manifest layer entry'})

            try:
                image_digest = digest_tools.Digest.parse_digest(blob_sum)
            except digest_tools.InvalidDigestException:
                err_message = 'could not parse manifest digest: %s' % blob_sum
                raise ManifestInvalid(detail={'message': err_message})

            try:
                v1_metadata = json.loads(metadata_string)
            except (TypeError, ValueError):
                v1_metadata = None

            if not isinstance(v1_metadata, dict) or 'id' not in v1_metadata:
                raise ManifestInvalid(detail={'message': 'invalid manifest v1 history'})

            # container_config may be present but null, hence the `or {}`.
            command_list = (v1_metadata.get('container_config') or {}).get('Cmd', None)
            command = json.dumps(command_list) if command_list else None

            extracted = ExtractedV1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
                                            v1_metadata.get('created'),
                                            v1_metadata.get('comment'), command)
            yield ImageMetadata(image_digest, extracted, metadata_string)

    @property
    def payload(self):
        """ Reconstructs the signed content: the first `formatLength` bytes of the raw manifest
            followed by the decoded `formatTail`, both taken from the protected header of the
            first signature.
        """
        protected = str(self._signatures[0][_PROTECTED_KEY])
        parsed_protected = json.loads(jwt.utils.base64url_decode(protected))
        logger.debug('parsed_protected: %s', parsed_protected)

        signed_content_head = self._bytes[:parsed_protected[_FORMAT_LENGTH_KEY]]
        logger.debug('signed content head: %s', signed_content_head)

        signed_content_tail = jwt.utils.base64url_decode(str(parsed_protected[_FORMAT_TAIL_KEY]))
        logger.debug('signed content tail: %s', signed_content_tail)
        return signed_content_head + signed_content_tail
class SignedManifestBuilder(object):
    """ Collects the layers of a manifest under construction and produces the signed result.
    """

    def __init__(self, namespace, repo_name, tag, architecture='amd64', schema_ver=1):
        """ Starts a manifest for `namespace`/`repo_name`:`tag`. An empty namespace yields a
            manifest name consisting of the repository name alone.
        """
        if namespace == '':
            full_name = repo_name
        else:
            full_name = '{0}/{1}'.format(namespace, repo_name)

        self._base_payload = {
            _REPO_TAG_KEY: tag,
            _REPO_NAME_KEY: full_name,
            _ARCH_KEY: architecture,
            _SCHEMA_VER: schema_ver,
        }
        self._fs_layer_digests = []
        self._history = []

    def add_layer(self, layer_digest, v1_json_metadata):
        """ Appends one layer: its blob digest and its V1 compatibility JSON string. """
        self._fs_layer_digests.append({_BLOB_SUM_KEY: layer_digest})
        self._history.append({_V1_COMPAT_KEY: v1_json_metadata})

    def build(self, json_web_key):
        """ Serializes the accumulated payload, signs it with `json_web_key` and returns the
            result as a SignedManifest.
        """
        manifest_doc = OrderedDict(self._base_payload)
        manifest_doc.update({
            _HISTORY_KEY: self._history,
            _FS_LAYERS_KEY: self._fs_layer_digests,
        })

        serialized = json.dumps(manifest_doc, indent=3)

        # Everything before the final closing brace is the head that must survive unchanged
        # once the signatures are appended; the remainder is recorded as the tail.
        tail_start = serialized.rfind('\n}')
        protected_fields = {
            'formatTail': jwt.utils.base64url_encode(serialized[tail_start:]),
            'formatLength': tail_start,
            'time': datetime.utcnow().strftime(ISO_DATETIME_FORMAT_ZULU),
        }
        protected = jwt.utils.base64url_encode(json.dumps(protected_fields))
        logger.debug('Generated protected block: %s', protected)

        signing_input = '{0}.{1}'.format(protected, jwt.utils.base64url_encode(serialized))
        raw_signature = SIGNER_ALGS[JWS_ALGORITHM].sign(signing_input, json_web_key.get_key())
        signature = jwt.utils.base64url_encode(raw_signature)
        logger.debug('Generated signature: %s', signature)

        # Only the public members of the key are published alongside the signature.
        allowed_members = set(json_web_key.public_members)
        public_key = dict((member, value) for member, value in json_web_key.to_dict().items()
                          if member in allowed_members)

        signature_block = {
            'header': {
                'jwk': public_key,
                'alg': JWS_ALGORITHM,
            },
            'signature': signature,
            _PROTECTED_KEY: protected,
        }
        logger.debug('Encoded signature block: %s', json.dumps(signature_block))

        manifest_doc.update({_SIGNATURES_KEY: [signature_block]})
        return SignedManifest(json.dumps(manifest_doc, indent=3))
@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['GET'])
@process_registry_jwt_auth
@parse_repository_name
@require_repo_read
@anon_protect
def fetch_manifest_by_tagname(namespace, repo_name, manifest_ref):
    """ Serves the manifest for the tag `manifest_ref`, generating and storing one when the
        tag is active but has no loadable manifest. Raises ManifestUnknown when the tag is not
        active or generation fails.
    """
    tag_ref = (namespace, repo_name, manifest_ref)

    try:
        manifest = model.tag.load_tag_manifest(*tag_ref)
    except model.InvalidManifestException:
        # Only attempt generation for a tag that is currently active.
        try:
            model.tag.get_active_tag(*tag_ref)
        except RepositoryTag.DoesNotExist:
            raise ManifestUnknown()

        try:
            manifest = _generate_and_store_manifest(*tag_ref)
        except model.DataModelException:
            logger.exception('Exception when generating manifest for %s/%s:%s', namespace,
                             repo_name, manifest_ref)
            raise ManifestUnknown()

    repository = model.repository.get_repository(namespace, repo_name)
    if repository is not None:
        track_and_log('pull_repo', repository, analytics_name='pull_repo_100x',
                      analytics_sample=0.01)

    reply = make_response(manifest.json_data, 200)
    reply.headers['Content-Type'] = MANIFEST_CONTENT_TYPE
    reply.headers['Docker-Content-Digest'] = manifest.digest
    return reply
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['GET'])
@process_registry_jwt_auth
@parse_repository_name
@require_repo_read
@anon_protect
def fetch_manifest_by_digest(namespace, repo_name, manifest_ref):
    """ Serves the stored manifest whose digest is `manifest_ref`. Raises ManifestUnknown when
        no such manifest can be loaded.
    """
    try:
        found = model.tag.load_manifest_by_digest(namespace, repo_name, manifest_ref)
    except model.InvalidManifestException:
        # A digest alone carries no tag name, so there is nothing to generate a manifest from.
        raise ManifestUnknown()

    repository = model.repository.get_repository(namespace, repo_name)
    if repository is not None:
        track_and_log('pull_repo', repository)

    reply = make_response(found.json_data, 200)
    reply.headers['Content-Type'] = MANIFEST_CONTENT_TYPE
    reply.headers['Docker-Content-Digest'] = found.digest
    return reply
def _reject_manifest2_schema2(func):
    """ Decorator which refuses requests whose content type is one of
        MANIFEST2_SCHEMA2_CONTENT_TYPES by raising ManifestInvalid with HTTP status 415;
        all other requests are passed through to `func` unchanged.
    """
    @wraps(func)
    def guarded(*args, **kwargs):
        if request.content_type not in MANIFEST2_SCHEMA2_CONTENT_TYPES:
            return func(*args, **kwargs)

        raise ManifestInvalid(detail={'message': 'manifest schema version not supported'},
                              http_status_code=415)

    return guarded
@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['PUT'])
@process_registry_jwt_auth
@parse_repository_name
@require_repo_write
@anon_protect
@_reject_manifest2_schema2
def write_manifest_by_tagname(namespace, repo_name, manifest_ref):
    """ Stores the manifest in the request body under the tag `manifest_ref`. Raises
        ManifestInvalid when the body cannot be parsed and TagInvalid when the tag inside the
        manifest differs from the one in the URL.
    """
    try:
        uploaded = SignedManifest(request.data)
    except ValueError:
        raise ManifestInvalid(detail={'message': 'could not parse manifest'})

    if uploaded.tag == manifest_ref:
        return _write_manifest(namespace, repo_name, uploaded)

    raise TagInvalid()
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['PUT'])
@process_registry_jwt_auth
@parse_repository_name
@require_repo_write
@anon_protect
@_reject_manifest2_schema2
def write_manifest_by_digest(namespace, repo_name, manifest_ref):
    """ Stores the manifest in the request body, addressed by the digest `manifest_ref`.
        Raises ManifestInvalid when the body cannot be parsed or when its computed digest
        differs from the one in the URL.
    """
    try:
        uploaded = SignedManifest(request.data)
    except ValueError:
        raise ManifestInvalid(detail={'message': 'could not parse manifest'})

    if uploaded.digest == manifest_ref:
        return _write_manifest(namespace, repo_name, uploaded)

    raise ManifestInvalid(detail={'message': 'manifest digest mismatch'})
def _updated_v1_metadata(v1_metadata_json, updated_id_map):
    """ Returns `v1_metadata_json` re-serialized with its Docker V1 image IDs rewritten.

    `updated_id_map` maps original image IDs to their replacements. The document's own 'id'
    must be present in the map (a KeyError is raised otherwise); the 'parent' and
    'container_config.Image' references are rewritten only when they appear in the map.
    """
    parsed = json.loads(v1_metadata_json)
    parsed['id'] = updated_id_map[parsed['id']]

    parent = parsed.get('parent')
    if parent and parent in updated_id_map:
        parsed['parent'] = updated_id_map[parent]

    # container_config may be absent or null; in both cases there is nothing to rewrite.
    container_config = parsed.get('container_config') or {}
    existing_image = container_config.get('Image')
    if existing_image and existing_image in updated_id_map:
        # Write back to the same 'Image' key that was read, so the stale ID is replaced
        # rather than left in place next to a new lowercase 'image' key.
        container_config['Image'] = updated_id_map[existing_image]

    return json.dumps(parsed)
def _write_manifest(namespace, repo_name, manifest):
    """ Validates `manifest` against the target repository, synthesizes a V1 image for each of
        its layers, stores the manifest under its tag and emits the push event.

        Returns a 202 response carrying the manifest digest and the by-digest fetch URL.

        Raises NameInvalid when the manifest's name does not match the target or the repository
        does not exist, ManifestInvalid when the manifest references no layers or a parent
        image that cannot be found, and BlobUnknown when a referenced blob is not in storage.
    """
    # Ensure that the manifest is for this repository. If the manifest's namespace is empty, then
    # it is for the library namespace and we need an extra check.
    is_library_manifest = (manifest.namespace == '' and features.LIBRARY_SUPPORT and
                           namespace == app.config['LIBRARY_NAMESPACE'])
    if not is_library_manifest and manifest.namespace != namespace:
        raise NameInvalid()

    if manifest.repo_name != repo_name:
        raise NameInvalid()

    # Ensure that the repository exists.
    repo = model.repository.get_repository(namespace, repo_name)
    if repo is None:
        raise NameInvalid()

    # Parse the layers exactly once; the list is ordered so that its last entry is the leaf.
    layers = list(manifest.layers)
    if not layers:
        # The manifest doesn't actually reference any layers! Bail out before issuing any
        # lookups with empty ID lists.
        raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})

    # Lookup all the images and their parent images (if any) inside the manifest. This will let us
    # know which V1 images we need to synthesize and which ones are invalid.
    docker_image_ids = {mdata.v1_metadata.docker_id for mdata in layers}
    parent_image_ids = {mdata.v1_metadata.parent for mdata in layers
                        if mdata.v1_metadata.parent}
    all_image_ids = list(docker_image_ids | parent_image_ids)

    images_query = model.image.lookup_repository_images(repo, all_image_ids)
    images_map = {image.docker_image_id: image for image in images_query}

    # Lookup the storages associated with each blob in the manifest.
    checksums = list({str(mdata.digest) for mdata in layers})
    storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)
    storage_map = {storage.content_checksum: storage for storage in storage_query}

    # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to
    # a storage with a content checksum different from the existing, then we need to rewrite
    # the Docker ID to ensure consistency.
    tag_name = manifest.tag
    has_rewritten_ids = False
    updated_id_map = {}

    for mdata in layers:
        digest_str = str(mdata.digest)
        v1_mdata = mdata.v1_metadata
        working_docker_id = v1_mdata.docker_id

        # Ensure that all blobs exist.
        blob_storage = storage_map.get(digest_str)
        if blob_storage is None:
            raise BlobUnknown(detail={'digest': digest_str})

        # Ensure that the V1 image's storage matches the V2 blob. If not, we've found
        # a data inconsistency and need to create a new layer ID for the V1 image, and all images
        # that follow it in the ancestry chain.
        if ((v1_mdata.docker_id in images_map and
             images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or
                has_rewritten_ids):
            v1_metadata_str = mdata.v1_metadata_str.encode('utf-8')
            working_docker_id = hashlib.sha256(v1_metadata_str + '@' + digest_str).hexdigest()
            logger.debug('Rewriting docker_id %s/%s %s -> %s', namespace, repo_name,
                         v1_mdata.docker_id, working_docker_id)
            has_rewritten_ids = True

            # Store the new docker id in the map
            updated_id_map[v1_mdata.docker_id] = working_docker_id

        # Lookup the parent image for the layer, if any.
        parent_image = None
        if v1_mdata.parent is not None:
            parent_image = images_map.get(v1_mdata.parent)
            if parent_image is None:
                msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent)
                raise ManifestInvalid(detail={'message': msg})

        # Synthesize and store the v1 metadata in the db.
        v1_metadata_json = mdata.v1_metadata_str
        if has_rewritten_ids:
            v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)

        image = model.image.synthesize_v1_image(repo, blob_storage, working_docker_id,
                                                v1_mdata.created, v1_mdata.comment,
                                                v1_mdata.command, v1_metadata_json, parent_image)

        # Keyed by the ORIGINAL docker id so that later layers, which still reference their
        # parent by its original id, resolve to the synthesized image.
        images_map[v1_mdata.docker_id] = image

    # Store the manifest pointing to the tag.
    manifest_digest = manifest.digest
    leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id
    model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer_id, manifest_digest,
                                 manifest.bytes)

    # Spawn the repo_push event.
    event_data = {
        'updated_tags': [tag_name],
    }

    track_and_log('push_repo', repo)
    spawn_notification(repo, 'repo_push', event_data)

    response = make_response('OK', 202)
    response.headers['Docker-Content-Digest'] = manifest_digest
    response.headers['Location'] = url_for('v2.fetch_manifest_by_digest',
                                           repository='%s/%s' % (namespace, repo_name),
                                           manifest_ref=manifest_digest)
    return response
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['DELETE'])
@process_registry_jwt_auth
@parse_repository_name
@require_repo_write
@anon_protect
def delete_manifest_by_digest(namespace, repo_name, manifest_ref):
    """ Deletes the tag that the manifest with digest `manifest_ref` belongs to. There is no
        tag-name counterpart to this endpoint because the spec forbids deleting by tag name.
        Raises ManifestUnknown when the manifest cannot be loaded or its tag cannot be deleted.
    """
    try:
        found = model.tag.load_manifest_by_digest(namespace, repo_name, manifest_ref)
    except model.InvalidManifestException:
        # Nothing is stored under this digest.
        raise ManifestUnknown()

    # Retire the tag; a data model error here means the tag is not alive.
    try:
        model.tag.delete_tag(namespace, repo_name, found.tag.name)
    except model.DataModelException:
        raise ManifestUnknown()

    deleted_tag = found.tag
    track_and_log('delete_tag', deleted_tag.repository, tag=deleted_tag.name,
                  digest=manifest_ref)

    return make_response('', 202)
def _generate_and_store_manifest(namespace, repo_name, tag_name):
    """ Builds and signs a manifest for an existing tag from its image and that image's
        parents, then associates it with the tag. If storing hits an IntegrityError, the
        manifest already stored for the tag is loaded and returned instead. Raises
        model.DataModelException when that fallback load fails as well.
    """
    # Resolve the tag's image and its ancestors.
    leaf_image = model.tag.get_tag_image(namespace, repo_name, tag_name)
    ancestors = model.image.get_parent_images(namespace, repo_name, leaf_image)

    # Manifests generated under the library namespace carry an empty namespace.
    in_library = features.LIBRARY_SUPPORT and namespace == app.config['LIBRARY_NAMESPACE']
    manifest_namespace = '' if in_library else namespace

    # The leaf layer is added first, followed by each parent in the order returned.
    builder = SignedManifestBuilder(manifest_namespace, repo_name, tag_name)
    builder.add_layer(leaf_image.storage.content_checksum, leaf_image.v1_json_metadata)
    for ancestor in ancestors:
        builder.add_layer(ancestor.storage.content_checksum, ancestor.v1_json_metadata)

    # Sign with the registry's own signing key.
    signed = builder.build(docker_v2_signing_key)

    # Persist the result, falling back to whatever is already stored for the tag.
    try:
        return model.tag.associate_generated_tag_manifest(namespace, repo_name, tag_name,
                                                          signed.digest, signed.bytes)
    except IntegrityError as ie:
        logger.debug('Got integrity error: %s', ie)
        try:
            return model.tag.load_tag_manifest(namespace, repo_name, tag_name)
        except model.InvalidManifestException:
            logger.exception('Exception when generating manifest')
            raise model.DataModelException('Could not load or generate manifest')