This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/endpoints/v2/manifest.py
Joseph Schorr 357005e33f Raise a 409 if we try to insert a tag twice at the same time
Also fixes handling of labels for existing manifests

Fixes #1775
2016-08-29 16:03:35 -04:00

561 lines
20 KiB
Python

import logging
import json
import hashlib
from collections import namedtuple, OrderedDict
from datetime import datetime
from functools import wraps
import jwt.utils
from peewee import IntegrityError
from flask import make_response, request, url_for
from jwkest.jws import SIGNER_ALGS, keyrep
import features
from app import docker_v2_signing_key, app
from auth.registry_jwt_auth import process_registry_jwt_auth
from endpoints.common import parse_repository_name
from endpoints.decorators import anon_protect
from endpoints.v2 import v2_bp, require_repo_read, require_repo_write
from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown, TagInvalid,
NameInvalid, TagAlreadyExists)
from endpoints.trackhelper import track_and_log
from endpoints.notificationhelper import spawn_notification
from util.registry.replication import queue_storage_replication
from util.names import VALID_TAG_PATTERN
from digest import digest_tools
from data import model
from data.model import TagAlreadyCreatedException
from data.database import RepositoryTag
logger = logging.getLogger(__name__)
BASE_MANIFEST_ROUTE = '/<repopath:repository>/manifests/<regex("{0}"):manifest_ref>'
MANIFEST_DIGEST_ROUTE = BASE_MANIFEST_ROUTE.format(digest_tools.DIGEST_PATTERN)
MANIFEST_TAGNAME_ROUTE = BASE_MANIFEST_ROUTE.format(VALID_TAG_PATTERN)
# From: https://github.com/docker/distribution/blob/47a064d4195a9b56133891bbb13620c3ac83a827/manifest/schema1/manifest.go#L18
MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
MANIFEST2_SCHEMA2_CONTENT_TYPES = ['application/vnd.docker.distribution.manifest.v2+json',
'application/vnd.docker.distribution.manifest.list.v2+json']
ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
JWS_ALGORITHM = 'RS256'
ImageMetadata = namedtuple('ImageMetadata', ['digest', 'v1_metadata', 'v1_metadata_str'])
ExtractedV1Metadata = namedtuple('ExtractedV1Metadata', ['docker_id', 'parent', 'created',
'comment', 'command', 'labels'])
_SIGNATURES_KEY = 'signatures'
_PROTECTED_KEY = 'protected'
_FORMAT_LENGTH_KEY = 'formatLength'
_FORMAT_TAIL_KEY = 'formatTail'
_REPO_NAME_KEY = 'name'
_REPO_TAG_KEY = 'tag'
_FS_LAYERS_KEY = 'fsLayers'
_HISTORY_KEY = 'history'
_BLOB_SUM_KEY = 'blobSum'
_V1_COMPAT_KEY = 'v1Compatibility'
_ARCH_KEY = 'architecture'
_SCHEMA_VER = 'schemaVersion'
class SignedManifest(object):
def __init__(self, manifest_bytes, validate=True):
self._bytes = manifest_bytes
self._parsed = json.loads(manifest_bytes)
self._signatures = self._parsed[_SIGNATURES_KEY]
self._tag = self._parsed[_REPO_TAG_KEY]
repo_name_tuple = self._parsed[_REPO_NAME_KEY].split('/')
if len(repo_name_tuple) > 1:
self._namespace, self._repo_name = repo_name_tuple
elif len(repo_name_tuple) == 1:
self._namespace = ''
self._repo_name = repo_name_tuple[0]
else:
raise ValueError('repo_name has too many or too few pieces')
if validate:
self._validate()
def _validate(self):
for signature in self._signatures:
bytes_to_verify = '{0}.{1}'.format(signature['protected'],
jwt.utils.base64url_encode(self.payload))
signer = SIGNER_ALGS[signature['header']['alg']]
key = keyrep(signature['header']['jwk'])
gk = key.get_key()
sig = jwt.utils.base64url_decode(signature['signature'].encode('utf-8'))
verified = signer.verify(bytes_to_verify, sig, gk)
if not verified:
raise ValueError('manifest file failed signature verification')
@property
def signatures(self):
return self._signatures
@property
def namespace(self):
return self._namespace
@property
def repo_name(self):
return self._repo_name
@property
def tag(self):
return self._tag
@property
def bytes(self):
return self._bytes
@property
def digest(self):
return digest_tools.sha256_digest(self.payload)
@property
def layers(self):
""" Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
starting from the base image and working toward the leaf node.
"""
for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY],
self._parsed[_HISTORY_KEY])):
try:
image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_BLOB_SUM_KEY])
except digest_tools.InvalidDigestException:
err_message = 'could not parse manifest digest: %s' % blob_sum_obj[_BLOB_SUM_KEY]
raise ManifestInvalid(detail={'message': err_message})
metadata_string = history_obj[_V1_COMPAT_KEY]
v1_metadata = json.loads(metadata_string)
command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
command = json.dumps(command_list) if command_list else None
if not 'id' in v1_metadata:
raise ManifestInvalid(detail={'message': 'invalid manifest v1 history'})
labels = v1_metadata.get('config', {}).get('Labels', {}) or {}
extracted = ExtractedV1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
v1_metadata.get('created'), v1_metadata.get('comment'),
command, labels)
yield ImageMetadata(image_digest, extracted, metadata_string)
@property
def payload(self):
protected = str(self._signatures[0][_PROTECTED_KEY])
parsed_protected = json.loads(jwt.utils.base64url_decode(protected))
signed_content_head = self._bytes[:parsed_protected[_FORMAT_LENGTH_KEY]]
signed_content_tail = jwt.utils.base64url_decode(str(parsed_protected[_FORMAT_TAIL_KEY]))
return signed_content_head + signed_content_tail
class SignedManifestBuilder(object):
""" Class which represents a manifest which is currently being built.
"""
def __init__(self, namespace_name, repo_name, tag, architecture='amd64', schema_ver=1):
repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)
if namespace_name == '':
repo_name_key = repo_name
self._base_payload = {
_REPO_TAG_KEY: tag,
_REPO_NAME_KEY: repo_name_key,
_ARCH_KEY: architecture,
_SCHEMA_VER: schema_ver,
}
self._fs_layer_digests = []
self._history = []
def add_layer(self, layer_digest, v1_json_metadata):
self._fs_layer_digests.append({
_BLOB_SUM_KEY: layer_digest,
})
self._history.append({
_V1_COMPAT_KEY: v1_json_metadata,
})
return self
def build(self, json_web_key):
""" Build the payload and sign it, returning a SignedManifest object.
"""
payload = OrderedDict(self._base_payload)
payload.update({
_HISTORY_KEY: self._history,
_FS_LAYERS_KEY: self._fs_layer_digests,
})
payload_str = json.dumps(payload, indent=3)
split_point = payload_str.rfind('\n}')
protected_payload = {
'formatTail': jwt.utils.base64url_encode(payload_str[split_point:]),
'formatLength': split_point,
'time': datetime.utcnow().strftime(ISO_DATETIME_FORMAT_ZULU),
}
protected = jwt.utils.base64url_encode(json.dumps(protected_payload))
logger.debug('Generated protected block: %s', protected)
bytes_to_sign = '{0}.{1}'.format(protected, jwt.utils.base64url_encode(payload_str))
signer = SIGNER_ALGS[JWS_ALGORITHM]
signature = jwt.utils.base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key()))
logger.debug('Generated signature: %s', signature)
public_members = set(json_web_key.public_members)
public_key = {comp: value for comp, value in json_web_key.to_dict().items()
if comp in public_members}
signature_block = {
'header': {
'jwk': public_key,
'alg': JWS_ALGORITHM,
},
'signature': signature,
_PROTECTED_KEY: protected,
}
logger.debug('Encoded signature block: %s', json.dumps(signature_block))
payload.update({
_SIGNATURES_KEY: [signature_block],
})
return SignedManifest(json.dumps(payload, indent=3))
@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['GET'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull'])
@require_repo_read
@anon_protect
def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
try:
manifest = model.tag.load_tag_manifest(namespace_name, repo_name, manifest_ref)
except model.InvalidManifestException:
try:
model.tag.get_active_tag(namespace_name, repo_name, manifest_ref)
except RepositoryTag.DoesNotExist:
raise ManifestUnknown()
try:
manifest = _generate_and_store_manifest(namespace_name, repo_name, manifest_ref)
except model.DataModelException:
logger.exception('Exception when generating manifest for %s/%s:%s', namespace_name, repo_name,
manifest_ref)
raise ManifestUnknown()
repo = model.repository.get_repository(namespace_name, repo_name)
if repo is not None:
track_and_log('pull_repo', repo, analytics_name='pull_repo_100x', analytics_sample=0.01)
response = make_response(manifest.json_data, 200)
response.headers['Content-Type'] = MANIFEST_CONTENT_TYPE
response.headers['Docker-Content-Digest'] = manifest.digest
return response
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['GET'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull'])
@require_repo_read
@anon_protect
def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
try:
manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref)
except model.InvalidManifestException:
# Without a tag name to reference, we can't make an attempt to generate the manifest
raise ManifestUnknown()
repo = model.repository.get_repository(namespace_name, repo_name)
if repo is not None:
track_and_log('pull_repo', repo)
response = make_response(manifest.json_data, 200)
response.headers['Content-Type'] = MANIFEST_CONTENT_TYPE
response.headers['Docker-Content-Digest'] = manifest.digest
return response
def _reject_manifest2_schema2(func):
@wraps(func)
def wrapped(*args, **kwargs):
if request.content_type in MANIFEST2_SCHEMA2_CONTENT_TYPES:
raise ManifestInvalid(detail={'message': 'manifest schema version not supported'},
http_status_code=415)
return func(*args, **kwargs)
return wrapped
@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['PUT'])
@_reject_manifest2_schema2
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
try:
manifest = SignedManifest(request.data)
except ValueError:
raise ManifestInvalid(detail={'message': 'could not parse manifest'})
if manifest.tag != manifest_ref:
raise TagInvalid()
return _write_manifest(namespace_name, repo_name, manifest)
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['PUT'])
@_reject_manifest2_schema2
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
def write_manifest_by_digest(namespace_name, repo_name, manifest_ref):
try:
manifest = SignedManifest(request.data)
except ValueError:
raise ManifestInvalid(detail={'message': 'could not parse manifest'})
if manifest.digest != manifest_ref:
raise ManifestInvalid(detail={'message': 'manifest digest mismatch'})
return _write_manifest(namespace_name, repo_name, manifest)
def _updated_v1_metadata(v1_metadata_json, updated_id_map):
parsed = json.loads(v1_metadata_json)
parsed['id'] = updated_id_map[parsed['id']]
if parsed.get('parent') and parsed['parent'] in updated_id_map:
parsed['parent'] = updated_id_map[parsed['parent']]
if parsed.get('container_config', {}).get('Image'):
existing_image = parsed['container_config']['Image']
if existing_image in updated_id_map:
parsed['container_config']['image'] = updated_id_map[existing_image]
return json.dumps(parsed)
def _write_manifest_itself(namespace_name, repo_name, manifest):
# Ensure that the manifest is for this repository. If the manifest's namespace is empty, then
# it is for the library namespace and we need an extra check.
if (manifest.namespace == '' and features.LIBRARY_SUPPORT and
namespace_name == app.config['LIBRARY_NAMESPACE']):
# This is a library manifest. All good.
pass
elif manifest.namespace != namespace_name:
raise NameInvalid()
if manifest.repo_name != repo_name:
raise NameInvalid()
# Ensure that the repository exists.
repo = model.repository.get_repository(namespace_name, repo_name)
if repo is None:
raise NameInvalid()
# Lookup all the images and their parent images (if any) inside the manifest. This will let us
# know which V1 images we need to synthesize and which ones are invalid.
layers = list(manifest.layers)
docker_image_ids = {mdata.v1_metadata.docker_id for mdata in layers}
parent_image_ids = {mdata.v1_metadata.parent for mdata in layers
if mdata.v1_metadata.parent}
all_image_ids = list(docker_image_ids | parent_image_ids)
images_query = model.image.lookup_repository_images(repo, all_image_ids)
images_map = {image.docker_image_id: image for image in images_query}
# Lookup the storages associated with each blob in the manifest.
checksums = list({str(mdata.digest) for mdata in manifest.layers})
storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)
storage_map = {storage.content_checksum: storage for storage in storage_query}
# Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to
# a storage with a content checksum different from the existing, then we need to rewrite
# the Docker ID to ensure consistency.
tag_name = manifest.tag
has_rewritten_ids = False
updated_id_map = {}
# Synthesized image id hash. Can be used to pull a "content addressable" image id out of thin air.
digest_history = hashlib.sha256()
for mdata in layers:
digest_str = str(mdata.digest)
v1_mdata = mdata.v1_metadata
working_docker_id = v1_mdata.docker_id
# Update our digest_history hash for the new layer data.
digest_history.update(digest_str)
digest_history.update("@")
digest_history.update(mdata.v1_metadata_str.encode('utf-8'))
digest_history.update("|")
# Ensure that all blobs exist.
blob_storage = storage_map.get(digest_str)
if blob_storage is None:
raise BlobUnknown(detail={'digest': digest_str})
# Ensure that the V1 image's storage matches the V2 blob. If not, we've found
# a data inconsistency and need to create a new layer ID for the V1 image, and all images
# that follow it in the ancestry chain.
if ((v1_mdata.docker_id in images_map and
images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or
has_rewritten_ids):
working_docker_id = digest_history.hexdigest()
logger.warning('Rewriting docker_id %s/%s %s -> %s', namespace_name, repo_name,
v1_mdata.docker_id, working_docker_id)
has_rewritten_ids = True
# Store the new docker id in the map
updated_id_map[v1_mdata.docker_id] = working_docker_id
# Lookup the parent image for the layer, if any.
parent_image = None
if v1_mdata.parent is not None:
parent_image = images_map.get(v1_mdata.parent)
if parent_image is None:
msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent)
raise ManifestInvalid(detail={'message': msg})
# Synthesize and store the v1 metadata in the db.
v1_metadata_json = mdata.v1_metadata_str
if has_rewritten_ids:
v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)
image = model.image.synthesize_v1_image(repo, blob_storage, working_docker_id,
v1_mdata.created, v1_mdata.comment, v1_mdata.command,
v1_metadata_json, parent_image)
images_map[v1_mdata.docker_id] = image
if not layers:
# The manifest doesn't actually reference any layers!
raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})
# Store the manifest pointing to the tag.
manifest_digest = manifest.digest
leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id
try:
tag_manifest, manifest_created = model.tag.store_tag_manifest(namespace_name, repo_name,
tag_name, leaf_layer_id,
manifest_digest, manifest.bytes)
except TagAlreadyCreatedException:
logger.warning('Tag %s was already created under repository %s/%s pointing to image %s',
tag_name, namespace_name, repo_name, leaf_layer_id)
raise TagAlreadyExists()
if manifest_created:
for key, value in layers[-1].v1_metadata.labels.iteritems():
model.label.create_manifest_label(tag_manifest, key, value, 'manifest')
# Queue all blob manifests for replication.
# TODO(jschorr): Find a way to optimize this insertion.
if features.STORAGE_REPLICATION:
for mdata in layers:
digest_str = str(mdata.digest)
blob_storage = storage_map.get(digest_str)
queue_storage_replication(namespace_name, blob_storage)
return (repo, tag_name, manifest_digest)
def _write_manifest(namespace_name, repo_name, manifest):
(repo, tag_name, manifest_digest) = _write_manifest_itself(namespace_name, repo_name, manifest)
# Spawn the repo_push event.
event_data = {
'updated_tags': [tag_name],
}
track_and_log('push_repo', repo, tag=tag_name)
spawn_notification(repo, 'repo_push', event_data)
response = make_response('OK', 202)
response.headers['Docker-Content-Digest'] = manifest_digest
response.headers['Location'] = url_for('v2.fetch_manifest_by_digest',
repository='%s/%s' % (namespace_name, repo_name),
manifest_ref=manifest_digest)
return response
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['DELETE'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref):
""" Delete the manifest specified by the digest. Note: there is no equivalent
method for deleting by tag name because it is forbidden by the spec.
"""
try:
manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref)
except model.InvalidManifestException:
# Without a tag name to reference, we can't make an attempt to generate the manifest
raise ManifestUnknown()
# Mark the tag as no longer alive.
try:
model.tag.delete_tag(namespace_name, repo_name, manifest.tag.name)
except model.DataModelException:
# Tag is not alive.
raise ManifestUnknown()
track_and_log('delete_tag', manifest.tag.repository,
tag=manifest.tag.name, digest=manifest_ref)
return make_response('', 202)
def _generate_and_store_manifest(namespace_name, repo_name, tag_name):
# First look up the tag object and its ancestors
image = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True)
parents = model.image.get_parent_images(namespace_name, repo_name, image)
# If the manifest is being generated under the library namespace, then we make its namespace
# empty.
manifest_namespace = namespace_name
if features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']:
manifest_namespace = ''
# Create and populate the manifest builder
builder = SignedManifestBuilder(manifest_namespace, repo_name, tag_name)
# Add the leaf layer
builder.add_layer(image.storage.content_checksum, image.v1_json_metadata)
for parent in parents:
builder.add_layer(parent.storage.content_checksum, parent.v1_json_metadata)
# Sign the manifest with our signing key.
manifest = builder.build(docker_v2_signing_key)
# Write the manifest to the DB. If an existing manifest already exists, return the
# one found.
try:
return model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name,
manifest.digest, manifest.bytes)
except IntegrityError as ie:
logger.debug('Got integrity error: %s', ie)
try:
return model.tag.load_tag_manifest(namespace_name, repo_name, tag_name)
except model.InvalidManifestException:
logger.exception('Exception when generating manifest')
raise model.DataModelException('Could not load or generate manifest')