540 lines
19 KiB
Python
540 lines
19 KiB
Python
import logging
|
|
import json
|
|
import hashlib
|
|
|
|
from collections import namedtuple, OrderedDict
|
|
from datetime import datetime
|
|
from functools import wraps
|
|
|
|
import jwt.utils
|
|
|
|
from peewee import IntegrityError
|
|
from flask import make_response, request, url_for
|
|
from jwkest.jws import SIGNER_ALGS, keyrep
|
|
|
|
import features
|
|
|
|
from app import docker_v2_signing_key, app
|
|
from auth.registry_jwt_auth import process_registry_jwt_auth
|
|
from endpoints.common import parse_repository_name
|
|
from endpoints.decorators import anon_protect
|
|
from endpoints.v2 import v2_bp, require_repo_read, require_repo_write
|
|
from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown, TagInvalid,
|
|
NameInvalid)
|
|
from endpoints.trackhelper import track_and_log
|
|
from endpoints.notificationhelper import spawn_notification
|
|
from digest import digest_tools
|
|
from data import model
|
|
from data.database import RepositoryTag
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Regular expression that a tag name in a manifest URL must match.
VALID_TAG_PATTERN = r'[\w][\w.-]{0,127}'

# Route template; the {0} placeholder is filled with the pattern that the
# trailing manifest reference has to match.
BASE_MANIFEST_ROUTE = '/<repopath:repository>/manifests/<regex("{0}"):manifest_ref>'

# Route for manifests addressed by digest.
MANIFEST_DIGEST_ROUTE = BASE_MANIFEST_ROUTE.format(digest_tools.DIGEST_PATTERN)

# Route for manifests addressed by tag name.
MANIFEST_TAGNAME_ROUTE = BASE_MANIFEST_ROUTE.format(VALID_TAG_PATTERN)
|
|
|
|
# From: https://github.com/docker/distribution/blob/47a064d4195a9b56133891bbb13620c3ac83a827/manifest/schema1/manifest.go#L18
MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'

# Content types of the schema 2 manifest formats (single manifest and manifest list).
MANIFEST2_SCHEMA2_CONTENT_TYPES = [
    'application/vnd.docker.distribution.manifest.v2+json',
    'application/vnd.docker.distribution.manifest.list.v2+json',
]

# strftime format used for the 'time' entry of a generated protected header.
ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'

# Algorithm used when this module signs a manifest itself.
JWS_ALGORITHM = 'RS256'

# One manifest layer: its blob digest, the extracted V1 fields and the raw V1 JSON string.
ImageMetadata = namedtuple(
    'ImageMetadata',
    ['digest', 'v1_metadata', 'v1_metadata_str'],
)

# The fields pulled out of a layer's v1Compatibility JSON.
ExtractedV1Metadata = namedtuple(
    'ExtractedV1Metadata',
    ['docker_id', 'parent', 'created', 'comment', 'command'],
)

# Keys of the signature section and of its protected header.
_SIGNATURES_KEY = 'signatures'
_PROTECTED_KEY = 'protected'
_FORMAT_LENGTH_KEY = 'formatLength'
_FORMAT_TAIL_KEY = 'formatTail'

# Keys of the manifest body.
_REPO_NAME_KEY = 'name'
_REPO_TAG_KEY = 'tag'
_FS_LAYERS_KEY = 'fsLayers'
_HISTORY_KEY = 'history'
_BLOB_SUM_KEY = 'blobSum'
_V1_COMPAT_KEY = 'v1Compatibility'
_ARCH_KEY = 'architecture'
_SCHEMA_VER = 'schemaVersion'
|
|
|
|
|
|
class SignedManifest(object):
    """ A parsed schema 1 manifest together with the signature blocks attached to it.

    The serialized bytes are kept next to the parsed JSON because the signed payload,
    and therefore the digest, is rebuilt from those exact bytes.
    """

    def __init__(self, manifest_bytes, validate=True):
        """ Parses `manifest_bytes` and, unless `validate` is false, verifies its signatures.

        Raises ValueError when the bytes are not valid JSON, when a required top-level
        field is missing or has the wrong type, when the repository name does not have
        one or two '/'-separated pieces, or when signature validation fails.
        """
        self._bytes = manifest_bytes

        # json.loads raises ValueError on its own when the bytes are not valid JSON.
        self._parsed = json.loads(manifest_bytes)

        # Report structural problems as ValueError as well: that is the one exception
        # type the request handlers in this module translate into a client error.
        try:
            self._signatures = self._parsed[_SIGNATURES_KEY]
            self._tag = self._parsed[_REPO_TAG_KEY]
            name_pieces = self._parsed[_REPO_NAME_KEY].split('/')
        except (KeyError, TypeError, AttributeError) as ex:
            raise ValueError('manifest is missing or has a malformed required field: %s' % ex)

        if len(name_pieces) == 2:
            self._namespace, self._repo_name = name_pieces
        elif len(name_pieces) == 1:
            # A bare repository name carries no namespace.
            self._namespace = ''
            self._repo_name = name_pieces[0]
        else:
            raise ValueError('repo_name has too many or too few pieces')

        if validate:
            self._validate()

    def _validate(self):
        """ Verifies every signature block against the signed payload.

        Raises ValueError when there are no signatures, when a signature block or the
        protected header is malformed, or when any signature fails to verify.
        """
        # Without this check an unsigned manifest would pass vacuously and only blow up
        # later, when the payload is derived from the (missing) first signature.
        if not self._signatures:
            raise ValueError('manifest has no signatures')

        # The payload does not depend on the signature being checked, so encode it once.
        try:
            encoded_payload = jwt.utils.base64url_encode(self.payload)
        except (KeyError, IndexError, TypeError):
            raise ValueError('manifest has a malformed protected header')

        for signature in self._signatures:
            try:
                protected = signature[_PROTECTED_KEY]
                header = signature['header']
                signer = SIGNER_ALGS[header['alg']]
                jwk = header['jwk']
                encoded_signature = signature['signature']
            except (KeyError, TypeError):
                raise ValueError('manifest has a malformed signature block')

            bytes_to_verify = '{0}.{1}'.format(protected, encoded_payload)
            gk = keyrep(jwk).get_key()
            sig = jwt.utils.base64url_decode(encoded_signature.encode('utf-8'))
            if not signer.verify(bytes_to_verify, sig, gk):
                raise ValueError('manifest file failed signature verification')

    @property
    def signatures(self):
        """ The value of the manifest's 'signatures' field. """
        return self._signatures

    @property
    def namespace(self):
        """ Namespace part of the manifest's name; '' when the name has no '/'. """
        return self._namespace

    @property
    def repo_name(self):
        """ Repository part of the manifest's name. """
        return self._repo_name

    @property
    def tag(self):
        """ The value of the manifest's 'tag' field. """
        return self._tag

    @property
    def bytes(self):
        """ The serialized manifest exactly as it was passed to the constructor. """
        return self._bytes

    @property
    def digest(self):
        """ The result of digest_tools.sha256_digest over the signed payload. """
        return digest_tools.sha256_digest(self.payload)

    @property
    def layers(self):
        """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
        starting from the base image and working toward the leaf node.

        Raises ManifestInvalid when a blob sum cannot be parsed as a digest or when a
        history entry is not a JSON object carrying an 'id'.
        """
        # zip() is materialized explicitly so that reversed() also works where zip()
        # returns an iterator instead of a list.
        layer_pairs = list(zip(self._parsed[_FS_LAYERS_KEY], self._parsed[_HISTORY_KEY]))

        for blob_sum_obj, history_obj in reversed(layer_pairs):
            try:
                image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_BLOB_SUM_KEY])
            except digest_tools.InvalidDigestException:
                err_message = 'could not parse manifest digest: %s' % blob_sum_obj[_BLOB_SUM_KEY]
                raise ManifestInvalid(detail={'message': err_message})

            try:
                metadata_string = history_obj[_V1_COMPAT_KEY]
                v1_metadata = json.loads(metadata_string)
            except (KeyError, TypeError, ValueError):
                raise ManifestInvalid(detail={'message': 'invalid manifest v1 history'})

            if not isinstance(v1_metadata, dict) or 'id' not in v1_metadata:
                raise ManifestInvalid(detail={'message': 'invalid manifest v1 history'})

            # `or {}` also covers an explicit null container_config.
            command_list = (v1_metadata.get('container_config') or {}).get('Cmd', None)
            command = json.dumps(command_list) if command_list else None

            extracted = ExtractedV1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
                                            v1_metadata.get('created'),
                                            v1_metadata.get('comment'), command)
            yield ImageMetadata(image_digest, extracted, metadata_string)

    @property
    def payload(self):
        """ Rebuilds the content that was signed.

        The first signature's protected header supplies `formatLength` and `formatTail`;
        the payload is the first `formatLength` bytes of the manifest followed by the
        base64url-decoded `formatTail`.
        """
        protected = str(self._signatures[0][_PROTECTED_KEY])
        parsed_protected = json.loads(jwt.utils.base64url_decode(protected))
        signed_content_head = self._bytes[:parsed_protected[_FORMAT_LENGTH_KEY]]
        signed_content_tail = jwt.utils.base64url_decode(str(parsed_protected[_FORMAT_TAIL_KEY]))
        return signed_content_head + signed_content_tail
|
|
|
|
|
|
class SignedManifestBuilder(object):
    """ Class which represents a manifest which is currently being built.

    Layers are appended with add_layer() and the result is serialized and signed
    by build().
    """

    def __init__(self, namespace_name, repo_name, tag, architecture='amd64', schema_ver=1):
        """ Starts a manifest for `namespace_name`/`repo_name`:`tag`.

        An empty `namespace_name` yields a manifest name without a namespace prefix.
        """
        if namespace_name == '':
            repo_name_key = repo_name
        else:
            repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)

        self._base_payload = {
            _REPO_TAG_KEY: tag,
            _REPO_NAME_KEY: repo_name_key,
            _ARCH_KEY: architecture,
            _SCHEMA_VER: schema_ver,
        }

        self._fs_layer_digests = []
        self._history = []

    def add_layer(self, layer_digest, v1_json_metadata):
        """ Appends one layer (blob digest plus its V1 JSON metadata string).

        Returns the builder itself so that calls can be chained.
        """
        self._fs_layer_digests.append({
            _BLOB_SUM_KEY: layer_digest,
        })
        self._history.append({
            _V1_COMPAT_KEY: v1_json_metadata,
        })
        return self

    def build(self, json_web_key):
        """ Build the payload and sign it, returning a SignedManifest object.

        `json_web_key` must provide get_key(), public_members and to_dict(); only its
        public members are embedded in the signature header.
        """
        payload = OrderedDict(self._base_payload)
        payload.update({
            _HISTORY_KEY: self._history,
            _FS_LAYERS_KEY: self._fs_layer_digests,
        })

        payload_str = json.dumps(payload, indent=3)

        # Everything from the final closing brace onwards is the "tail": the
        # signatures are later inserted just before it.
        split_point = payload_str.rfind('\n}')

        # Written with the same key constants SignedManifest.payload reads the
        # protected header with, so the two sides cannot drift apart.
        protected_payload = {
            _FORMAT_TAIL_KEY: jwt.utils.base64url_encode(payload_str[split_point:]),
            _FORMAT_LENGTH_KEY: split_point,
            'time': datetime.utcnow().strftime(ISO_DATETIME_FORMAT_ZULU),
        }
        protected = jwt.utils.base64url_encode(json.dumps(protected_payload))
        logger.debug('Generated protected block: %s', protected)

        bytes_to_sign = '{0}.{1}'.format(protected, jwt.utils.base64url_encode(payload_str))

        signer = SIGNER_ALGS[JWS_ALGORITHM]
        signature = jwt.utils.base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key()))
        logger.debug('Generated signature: %s', signature)

        # Publish only the public members of the key.
        public_members = set(json_web_key.public_members)
        public_key = {comp: value for comp, value in json_web_key.to_dict().items()
                      if comp in public_members}

        signature_block = {
            'header': {
                'jwk': public_key,
                'alg': JWS_ALGORITHM,
            },
            'signature': signature,
            _PROTECTED_KEY: protected,
        }

        logger.debug('Encoded signature block: %s', json.dumps(signature_block))

        # The payload is an OrderedDict, so the signatures are serialized after every
        # signed key and the first `split_point` characters stay unchanged.
        payload.update({
            _SIGNATURES_KEY: [signature_block],
        })

        return SignedManifest(json.dumps(payload, indent=3))
|
|
|
|
|
|
@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['GET'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull'])
@require_repo_read
@anon_protect
def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
    """ Returns the manifest for the tag `manifest_ref`.

    When no manifest can be loaded for the tag, one is generated and stored for it,
    provided the tag is active. Raises ManifestUnknown when the tag does not exist
    or when generating the manifest fails.
    """
    try:
        manifest = model.tag.load_tag_manifest(namespace_name, repo_name, manifest_ref)
    except model.InvalidManifestException:
        # Nothing loadable for this tag: make sure the tag is live before generating.
        try:
            model.tag.get_active_tag(namespace_name, repo_name, manifest_ref)
        except RepositoryTag.DoesNotExist:
            raise ManifestUnknown()

        try:
            manifest = _generate_and_store_manifest(namespace_name, repo_name, manifest_ref)
        except model.DataModelException:
            logger.exception('Exception when generating manifest for %s/%s:%s',
                             namespace_name, repo_name, manifest_ref)
            raise ManifestUnknown()

    repository = model.repository.get_repository(namespace_name, repo_name)
    if repository is not None:
        track_and_log('pull_repo', repository, analytics_name='pull_repo_100x',
                      analytics_sample=0.01)

    resp = make_response(manifest.json_data, 200)
    resp.headers['Content-Type'] = MANIFEST_CONTENT_TYPE
    resp.headers['Docker-Content-Digest'] = manifest.digest
    return resp
|
|
|
|
|
|
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['GET'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull'])
@require_repo_read
@anon_protect
def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
    """ Returns the manifest stored under the digest `manifest_ref`.

    Raises ManifestUnknown when no manifest can be loaded for that digest; unlike the
    tag-name route, nothing is generated on demand here.
    """
    try:
        manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref)
    except model.InvalidManifestException:
        # A digest alone gives us no tag to generate a manifest from.
        raise ManifestUnknown()

    repository = model.repository.get_repository(namespace_name, repo_name)
    if repository is not None:
        track_and_log('pull_repo', repository)

    resp = make_response(manifest.json_data, 200)
    resp.headers['Content-Type'] = MANIFEST_CONTENT_TYPE
    resp.headers['Docker-Content-Digest'] = manifest.digest
    return resp
|
|
|
|
|
|
def _reject_manifest2_schema2(func):
    """ Decorator that refuses requests whose body is declared as a schema 2 manifest.

    The wrapped function runs unchanged for every other content type. For the
    schema 2 manifest and manifest-list types, ManifestInvalid is raised with HTTP
    status code 415.
    """
    @wraps(func)
    def wrapped(*args, **kwargs):
        # Compare the bare media type rather than the whole header: Content-Type may
        # carry parameters (e.g. "; charset=utf-8") that would defeat an exact match.
        if request.mimetype in MANIFEST2_SCHEMA2_CONTENT_TYPES:
            raise ManifestInvalid(detail={'message': 'manifest schema version not supported'},
                                  http_status_code=415)
        return func(*args, **kwargs)
    return wrapped
|
|
|
|
|
|
@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['PUT'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
@_reject_manifest2_schema2
def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
    """ Stores the manifest in the request body under the tag `manifest_ref`.

    Raises ManifestInvalid when the body cannot be turned into a SignedManifest and
    TagInvalid when the tag inside the manifest differs from the one in the URL.
    """
    try:
        uploaded = SignedManifest(request.data)
    except ValueError:
        raise ManifestInvalid(detail={'message': 'could not parse manifest'})

    # The tag embedded in the manifest has to agree with the URL.
    if uploaded.tag == manifest_ref:
        return _write_manifest(namespace_name, repo_name, uploaded)

    raise TagInvalid()
|
|
|
|
|
|
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['PUT'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
@_reject_manifest2_schema2
def write_manifest_by_digest(namespace_name, repo_name, manifest_ref):
    """ Stores the manifest in the request body, addressed by the digest `manifest_ref`.

    Raises ManifestInvalid when the body cannot be turned into a SignedManifest or
    when its computed digest differs from the one in the URL.
    """
    try:
        uploaded = SignedManifest(request.data)
    except ValueError:
        raise ManifestInvalid(detail={'message': 'could not parse manifest'})

    # The digest computed from the upload has to agree with the URL.
    if uploaded.digest == manifest_ref:
        return _write_manifest(namespace_name, repo_name, uploaded)

    raise ManifestInvalid(detail={'message': 'manifest digest mismatch'})
|
|
|
|
|
|
def _updated_v1_metadata(v1_metadata_json, updated_id_map):
    """ Returns `v1_metadata_json` with its image ids rewritten through `updated_id_map`.

    `v1_metadata_json` is a JSON object string and `updated_id_map` maps original
    docker ids to their replacements. The 'id' entry is always rewritten (KeyError
    if it is absent from the metadata or from the map). 'parent' and
    container_config 'Image' are rewritten only when present in the map.
    """
    parsed = json.loads(v1_metadata_json)
    parsed['id'] = updated_id_map[parsed['id']]

    parent_id = parsed.get('parent')
    if parent_id and parent_id in updated_id_map:
        parsed['parent'] = updated_id_map[parent_id]

    # `or {}` also covers an explicit null container_config.
    container_config = parsed.get('container_config') or {}
    existing_image = container_config.get('Image')
    if existing_image and existing_image in updated_id_map:
        # Write back to the same 'Image' key that was read, so the stale id is
        # replaced instead of a second, lowercase key being added next to it.
        container_config['Image'] = updated_id_map[existing_image]

    return json.dumps(parsed)
|
|
|
|
|
|
def _write_manifest_itself(namespace_name, repo_name, manifest):
    """ Checks `manifest` against the target repository, synthesizes a V1 image for each
    of its layers and stores the manifest under its tag.

    Returns a (repo, tag_name, manifest_digest) tuple.

    Raises NameInvalid when the manifest's namespace or repository name does not match
    the target or the repository does not exist. Raises ManifestInvalid when the
    manifest has no layers or names a parent image that cannot be found. Raises
    BlobUnknown when a layer's blob has no storage in the repository.
    """
    # Ensure that the manifest is for this repository. A manifest with an empty
    # namespace is accepted only for the configured library namespace.
    is_library_manifest = (manifest.namespace == '' and features.LIBRARY_SUPPORT and
                           namespace_name == app.config['LIBRARY_NAMESPACE'])
    if not is_library_manifest and manifest.namespace != namespace_name:
        raise NameInvalid()

    if manifest.repo_name != repo_name:
        raise NameInvalid()

    # Ensure that the repository exists.
    repo = model.repository.get_repository(namespace_name, repo_name)
    if repo is None:
        raise NameInvalid()

    # Parse the layers exactly once; manifest.layers re-parses every entry each time
    # it is iterated.
    layers = list(manifest.layers)
    if not layers:
        # The manifest doesn't actually reference any layers! Reject it before any
        # lookup is issued with an empty id list.
        raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})

    # Lookup all the images and their parent images (if any) inside the manifest. This
    # will let us know which V1 images we need to synthesize and which ones are invalid.
    docker_image_ids = {mdata.v1_metadata.docker_id for mdata in layers}
    parent_image_ids = {mdata.v1_metadata.parent for mdata in layers
                        if mdata.v1_metadata.parent}
    all_image_ids = list(docker_image_ids | parent_image_ids)

    images_query = model.image.lookup_repository_images(repo, all_image_ids)
    images_map = {image.docker_image_id: image for image in images_query}

    # Lookup the storages associated with each blob in the manifest.
    checksums = list({str(mdata.digest) for mdata in layers})
    storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)
    storage_map = {storage.content_checksum: storage for storage in storage_query}

    # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing
    # to a storage with a content checksum different from the existing, then we need to
    # rewrite the Docker ID to ensure consistency.
    tag_name = manifest.tag
    has_rewritten_ids = False
    updated_id_map = {}

    # Running hash over every layer seen so far; its hex digest serves as the
    # synthesized docker id whenever an id has to be rewritten.
    digest_history = hashlib.sha256()

    for mdata in layers:
        digest_str = str(mdata.digest)
        v1_mdata = mdata.v1_metadata
        working_docker_id = v1_mdata.docker_id

        # Update our digest_history hash for the new layer data.
        digest_history.update(digest_str)
        digest_history.update("@")
        digest_history.update(mdata.v1_metadata_str.encode('utf-8'))
        digest_history.update("|")

        # Ensure that all blobs exist.
        blob_storage = storage_map.get(digest_str)
        if blob_storage is None:
            raise BlobUnknown(detail={'digest': digest_str})

        # Ensure that the V1 image's storage matches the V2 blob. If not, we've found
        # a data inconsistency and need to create a new layer ID for the V1 image, and
        # all images that follow it in the ancestry chain.
        if ((v1_mdata.docker_id in images_map and
             images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or
                has_rewritten_ids):
            working_docker_id = digest_history.hexdigest()
            logger.warning('Rewriting docker_id %s/%s %s -> %s', namespace_name, repo_name,
                           v1_mdata.docker_id, working_docker_id)
            has_rewritten_ids = True

        # Store the new docker id in the map
        updated_id_map[v1_mdata.docker_id] = working_docker_id

        # Lookup the parent image for the layer, if any.
        parent_image = None
        if v1_mdata.parent is not None:
            parent_image = images_map.get(v1_mdata.parent)
            if parent_image is None:
                msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent)
                raise ManifestInvalid(detail={'message': msg})

        # Synthesize and store the v1 metadata in the db.
        v1_metadata_json = mdata.v1_metadata_str
        if has_rewritten_ids:
            v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)

        image = model.image.synthesize_v1_image(repo, blob_storage, working_docker_id,
                                                v1_mdata.created, v1_mdata.comment,
                                                v1_mdata.command, v1_metadata_json,
                                                parent_image)

        # Keyed by the ORIGINAL docker id so that later layers, which name their parent
        # by its original id, resolve to the image that was just synthesized.
        images_map[v1_mdata.docker_id] = image

    # Store the manifest pointing to the tag. The last layer is the leaf image.
    manifest_digest = manifest.digest
    leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id
    model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_id,
                                 manifest_digest, manifest.bytes)
    return (repo, tag_name, manifest_digest)
|
|
|
|
|
|
def _write_manifest(namespace_name, repo_name, manifest):
    """ Stores `manifest`, records the push and builds the HTTP response for it.

    The response is 202 'OK' with the manifest digest in Docker-Content-Digest and
    the by-digest fetch URL in Location.
    """
    repo, tag_name, manifest_digest = _write_manifest_itself(namespace_name, repo_name,
                                                             manifest)

    # Log the push, then spawn the repo_push event for the tag that was written.
    track_and_log('push_repo', repo)
    spawn_notification(repo, 'repo_push', {'updated_tags': [tag_name]})

    manifest_url = url_for('v2.fetch_manifest_by_digest',
                           repository='%s/%s' % (namespace_name, repo_name),
                           manifest_ref=manifest_digest)

    resp = make_response('OK', 202)
    resp.headers['Docker-Content-Digest'] = manifest_digest
    resp.headers['Location'] = manifest_url
    return resp
|
|
|
|
|
|
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['DELETE'])
@parse_repository_name()
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref):
    """ Deletes the tag behind the manifest with digest `manifest_ref`.

    There is deliberately no delete-by-tag-name counterpart: the spec forbids it.
    Raises ManifestUnknown when no manifest can be loaded for the digest or when
    deleting its tag fails with a DataModelException.
    """
    try:
        manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref)
    except model.InvalidManifestException:
        # Nothing is stored under this digest.
        raise ManifestUnknown()

    # Mark the tag as no longer alive.
    try:
        tag = manifest.tag
        model.tag.delete_tag(namespace_name, repo_name, tag.name)
    except model.DataModelException:
        # Tag is not alive.
        raise ManifestUnknown()

    track_and_log('delete_tag', tag.repository, tag=tag.name, digest=manifest_ref)
    return make_response('', 202)
|
|
|
|
|
|
def _generate_and_store_manifest(namespace_name, repo_name, tag_name):
    """ Builds, signs and stores a manifest for the image that `tag_name` points to.

    Returns what model.tag.associate_generated_tag_manifest returns or, when that
    fails with an IntegrityError, the manifest already associated with the tag.
    Raises model.DataModelException when neither can be obtained.
    """
    # First look up the tag's image and its ancestors.
    image = model.tag.get_tag_image(namespace_name, repo_name, tag_name)
    parents = model.image.get_parent_images(namespace_name, repo_name, image)

    # Manifests generated under the library namespace carry an empty namespace.
    in_library = features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']
    manifest_namespace = '' if in_library else namespace_name

    # Populate the builder: the leaf layer first, then each parent in turn.
    builder = SignedManifestBuilder(manifest_namespace, repo_name, tag_name)
    builder.add_layer(image.storage.content_checksum, image.v1_json_metadata)
    for ancestor in parents:
        builder.add_layer(ancestor.storage.content_checksum, ancestor.v1_json_metadata)

    # Sign the manifest with our signing key.
    manifest = builder.build(docker_v2_signing_key)

    # Write the manifest to the DB. If an existing manifest already exists, return the
    # one found.
    try:
        return model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name,
                                                          manifest.digest, manifest.bytes)
    except IntegrityError as ie:
        logger.debug('Got integrity error: %s', ie)

        try:
            return model.tag.load_tag_manifest(namespace_name, repo_name, tag_name)
        except model.InvalidManifestException:
            logger.exception('Exception when generating manifest')
            raise model.DataModelException('Could not load or generate manifest')
|