import json
import hashlib
import logging

from collections import namedtuple, OrderedDict
from datetime import datetime

from jwkest.jws import SIGNER_ALGS, keyrep
from jwt.utils import base64url_encode, base64url_decode

from digest import digest_tools


logger = logging.getLogger(__name__)


DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json'
DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json'

DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
                                DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE]


# These are used to extract backwards compatibility data from Docker Manifest Schema 1.
ExtractedLayerMetadata = namedtuple(
  'ExtractedLayerMetadata',
  ['digest', 'v1_metadata', 'v1_metadata_str']
)
ExtractedDockerV1Metadata = namedtuple(
  'ExtractedDockerV1Metadata',
  ['image_id', 'parent_image_id', 'created', 'comment', 'command']
)


# Constants used for Docker Manifest Version 2, Schema 1.
_DOCKER_SCHEMA_1_SIGNATURES_KEY = 'signatures'
_DOCKER_SCHEMA_1_PROTECTED_KEY = 'protected'
_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY = 'formatLength'
_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY = 'formatTail'
_DOCKER_SCHEMA_1_REPO_NAME_KEY = 'name'
_DOCKER_SCHEMA_1_REPO_TAG_KEY = 'tag'
_DOCKER_SCHEMA_1_FS_LAYERS_KEY = 'fsLayers'
_DOCKER_SCHEMA_1_HISTORY_KEY = 'history'
_DOCKER_SCHEMA_1_BLOB_SUM_KEY = 'blobSum'
_DOCKER_SCHEMA_1_V1_COMPAT_KEY = 'v1Compatibility'
_DOCKER_SCHEMA_1_ARCH_KEY = 'architecture'
_DOCKER_SCHEMA_1_SCHEMA_VER_KEY = 'schemaVersion'
_ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
_JWS_ALGORITHM = 'RS256'


class ManifestException(Exception):
  pass


class ManifestMalformed(ManifestException):
  pass


class ManifestSignatureFailure(ManifestException):
  pass


def _updated_v1_metadata(v1_metadata_json, updated_id_map):
  """ Rewrites the image IDs found in a v1 compatibility JSON blob according to the given map. """
  parsed = json.loads(v1_metadata_json)
  parsed['id'] = updated_id_map[parsed['id']]

  if parsed.get('parent') and parsed['parent'] in updated_id_map:
    parsed['parent'] = updated_id_map[parsed['parent']]

  if parsed.get('container_config', {}).get('Image'):
    existing_image = parsed['container_config']['Image']
    if existing_image in updated_id_map:
      # Note: this must write back to the capitalized 'Image' key read above.
      parsed['container_config']['Image'] = updated_id_map[existing_image]

  return json.dumps(parsed)
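

# Illustrative sketch, not part of the original module: given
# updated_id_map = {'oldid': 'newid'}, the blob '{"id": "oldid"}' is rewritten
# to '{"id": "newid"}'; 'parent' and 'container_config.Image' references are
# remapped the same way whenever they appear in the map.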


class DockerSchema1Manifest(object):
  """ Represents a parsed, signed Docker Manifest Version 2, Schema 1. """
  def __init__(self, manifest_bytes, validate=True):
    self._layers = None
    self._bytes = manifest_bytes

    self._parsed = json.loads(manifest_bytes)
    self._signatures = self._parsed[_DOCKER_SCHEMA_1_SIGNATURES_KEY]
    self._tag = self._parsed[_DOCKER_SCHEMA_1_REPO_TAG_KEY]

    repo_name_tuple = self._parsed[_DOCKER_SCHEMA_1_REPO_NAME_KEY].split('/')
    if len(repo_name_tuple) > 1:
      self._namespace, self._repo_name = repo_name_tuple
    elif len(repo_name_tuple) == 1:
      self._namespace = ''
      self._repo_name = repo_name_tuple[0]
    else:
      raise ManifestMalformed('malformed repository name')

    if validate:
      self._validate()

  def _validate(self):
    """ Verifies each JWS signature over the signed payload, raising on any failure. """
    for signature in self._signatures:
      bytes_to_verify = '{0}.{1}'.format(signature['protected'],
                                         base64url_encode(self.payload))
      signer = SIGNER_ALGS[signature['header']['alg']]
      key = keyrep(signature['header']['jwk'])
      gk = key.get_key()
      sig = base64url_decode(signature['signature'].encode('utf-8'))
      verified = signer.verify(bytes_to_verify, sig, gk)
      if not verified:
        raise ManifestSignatureFailure()

  @property
  def signatures(self):
    return self._signatures

  @property
  def namespace(self):
    return self._namespace

  @property
  def repo_name(self):
    return self._repo_name

  @property
  def tag(self):
    return self._tag

  @property
  def bytes(self):
    return self._bytes

  @property
  def manifest_json(self):
    return self._parsed

  @property
  def digest(self):
    return digest_tools.sha256_digest(self.payload)

  @property
  def image_ids(self):
    return {mdata.v1_metadata.image_id for mdata in self.layers}

  @property
  def parent_image_ids(self):
    return {mdata.v1_metadata.parent_image_id for mdata in self.layers
            if mdata.v1_metadata.parent_image_id}

  @property
  def checksums(self):
    return list({str(mdata.digest) for mdata in self.layers})

  @property
  def layers(self):
    if self._layers is None:
      self._layers = list(self._generate_layers())
    return self._layers

  def _generate_layers(self):
    """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
        starting from the base image and working toward the leaf node.
    """
    # fsLayers and history are listed leaf-first in a schema 1 manifest, so walk
    # them in reverse to yield layers from the base image upward.
    for blob_sum_obj, history_obj in reversed(zip(self._parsed[_DOCKER_SCHEMA_1_FS_LAYERS_KEY],
                                                  self._parsed[_DOCKER_SCHEMA_1_HISTORY_KEY])):
      try:
        image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY])
      except digest_tools.InvalidDigestException:
        raise ManifestMalformed('could not parse manifest digest: %s' %
                                blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY])

      metadata_string = history_obj[_DOCKER_SCHEMA_1_V1_COMPAT_KEY]

      v1_metadata = json.loads(metadata_string)
      command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
      command = json.dumps(command_list) if command_list else None

      if 'id' not in v1_metadata:
        raise ManifestMalformed('invalid manifest v1 history')

      extracted = ExtractedDockerV1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
                                            v1_metadata.get('created'), v1_metadata.get('comment'),
                                            command)
      yield ExtractedLayerMetadata(image_digest, extracted, metadata_string)

  @property
  def payload(self):
    """ Returns the exact byte sequence that was signed: the manifest body up to
        formatLength bytes, followed by the base64url-decoded formatTail from the
        protected block.
    """
    protected = str(self._signatures[0][_DOCKER_SCHEMA_1_PROTECTED_KEY])
    parsed_protected = json.loads(base64url_decode(protected))
    signed_content_head = self._bytes[:parsed_protected[_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY]]
    signed_content_tail = base64url_decode(str(parsed_protected[_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY]))
    return signed_content_head + signed_content_tail
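
  # Illustrative note, not in the original module: for a manifest pretty-printed
  # with indent=3 (as in the builder below), formatLength points just before the
  # top-level closing brace and formatTail decodes to that tail (typically '\n}'),
  # so the payload reproduces the document as it was signed even after the
  # 'signatures' key is appended.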

  def rewrite_invalid_image_ids(self, images_map):
    """
    Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata.

    If Docker gives us a layer with a v1 image ID that already points to existing
    content, but the checksums don't match, then we need to rewrite the image ID
    to something new in order to ensure consistency.
    """
    # Used to synthesize a new "content addressable" image ID.
    digest_history = hashlib.sha256()

    has_rewritten_ids = False
    updated_id_map = {}
    for extracted_layer_metadata in self.layers:
      digest_str = str(extracted_layer_metadata.digest)
      extracted_v1_metadata = extracted_layer_metadata.v1_metadata
      working_image_id = extracted_v1_metadata.image_id

      # Update our digest_history hash for the new layer data.
      digest_history.update(digest_str)
      digest_history.update("@")
      digest_history.update(extracted_layer_metadata.v1_metadata_str.encode('utf-8'))
      digest_history.update("|")

      # Ensure that the v1 image's storage matches the V2 blob. If not, we've
      # found a data inconsistency and need to create a new layer ID for the V1
      # image, and all images that follow it in the ancestry chain.
      digest_mismatch = (extracted_v1_metadata.image_id in images_map and
                         images_map[extracted_v1_metadata.image_id].content_checksum != digest_str)
      if digest_mismatch or has_rewritten_ids:
        working_image_id = digest_history.hexdigest()
        has_rewritten_ids = True

      # Store the new docker id in the map.
      updated_id_map[extracted_v1_metadata.image_id] = working_image_id

      # Ensure the parent image for the layer (if any) is known, and use its
      # rewritten ID if it was changed above. Layers are walked base-first, so
      # the parent's entry is already in updated_id_map.
      parent_image_id = extracted_v1_metadata.parent_image_id
      if parent_image_id is not None:
        if parent_image_id not in images_map:
          raise ManifestMalformed(
            'Parent not found with image ID: {0}'.format(parent_image_id)
          )
        parent_image_id = updated_id_map.get(parent_image_id, parent_image_id)

      # Synthesize and store the v1 metadata in the db.
      v1_metadata_json = extracted_layer_metadata.v1_metadata_str
      if has_rewritten_ids:
        v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map)

      # Note: the namespace, repo and checksum fields are filled from the manifest
      # itself so that DockerV1Metadata (which requires all of its fields) can be
      # constructed.
      yield DockerV1Metadata(
        namespace_name=self.namespace,
        repo_name=self.repo_name,
        image_id=working_image_id,
        checksum=None,
        content_checksum=digest_str,
        created=extracted_v1_metadata.created,
        comment=extracted_v1_metadata.comment,
        command=extracted_v1_metadata.command,
        compat_json=v1_metadata_json,
        parent_image_id=parent_image_id,
      )
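
  # Illustrative sketch, not in the original module: images_map is assumed to
  # map an existing v1 image ID to an object carrying a content_checksum field
  # (e.g. a DockerV1Metadata). Calling
  #   list(manifest.rewrite_invalid_image_ids(images_map))
  # yields one DockerV1Metadata per layer, with fresh content-addressable IDs
  # synthesized for any layer whose stored checksum disagrees with the
  # manifest's blobSum, and for every descendant of such a layer.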


class DockerSchema1ManifestBuilder(object):
  """ Class which represents a manifest which is currently being built. """
  def __init__(self, namespace_name, repo_name, tag, architecture='amd64'):
    repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)
    if namespace_name == '':
      repo_name_key = repo_name

    self._base_payload = {
      _DOCKER_SCHEMA_1_REPO_TAG_KEY: tag,
      _DOCKER_SCHEMA_1_REPO_NAME_KEY: repo_name_key,
      _DOCKER_SCHEMA_1_ARCH_KEY: architecture,
      _DOCKER_SCHEMA_1_SCHEMA_VER_KEY: 1,
    }

    self._fs_layer_digests = []
    self._history = []

  def add_layer(self, layer_digest, v1_json_metadata):
    self._fs_layer_digests.append({
      _DOCKER_SCHEMA_1_BLOB_SUM_KEY: layer_digest,
    })
    self._history.append({
      _DOCKER_SCHEMA_1_V1_COMPAT_KEY: v1_json_metadata,
    })
    return self

  def build(self, json_web_key):
    """ Build the payload and sign it, returning a DockerSchema1Manifest object.
    """
    payload = OrderedDict(self._base_payload)
    payload.update({
      _DOCKER_SCHEMA_1_HISTORY_KEY: self._history,
      _DOCKER_SCHEMA_1_FS_LAYERS_KEY: self._fs_layer_digests,
    })

    payload_str = json.dumps(payload, indent=3)

    # Find the top-level closing brace: everything before it is signed in place
    # (formatLength) and the tail is carried in the protected block (formatTail).
    split_point = payload_str.rfind('\n}')

    protected_payload = {
      'formatTail': base64url_encode(payload_str[split_point:]),
      'formatLength': split_point,
      'time': datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU),
    }
    protected = base64url_encode(json.dumps(protected_payload))
    logger.debug('Generated protected block: %s', protected)

    bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str))

    signer = SIGNER_ALGS[_JWS_ALGORITHM]
    signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key()))
    logger.debug('Generated signature: %s', signature)

    # Include only the public portions of the key in the signature block.
    public_members = set(json_web_key.public_members)
    public_key = {comp: value for comp, value in json_web_key.to_dict().items()
                  if comp in public_members}

    signature_block = {
      'header': {
        'jwk': public_key,
        'alg': _JWS_ALGORITHM,
      },
      'signature': signature,
      _DOCKER_SCHEMA_1_PROTECTED_KEY: protected,
    }

    logger.debug('Encoded signature block: %s', json.dumps(signature_block))

    payload.update({
      _DOCKER_SCHEMA_1_SIGNATURES_KEY: [signature_block],
    })

    return DockerSchema1Manifest(json.dumps(payload, indent=3))


Repository = namedtuple('Repository', ['id', 'name', 'namespace_name'])

Tag = namedtuple('Tag', ['name', 'repository'])

ManifestJSON = namedtuple('ManifestJSON', ['digest', 'json'])

DockerV1Metadata = namedtuple('DockerV1Metadata', ['namespace_name',
                                                   'repo_name',
                                                   'image_id',
                                                   'checksum',
                                                   'content_checksum',
                                                   'created',
                                                   'comment',
                                                   'command',
                                                   'parent_image_id',
                                                   'compat_json'])

BlobUpload = namedtuple('BlobUpload', ['uuid',
                                       'byte_count',
                                       'uncompressed_byte_count',
                                       'chunk_count',
                                       'sha_state',
                                       'location_name',
                                       'storage_metadata',
                                       'piece_sha_state',
                                       'piece_hashes'])

Blob = namedtuple('Blob', ['digest', 'size', 'locations'])