This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/image/docker/schema1.py
Joseph Schorr e344d4a5cf Enhancements for Docker schema implementations in preparing for supporting schema 2 in the OCI model
This adds additional required properties and methods to the Docker schema interface to allow us to treat both schema1 and schema2 manifests and lists logically equivalent from the OCI mode perspective
2018-11-12 23:27:01 +02:00

522 lines
17 KiB
Python

"""
schema1 implements pure data transformations according to the Docker Manifest v2.1 Specification.
https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md
"""
import hashlib
import json
import logging
from collections import namedtuple, OrderedDict
from datetime import datetime
import dateutil.parser
from jsonschema import validate as validate_schema, ValidationError
from jwkest.jws import SIGNER_ALGS, keyrep
from jwt.utils import base64url_encode, base64url_decode
from digest import digest_tools
from image.docker import ManifestException
from image.docker.interfaces import ManifestInterface
from image.docker.v1 import DockerV1Metadata
logger = logging.getLogger(__name__)
# Content Types
DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+json'
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
DOCKER_SCHEMA1_CONTENT_TYPES = {DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE,
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE}
# Keys for signature-related data
DOCKER_SCHEMA1_SIGNATURES_KEY = 'signatures'
DOCKER_SCHEMA1_HEADER_KEY = 'header'
DOCKER_SCHEMA1_SIGNATURE_KEY = 'signature'
DOCKER_SCHEMA1_PROTECTED_KEY = 'protected'
DOCKER_SCHEMA1_FORMAT_LENGTH_KEY = 'formatLength'
DOCKER_SCHEMA1_FORMAT_TAIL_KEY = 'formatTail'
# Keys for manifest-related data
DOCKER_SCHEMA1_REPO_NAME_KEY = 'name'
DOCKER_SCHEMA1_REPO_TAG_KEY = 'tag'
DOCKER_SCHEMA1_ARCH_KEY = 'architecture'
DOCKER_SCHEMA1_FS_LAYERS_KEY = 'fsLayers'
DOCKER_SCHEMA1_BLOB_SUM_KEY = 'blobSum'
DOCKER_SCHEMA1_HISTORY_KEY = 'history'
DOCKER_SCHEMA1_V1_COMPAT_KEY = 'v1Compatibility'
DOCKER_SCHEMA1_SCHEMA_VER_KEY = 'schemaVersion'
# Format for time used in the protected payload.
_ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
# The algorithm we use to sign the JWS.
_JWS_SIGNING_ALGORITHM = 'RS256'
class MalformedSchema1Manifest(ManifestException):
"""
Raised when a manifest fails an assertion that should be true according to the Docker Manifest
v2.1 Specification.
"""
pass
class InvalidSchema1Signature(ManifestException):
"""
Raised when there is a failure verifying the signature of a signed Docker 2.1 Manifest.
"""
pass
class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_metadata',
'compressed_size'])):
"""
Represents all of the data about an individual layer in a given Manifest.
This is the union of the fsLayers (digest) and the history entries (v1_compatibility).
"""
class Schema1V1Metadata(namedtuple('Schema1V1Metadata', ['image_id', 'parent_image_id', 'created',
'comment', 'command', 'labels'])):
"""
Represents the necessary data extracted from the v1 compatibility string in a given layer of a
Manifest.
"""
class DockerSchema1Manifest(ManifestInterface):
METASCHEMA = {
'type': 'object',
'properties': {
DOCKER_SCHEMA1_SIGNATURES_KEY: {
'type': 'array',
'items': {
'type': 'object',
'properties': {
DOCKER_SCHEMA1_PROTECTED_KEY: {
'type': 'string',
},
DOCKER_SCHEMA1_HEADER_KEY: {
'type': 'object',
'properties': {
'alg': {
'type': 'string',
},
'jwk': {
'type': 'object',
},
},
'required': ['alg', 'jwk'],
},
DOCKER_SCHEMA1_SIGNATURE_KEY: {
'type': 'string',
},
},
'required': [DOCKER_SCHEMA1_PROTECTED_KEY, DOCKER_SCHEMA1_HEADER_KEY,
DOCKER_SCHEMA1_SIGNATURE_KEY],
},
},
DOCKER_SCHEMA1_REPO_TAG_KEY: {
'type': 'string',
},
DOCKER_SCHEMA1_REPO_NAME_KEY: {
'type': 'string',
},
DOCKER_SCHEMA1_HISTORY_KEY: {
'type': 'array',
'items': {
'type': 'object',
'properties': {
DOCKER_SCHEMA1_V1_COMPAT_KEY: {
'type': 'string',
},
},
'required': [DOCKER_SCHEMA1_V1_COMPAT_KEY],
},
},
DOCKER_SCHEMA1_FS_LAYERS_KEY: {
'type': 'array',
'items': {
'type': 'object',
'properties': {
DOCKER_SCHEMA1_BLOB_SUM_KEY: {
'type': 'string',
},
},
'required': [DOCKER_SCHEMA1_BLOB_SUM_KEY],
},
},
},
'required': [DOCKER_SCHEMA1_REPO_TAG_KEY,
DOCKER_SCHEMA1_REPO_NAME_KEY, DOCKER_SCHEMA1_FS_LAYERS_KEY,
DOCKER_SCHEMA1_HISTORY_KEY],
}
def __init__(self, manifest_bytes, validate=True):
self._layers = None
self._bytes = manifest_bytes
try:
self._parsed = json.loads(manifest_bytes)
except ValueError as ve:
raise MalformedSchema1Manifest('malformed manifest data: %s' % ve)
try:
validate_schema(self._parsed, DockerSchema1Manifest.METASCHEMA)
except ValidationError as ve:
raise MalformedSchema1Manifest('manifest data does not match schema: %s' % ve)
self._signatures = self._parsed.get(DOCKER_SCHEMA1_SIGNATURES_KEY)
self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY]
self._architecture = self._parsed[DOCKER_SCHEMA1_ARCH_KEY]
repo_name = self._parsed[DOCKER_SCHEMA1_REPO_NAME_KEY]
repo_name_tuple = repo_name.split('/')
if len(repo_name_tuple) > 1:
self._namespace, self._repo_name = repo_name_tuple
elif len(repo_name_tuple) == 1:
self._namespace = ''
self._repo_name = repo_name_tuple[0]
else:
raise MalformedSchema1Manifest('malformed repository name: %s' % repo_name)
if validate:
self._validate()
@classmethod
def for_latin1_bytes(cls, encoded_bytes, validate=True):
return DockerSchema1Manifest(encoded_bytes.encode('utf-8'), validate)
def _validate(self):
if not self._signatures:
return
for signature in self._signatures:
bytes_to_verify = '{0}.{1}'.format(signature['protected'],
base64url_encode(self._payload))
signer = SIGNER_ALGS[signature['header']['alg']]
key = keyrep(signature['header']['jwk'])
gk = key.get_key()
sig = base64url_decode(signature['signature'].encode('utf-8'))
verified = signer.verify(bytes_to_verify, sig, gk)
if not verified:
raise InvalidSchema1Signature()
@property
def schema_version(self):
return 1
@property
def content_type(self):
return (DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
if self._signatures else DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE)
@property
def media_type(self):
return self.content_type
@property
def signatures(self):
return self._signatures
@property
def namespace(self):
return self._namespace
@property
def repo_name(self):
return self._repo_name
@property
def tag(self):
return self._tag
@property
def json(self):
return self._bytes
@property
def bytes(self):
return self._bytes
@property
def manifest_json(self):
return self._parsed
@property
def manifest_dict(self):
return self._parsed
@property
def digest(self):
return digest_tools.sha256_digest(self._payload)
@property
def image_ids(self):
return {mdata.v1_metadata.image_id for mdata in self.layers}
@property
def legacy_image_ids(self):
return {mdata.v1_metadata.image_id for mdata in self.layers}
@property
def parent_image_ids(self):
return {mdata.v1_metadata.parent_image_id for mdata in self.layers
if mdata.v1_metadata.parent_image_id}
@property
def checksums(self):
return list({str(mdata.digest) for mdata in self.layers})
@property
def leaf_layer_v1_image_id(self):
return self.layers[-1].v1_metadata.image_id
@property
def leaf_layer(self):
return self.layers[-1]
@property
def created_datetime(self):
created_datetime_str = self.leaf_layer.v1_metadata.created
if created_datetime_str is None:
return None
try:
return dateutil.parser.parse(created_datetime_str).replace(tzinfo=None)
except:
# parse raises different exceptions, so we cannot use a specific kind of handler here.
return None
@property
def layers(self):
if self._layers is None:
self._layers = list(self._generate_layers())
return self._layers
@property
def blob_digests(self):
return [str(layer.digest) for layer in self.layers]
def child_manifests(self, lookup_manifest_fn):
return None
def get_manifest_labels(self, lookup_config_fn):
return self.layers[-1].v1_metadata.labels
def unsigned(self):
if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
return self
# Create an unsigned version of the manifest.
builder = DockerSchema1ManifestBuilder(self._namespace, self._repo_name, self._tag,
self._architecture)
for layer in reversed(self.layers):
builder.add_layer(str(layer.digest), layer.raw_v1_metadata)
return builder.build()
def _generate_layers(self):
"""
Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
starting from the base image and working toward the leaf node.
"""
for blob_sum_obj, history_obj in reversed(zip(self._parsed[DOCKER_SCHEMA1_FS_LAYERS_KEY],
self._parsed[DOCKER_SCHEMA1_HISTORY_KEY])):
try:
image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY])
except digest_tools.InvalidDigestException:
raise MalformedSchema1Manifest('could not parse manifest digest: %s' %
blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY])
metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY]
v1_metadata = json.loads(metadata_string)
command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
command = json.dumps(command_list) if command_list else None
if not 'id' in v1_metadata:
raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON')
labels = v1_metadata.get('config', {}).get('Labels', {}) or {}
extracted = Schema1V1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
v1_metadata.get('created'), v1_metadata.get('comment'),
command, labels)
compressed_size = v1_metadata.get('Size')
yield Schema1Layer(image_digest, extracted, metadata_string, compressed_size)
@property
def _payload(self):
if self._signatures is None:
return self._bytes
protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY])
parsed_protected = json.loads(base64url_decode(protected))
signed_content_head = self._bytes[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]]
signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY]))
return signed_content_head + signed_content_tail
def generate_legacy_layers(self, images_map, lookup_config_fn):
return self.rewrite_invalid_image_ids(images_map)
def rewrite_invalid_image_ids(self, images_map):
"""
Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata.
If Docker gives us a layer with a v1 image ID that already points to existing
content, but the checksums don't match, then we need to rewrite the image ID
to something new in order to ensure consistency.
"""
# Used to synthesize a new "content addressable" image id
digest_history = hashlib.sha256()
has_rewritten_ids = False
updated_id_map = {}
for layer in self.layers:
digest_str = str(layer.digest)
extracted_v1_metadata = layer.v1_metadata
working_image_id = extracted_v1_metadata.image_id
# Update our digest_history hash for the new layer data.
digest_history.update(digest_str)
digest_history.update("@")
digest_history.update(layer.raw_v1_metadata.encode('utf-8'))
digest_history.update("|")
# Ensure that the v1 image's storage matches the V2 blob. If not, we've
# found a data inconsistency and need to create a new layer ID for the V1
# image, and all images that follow it in the ancestry chain.
digest_mismatch = (extracted_v1_metadata.image_id in images_map and
images_map[extracted_v1_metadata.image_id].content_checksum != digest_str)
if digest_mismatch or has_rewritten_ids:
working_image_id = digest_history.hexdigest()
has_rewritten_ids = True
# Store the new docker id in the map
updated_id_map[extracted_v1_metadata.image_id] = working_image_id
# Lookup the parent image for the layer, if any.
parent_image_id = extracted_v1_metadata.parent_image_id
if parent_image_id is not None:
parent_image_id = updated_id_map.get(parent_image_id, parent_image_id)
# Synthesize and store the v1 metadata in the db.
v1_metadata_json = layer.raw_v1_metadata
if has_rewritten_ids:
v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map)
updated_image = DockerV1Metadata(
namespace_name=self.namespace,
repo_name=self.repo_name,
image_id=working_image_id,
created=extracted_v1_metadata.created,
comment=extracted_v1_metadata.comment,
command=extracted_v1_metadata.command,
compat_json=v1_metadata_json,
parent_image_id=parent_image_id,
checksum=None, # TODO: Check if we need this.
content_checksum=digest_str,
)
yield updated_image
class DockerSchema1ManifestBuilder(object):
"""
A convenient abstraction around creating new DockerSchema1Manifests.
"""
def __init__(self, namespace_name, repo_name, tag, architecture='amd64'):
repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)
if namespace_name == '':
repo_name_key = repo_name
self._base_payload = {
DOCKER_SCHEMA1_REPO_TAG_KEY: tag,
DOCKER_SCHEMA1_REPO_NAME_KEY: repo_name_key,
DOCKER_SCHEMA1_ARCH_KEY: architecture,
DOCKER_SCHEMA1_SCHEMA_VER_KEY: 1,
}
self._fs_layer_digests = []
self._history = []
def add_layer(self, layer_digest, v1_json_metadata):
self._fs_layer_digests.append({
DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest,
})
self._history.append({
DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata,
})
return self
def build(self, json_web_key=None):
"""
Builds a DockerSchema1Manifest object, with optional signature.
"""
payload = OrderedDict(self._base_payload)
payload.update({
DOCKER_SCHEMA1_HISTORY_KEY: self._history,
DOCKER_SCHEMA1_FS_LAYERS_KEY: self._fs_layer_digests,
})
payload_str = json.dumps(payload, indent=3)
if json_web_key is None:
return DockerSchema1Manifest(payload_str)
split_point = payload_str.rfind('\n}')
protected_payload = {
'formatTail': base64url_encode(payload_str[split_point:]),
'formatLength': split_point,
'time': datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU),
}
protected = base64url_encode(json.dumps(protected_payload))
logger.debug('Generated protected block: %s', protected)
bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str))
signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM]
signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key()))
logger.debug('Generated signature: %s', signature)
public_members = set(json_web_key.public_members)
public_key = {comp: value for comp, value in json_web_key.to_dict().items()
if comp in public_members}
signature_block = {
DOCKER_SCHEMA1_HEADER_KEY: {'jwk': public_key, 'alg': _JWS_SIGNING_ALGORITHM},
DOCKER_SCHEMA1_SIGNATURE_KEY: signature,
DOCKER_SCHEMA1_PROTECTED_KEY: protected,
}
logger.debug('Encoded signature block: %s', json.dumps(signature_block))
payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]})
return DockerSchema1Manifest(json.dumps(payload, indent=3))
def _updated_v1_metadata(v1_metadata_json, updated_id_map):
"""
Updates v1_metadata with new image IDs.
"""
parsed = json.loads(v1_metadata_json)
parsed['id'] = updated_id_map[parsed['id']]
if parsed.get('parent') and parsed['parent'] in updated_id_map:
parsed['parent'] = updated_id_map[parsed['parent']]
if parsed.get('container_config', {}).get('Image'):
existing_image = parsed['container_config']['Image']
if existing_image in updated_id_map:
parsed['container_config']['image'] = updated_id_map[existing_image]
return json.dumps(parsed)