e344d4a5cf
This adds additional required properties and methods to the Docker schema interface to allow us to treat both schema1 and schema2 manifests and lists as logically equivalent from the OCI mode perspective
327 lines
13 KiB
Python
327 lines
13 KiB
Python
import json
|
|
import logging
|
|
import hashlib
|
|
|
|
from collections import namedtuple
|
|
from jsonschema import validate as validate_schema, ValidationError
|
|
|
|
from digest import digest_tools
|
|
from image.docker import ManifestException
|
|
from image.docker.interfaces import ManifestInterface
|
|
from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
|
|
DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
|
|
DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
|
|
DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE)
|
|
from image.docker.schema1 import DockerSchema1ManifestBuilder
|
|
from image.docker.schema2.config import DockerSchema2Config
|
|
|
|
# JSON keys used when reading and writing schema2 manifest documents.
DOCKER_SCHEMA2_MANIFEST_VERSION_KEY = 'schemaVersion'
DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY = 'mediaType'
DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY = 'config'
DOCKER_SCHEMA2_MANIFEST_SIZE_KEY = 'size'
DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY = 'digest'
DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY = 'layers'
DOCKER_SCHEMA2_MANIFEST_URLS_KEY = 'urls'

# Lightweight record types.

# Size and digest of the config blob referenced by a manifest.
DockerV2ManifestConfig = namedtuple(
    'DockerV2ManifestConfig',
    ['size', 'digest'],
)

# A single layer entry of a manifest, together with its position in the layer list.
DockerV2ManifestLayer = namedtuple(
    'DockerV2ManifestLayer',
    ['index', 'digest', 'is_remote', 'urls', 'compressed_size'],
)

# A manifest layer paired with its synthesized V1 image ID and that of its parent.
LayerWithV1ID = namedtuple(
    'LayerWithV1ID',
    ['layer', 'v1_id', 'v1_parent_id'],
)

logger = logging.getLogger(__name__)
|
|
|
|
class MalformedSchema2Manifest(ManifestException):
    """
    Signals that manifest data violates a requirement of the Docker Manifest v2.2
    Specification (for example: unparseable JSON or a document that fails schema validation).
    """
|
|
|
|
|
|
class DockerSchema2Manifest(ManifestInterface):
    """
    A parsed Docker schema2 image manifest.

    The constructor parses the given payload as JSON and validates it against METASCHEMA. The
    original payload is retained unchanged and is what `bytes` returns and what `digest` is
    computed from.
    """
    # JSON schema that every manifest document must satisfy before it is accepted.
    METASCHEMA = {
        'type': 'object',
        'properties': {
            DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: {
                'type': 'number',
                'description': 'The version of the schema. Must always be `2`.',
                'minimum': 2,
                'maximum': 2,
            },
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                'type': 'string',
                'description': 'The media type of the schema.',
                'enum': [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE],
            },
            DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: {
                'type': 'object',
                'description': 'The config field references a configuration object for a container, ' +
                               'by digest. This configuration item is a JSON blob that the runtime ' +
                               'uses to set up the container.',
                'properties': {
                    DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                        'type': 'string',
                        'description': 'The MIME type of the referenced object. This should generally be ' +
                                       'application/vnd.docker.container.image.v1+json',
                        'enum': [DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE],
                    },
                    DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
                        'type': 'number',
                        'description': 'The size in bytes of the object. This field exists so that a ' +
                                       'client will have an expected size for the content before ' +
                                       'validating. If the length of the retrieved content does not ' +
                                       'match the specified length, the content should not be trusted.',
                    },
                    DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
                        'type': 'string',
                        'description': 'The content addressable digest of the config in the blob store',
                    },
                },
                'required': [DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY, DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
                             DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY],
            },
            DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: {
                'type': 'array',
                'description': 'The layer list is ordered starting from the base ' +
                               'image (opposite order of schema1).',
                'items': {
                    'type': 'object',
                    'properties': {
                        DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: {
                            'type': 'string',
                            'description': 'The MIME type of the referenced object. This should generally be ' +
                                           'application/vnd.docker.image.rootfs.diff.tar.gzip. Layers of type ' +
                                           'application/vnd.docker.image.rootfs.foreign.diff.tar.gzip may be ' +
                                           'pulled from a remote location but they should never be pushed.',
                            'enum': [DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
                                     DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE],
                        },
                        DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: {
                            'type': 'number',
                            'description': 'The size in bytes of the object. This field exists so that a ' +
                                           'client will have an expected size for the content before ' +
                                           'validating. If the length of the retrieved content does not ' +
                                           'match the specified length, the content should not be trusted.',
                        },
                        DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: {
                            'type': 'string',
                            'description': 'The content addressable digest of the layer in the blob store',
                        },
                    },
                    'required': [
                        DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY, DOCKER_SCHEMA2_MANIFEST_SIZE_KEY,
                        DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY,
                    ],
                },
            },
        },
        'required': [DOCKER_SCHEMA2_MANIFEST_VERSION_KEY, DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY,
                     DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY, DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY],
    }

    def __init__(self, manifest_bytes):
        """
        Parses and validates the serialized manifest.

        `manifest_bytes` is the JSON-serialized manifest. Raises MalformedSchema2Manifest if it
        cannot be parsed as JSON or if the parsed document does not match METASCHEMA.
        """
        self._layers = None  # Lazily populated by the `layers` property.
        self._payload = manifest_bytes

        try:
            self._parsed = json.loads(manifest_bytes)
        except ValueError as ve:
            raise MalformedSchema2Manifest('malformed manifest data: %s' % ve)

        try:
            validate_schema(self._parsed, DockerSchema2Manifest.METASCHEMA)
        except ValidationError as ve:
            raise MalformedSchema2Manifest('manifest data does not match schema: %s' % ve)

    @property
    def schema_version(self):
        """ Returns the schema version of this manifest, which is always 2. """
        return 2

    @property
    def manifest_dict(self):
        """ Returns the parsed manifest document. """
        return self._parsed

    @property
    def media_type(self):
        """ Returns the value of the manifest's top-level media type field. """
        return self._parsed[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]

    @property
    def digest(self):
        """ Returns the result of digest_tools.sha256_digest over the original payload. """
        return digest_tools.sha256_digest(self._payload)

    @property
    def config(self):
        """ Returns a DockerV2ManifestConfig holding the size and digest of the config entry. """
        config = self._parsed[DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY]
        return DockerV2ManifestConfig(size=config[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                      digest=config[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

    @property
    def layers(self):
        """ Returns the layers of this manifest, from base to leaf. """
        if self._layers is None:
            self._layers = list(self._generate_layers())
        return self._layers

    @property
    def leaf_layer(self):
        """ Returns the last entry of `layers`. """
        return self.layers[-1]

    @property
    def leaf_layer_v1_image_id(self):
        """ Returns the synthesized V1 ID of the last layer. """
        return list(self.layers_with_v1_ids)[-1].v1_id

    @property
    def legacy_image_ids(self):
        """ Returns the synthesized V1 IDs of all layers, in `layers` order. """
        return [l.v1_id for l in self.layers_with_v1_ids]

    @property
    def blob_digests(self):
        """ Returns the digests of all layers followed by the config digest, as strings. """
        return [str(layer.digest) for layer in self.layers] + [str(self.config.digest)]

    def get_manifest_labels(self, lookup_config_fn):
        """
        Returns the `labels` of the config built from the bytes returned by
        `lookup_config_fn` for this manifest's config digest.
        """
        return self._get_built_config(lookup_config_fn).labels

    def _get_built_config(self, lookup_config_fn):
        """
        Looks up the config bytes via `lookup_config_fn(config_digest)` and wraps them in a
        DockerSchema2Config.

        Raises MalformedSchema2Manifest if the length of the retrieved bytes differs from the
        size recorded in the manifest.
        """
        config_bytes = lookup_config_fn(self.config.digest)
        if len(config_bytes) != self.config.size:
            # Interpolate the sizes into the message; passing them as separate exception
            # arguments would leave the `%s` placeholders unformatted.
            raise MalformedSchema2Manifest('Size of config does not match that retrieved: %s vs %s' %
                                           (len(config_bytes), self.config.size))

        return DockerSchema2Config(config_bytes)

    @property
    def bytes(self):
        """ Returns the original payload this manifest was constructed from. """
        return self._payload

    def child_manifests(self, lookup_manifest_fn):
        """ Always returns None; `lookup_manifest_fn` is not used. """
        return None

    def _generate_layers(self):
        """
        Yields a DockerV2ManifestLayer for each entry of the manifest's layer list, in order.

        Raises MalformedSchema2Manifest if a layer digest cannot be parsed.
        """
        for index, layer in enumerate(self._parsed[DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY]):
            content_type = layer[DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY]
            is_remote = content_type == DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE

            try:
                digest = digest_tools.Digest.parse_digest(layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])
            except digest_tools.InvalidDigestException:
                raise MalformedSchema2Manifest('could not parse manifest digest: %s' %
                                               layer[DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY])

            yield DockerV2ManifestLayer(index=index,
                                        compressed_size=layer[DOCKER_SCHEMA2_MANIFEST_SIZE_KEY],
                                        digest=digest,
                                        is_remote=is_remote,
                                        urls=layer.get(DOCKER_SCHEMA2_MANIFEST_URLS_KEY))

    @property
    def layers_with_v1_ids(self):
        """
        Yields a LayerWithV1ID for each layer, from base to leaf.

        Each access returns a fresh generator. The V1 ID of a layer is the hex digest of a
        running SHA-256 that has been fed the digest and index of that layer and of every
        layer before it; the parent ID is the previous layer's V1 ID (None for the first).
        """
        digest_history = hashlib.sha256()
        v1_layer_parent_id = None
        v1_layer_id = None

        for layer in self.layers:
            v1_layer_parent_id = v1_layer_id

            # Create a new synthesized V1 ID for the layer by adding its digest and index to the
            # existing digest history hash builder. This will ensure unique V1s across *all*
            # schemas in a repository.
            digest_history.update(str(layer.digest))
            digest_history.update("#")
            digest_history.update(str(layer.index))
            digest_history.update("|")
            v1_layer_id = digest_history.hexdigest()
            yield LayerWithV1ID(layer=layer, v1_id=v1_layer_id, v1_parent_id=v1_layer_parent_id)

    def populate_schema1_builder(self, v1_builder, lookup_config_fn):
        """ Populates a DockerSchema1ManifestBuilder with the layers and config from
            this schema. The `lookup_config_fn` is a function that, when given the config
            digest SHA, returns the associated configuration JSON bytes for this schema.

            Returns the same `v1_builder` that was passed in.
        """
        schema2_config = self._get_built_config(lookup_config_fn)

        # Build the V1 IDs for the layers.
        layers = list(self.layers_with_v1_ids)
        for layer_with_ids in reversed(layers):  # Schema1 has layers in reverse order
            v1_compatibility = schema2_config.build_v1_compatibility(layer_with_ids.layer.index,
                                                                     layer_with_ids.v1_id,
                                                                     layer_with_ids.v1_parent_id)
            v1_builder.add_layer(str(layer_with_ids.layer.digest), json.dumps(v1_compatibility))

        return v1_builder

    def generate_legacy_layers(self, images_map, lookup_config_fn):
        """
        Builds a temporary schema1 manifest from this manifest and returns the result of its
        `generate_legacy_layers(images_map, lookup_config_fn)`.
        """
        # NOTE: We use the DockerSchema1ManifestBuilder here because it already contains
        # the logic for generating the DockerV1Metadata. All of this will go away once we get
        # rid of legacy images in the database, so this is a temporary solution.
        # NOTE(review): the three empty-string builder arguments are presumably placeholder
        # identifying fields (e.g. namespace/repo/tag) -- confirm against the schema1 builder.
        v1_builder = DockerSchema1ManifestBuilder('', '', '')
        self.populate_schema1_builder(v1_builder, lookup_config_fn)
        return v1_builder.build().generate_legacy_layers(images_map, lookup_config_fn)

    def unsigned(self):
        """ Returns this manifest itself. """
        return self
|
|
|
|
|
|
class DockerSchema2ManifestBuilder(object):
    """
    Collects a config reference and an ordered list of layers, then serializes them into a
    DockerSchema2Manifest.
    """

    def __init__(self):
        self.config = None
        self.layers = []

    def set_config(self, schema2_config):
        """ Records the digest and size of the given schema2 config as the manifest's config. """
        self.set_config_digest(schema2_config.digest, schema2_config.size)

    def set_config_digest(self, config_digest, config_size):
        """ Records the given digest and size as the manifest's config reference. """
        self.config = DockerV2ManifestConfig(digest=config_digest, size=config_size)

    def add_layer(self, digest, size, urls=None):
        """ Appends a layer; it is marked remote when a non-empty `urls` is supplied. """
        position = len(self.layers)
        entry = DockerV2ManifestLayer(index=position,
                                      digest=digest,
                                      is_remote=bool(urls),
                                      urls=urls,
                                      compressed_size=size)
        self.layers.append(entry)

    def build(self):
        """ Serializes the collected config and layers and returns a DockerSchema2Manifest. """
        assert self.layers
        assert self.config

        config_section = {
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
            DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: self.config.size,
            DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(self.config.digest),
        }

        layer_section = []
        for layer in self.layers:
            has_urls = bool(layer.urls)
            if has_urls:
                layer_media_type = DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE
            else:
                layer_media_type = DOCKER_SCHEMA2_LAYER_CONTENT_TYPE

            # Keys are inserted in a fixed order (media type, size, digest, then urls) so the
            # serialized output does not depend on which branch produced the entry.
            serialized = {
                DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: layer_media_type,
                DOCKER_SCHEMA2_MANIFEST_SIZE_KEY: layer.compressed_size,
                DOCKER_SCHEMA2_MANIFEST_DIGEST_KEY: str(layer.digest),
            }
            if has_urls:
                serialized[DOCKER_SCHEMA2_MANIFEST_URLS_KEY] = layer.urls

            layer_section.append(serialized)

        document = {
            DOCKER_SCHEMA2_MANIFEST_VERSION_KEY: 2,
            DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY: DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
            DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY: config_section,
            DOCKER_SCHEMA2_MANIFEST_LAYERS_KEY: layer_section,
        }
        payload = json.dumps(document, indent=3)
        return DockerSchema2Manifest(payload)
|