This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/image/docker/schema2/list.py
Joseph Schorr 171c7e5238 Further fixes for unicode handling in manifests
We were occasionally trying to compute schema 2 version 1 signatures on the *unicode* representation, which was failing the signature check. This PR adds a new wrapper type called `Bytes`, which all manifests must take in, and which handles the distinction between unicode strings and UTF-8-encoded byte strings in a central location. This PR also adds a test for the manifest that was breaking in production.
2019-01-09 15:14:41 -05:00

354 lines
13 KiB
Python

import logging
import json
from cachetools import lru_cache
from jsonschema import validate as validate_schema, ValidationError
from digest import digest_tools
from image.docker import ManifestException
from image.docker.interfaces import ManifestInterface
from image.docker.schema1 import DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
from image.docker.schema1 import DockerSchema1Manifest
from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE,
DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE)
from image.docker.schema2.manifest import DockerSchema2Manifest
from util.bytes import Bytes
# Module-level logger; used below to report child manifests that fail to load.
logger = logging.getLogger(__name__)
# Keys.
# JSON field names of a manifest list document, as referenced by
# DockerSchema2ManifestList.METASCHEMA.
DOCKER_SCHEMA2_MANIFESTLIST_VERSION_KEY = 'schemaVersion'  # Top-level; must be 2.
# Used both at the top level (media type of the list itself) and inside each `manifests`
# entry (media type of the referenced child manifest).
DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY = 'mediaType'
DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY = 'size'  # Per-entry: size in bytes of the child manifest.
DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY = 'digest'  # Per-entry: digest of the child manifest.
DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY = 'manifests'  # Top-level: array of child entries.
DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY = 'platform'  # Per-entry: platform description object.
# Keys inside the per-entry `platform` object. Only `architecture` and `os` are required by
# the schema; the remaining keys are optional.
DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY = 'architecture'
DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY = 'os'
DOCKER_SCHEMA2_MANIFESTLIST_OS_VERSION_KEY = 'os.version'
DOCKER_SCHEMA2_MANIFESTLIST_OS_FEATURES_KEY = 'os.features'
DOCKER_SCHEMA2_MANIFESTLIST_FEATURES_KEY = 'features'
DOCKER_SCHEMA2_MANIFESTLIST_VARIANT_KEY = 'variant'
class MalformedSchema2ManifestList(ManifestException):
  """
  Signals that a manifest list violates a requirement of the Docker Manifest v2.2
  Specification (for example: unparseable JSON, a schema mismatch, or a child manifest
  that is missing, of the wrong size, or of an unknown content type).
  """
class LazyManifestLoader(object):
  """
  Defers fetching and parsing of a single child manifest referenced by a manifest list until
  `manifest_obj` is first accessed. Once loaded, the parsed manifest is kept on the loader and
  returned directly on later accesses.
  """
  def __init__(self, manifest_data, content_retriever):
    """
    manifest_data: the entry describing this child manifest; its digest, size and mediaType
      keys are read when the manifest is loaded.
    content_retriever: object providing `get_manifest_bytes_with_digest(digest)`.
    """
    self._manifest_data = manifest_data
    self._content_retriever = content_retriever
    self._loaded_manifest = None

  @property
  def manifest_obj(self):
    """ Returns the parsed child manifest, loading it on first access.

    Raises MalformedSchema2ManifestList if the child manifest cannot be found, has an
    unexpected size or has an unknown content type.
    """
    if self._loaded_manifest is None:
      self._loaded_manifest = self._load_manifest()

    return self._loaded_manifest

  def _load_manifest(self):
    """ Retrieves the child manifest bytes by digest, checks them against the declared size and
    wraps them in the manifest class matching the declared media type.
    """
    digest = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY]
    size = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY]
    manifest_bytes = self._content_retriever.get_manifest_bytes_with_digest(digest)
    if manifest_bytes is None:
      raise MalformedSchema2ManifestList('Could not find child manifest with digest `%s`' % digest)

    if len(manifest_bytes) != size:
      # Interpolate the sizes into the message; passing them as extra exception arguments
      # would leave the `%s` placeholders unformatted.
      raise MalformedSchema2ManifestList('Size of manifest does not match that retrieved: %s vs %s' %
                                         (len(manifest_bytes), size))

    content_type = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY]
    if content_type == DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE:
      return DockerSchema2Manifest(Bytes.for_string_or_unicode(manifest_bytes))

    if content_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
      # Schema 1 children are constructed with validation disabled.
      return DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=False)

    raise MalformedSchema2ManifestList('Unknown manifest content type')
class DockerSchema2ManifestList(ManifestInterface):
  """
  A parsed and schema-validated Docker Schema 2 manifest list.

  The list itself exposes no layers or blobs; each entry references (by digest) a child
  manifest for one platform. Child manifests are only retrieved when the `manifest_obj` of a
  `LazyManifestLoader` returned by `manifests()` is accessed.
  """
  # JSON schema that the parsed manifest list must satisfy.
  METASCHEMA = {
    'type': 'object',
    'properties': {
      DOCKER_SCHEMA2_MANIFESTLIST_VERSION_KEY: {
        'type': 'number',
        'description': 'The version of the manifest list. Must always be `2`.',
        'minimum': 2,
        'maximum': 2,
      },
      DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY: {
        'type': 'string',
        'description': 'The media type of the manifest list.',
        'enum': [DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE],
      },
      DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY: {
        'type': 'array',
        'description': 'The manifests field contains a list of manifests for specific platforms',
        'items': {
          'type': 'object',
          'properties': {
            DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY: {
              'type': 'string',
              'description': 'The MIME type of the referenced object. This will generally be ' +
                             'application/vnd.docker.distribution.manifest.v2+json, but it ' +
                             'could also be application/vnd.docker.distribution.manifest.v1+json ' +
                             'if the manifest list references a legacy schema-1 manifest.',
              'enum': [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE],
            },
            DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY: {
              'type': 'number',
              'description': 'The size in bytes of the object. This field exists so that a ' +
                             'client will have an expected size for the content before ' +
                             'validating. If the length of the retrieved content does not ' +
                             'match the specified length, the content should not be trusted.',
            },
            DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY: {
              'type': 'string',
              'description': 'The content addressable digest of the manifest in the blob store',
            },
            DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY: {
              'type': 'object',
              'description': 'The platform object describes the platform which the image in ' +
                             'the manifest runs on',
              'properties': {
                DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY: {
                  'type': 'string',
                  'description': 'Specifies the CPU architecture, for example amd64 or ppc64le.',
                },
                DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY: {
                  'type': 'string',
                  'description': 'Specifies the operating system, for example linux or windows',
                },
                DOCKER_SCHEMA2_MANIFESTLIST_OS_VERSION_KEY: {
                  'type': 'string',
                  'description': 'Specifies the operating system version, for example 10.0.10586',
                },
                DOCKER_SCHEMA2_MANIFESTLIST_OS_FEATURES_KEY: {
                  'type': 'array',
                  'description': 'specifies an array of strings, each listing a required OS ' +
                                 'feature (for example on Windows win32k)',
                  'items': {
                    'type': 'string',
                  },
                },
                DOCKER_SCHEMA2_MANIFESTLIST_VARIANT_KEY: {
                  'type': 'string',
                  'description': 'Specifies a variant of the CPU, for example armv6l to specify ' +
                                 'a particular CPU variant of the ARM CPU',
                },
                DOCKER_SCHEMA2_MANIFESTLIST_FEATURES_KEY: {
                  'type': 'array',
                  'description': 'specifies an array of strings, each listing a required CPU ' +
                                 'feature (for example sse4 or aes).',
                  'items': {
                    'type': 'string',
                  },
                },
              },
              'required': [DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY,
                           DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY],
            },
          },
          'required': [DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY,
                       DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY,
                       DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY,
                       DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY],
        },
      },
    },
    'required': [DOCKER_SCHEMA2_MANIFESTLIST_VERSION_KEY,
                 DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY,
                 DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY],
  }

  def __init__(self, manifest_bytes):
    """ Parses and validates the manifest list contained in `manifest_bytes` (a `Bytes`).

    Raises MalformedSchema2ManifestList if the data is not valid JSON or does not match
    METASCHEMA.
    """
    assert isinstance(manifest_bytes, Bytes)

    self._layers = None
    self._manifest_bytes = manifest_bytes

    # Memo for `manifests()`: None, or a (content_retriever, loaders) pair for the retriever
    # most recently passed in.
    self._manifests_cache = None

    try:
      self._parsed = json.loads(manifest_bytes.as_unicode())
    except ValueError as ve:
      raise MalformedSchema2ManifestList('malformed manifest data: %s' % ve)

    try:
      validate_schema(self._parsed, DockerSchema2ManifestList.METASCHEMA)
    except ValidationError as ve:
      raise MalformedSchema2ManifestList('manifest data does not match schema: %s' % ve)

  @property
  def is_manifest_list(self):
    """ Returns whether this manifest is a list. """
    return True

  @property
  def schema_version(self):
    """ The schema version of this manifest; always 2. """
    return 2

  @property
  def digest(self):
    """ The digest of the manifest, including type prefix. """
    return digest_tools.sha256_digest(self._manifest_bytes.as_encoded_str())

  @property
  def media_type(self):
    """ The media type of the schema. """
    return self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY]

  @property
  def manifest_dict(self):
    """ Returns the manifest as a dictionary ready to be serialized to JSON. """
    return self._parsed

  @property
  def bytes(self):
    """ The `Bytes` object this manifest list was constructed from. """
    return self._manifest_bytes

  def get_layers(self, content_retriever):
    """ Returns the layers of this manifest, from base to leaf or None if this kind of manifest
        does not support layers. """
    return None

  @property
  def blob_digests(self):
    # Manifest lists have no blob digests, since everything is stored as a manifest.
    return []

  @property
  def local_blob_digests(self):
    """ Same as `blob_digests`. """
    return self.blob_digests

  @property
  def layers_compressed_size(self):
    """ Always None for a manifest list. """
    return None

  def manifests(self, content_retriever):
    """ Returns the manifests in the list, as a list of LazyManifestLoader's.

    The result is remembered on this instance for the most recently used content retriever
    (matched by identity), so repeated calls return the same loaders along with any child
    manifests they have already loaded.
    """
    # Memoized per instance rather than with a decorator-level LRU cache: such a cache is
    # shared by every manifest list, keeps the last instance and retriever alive and requires
    # both to be hashable.
    cached = self._manifests_cache
    if cached is not None and cached[0] is content_retriever:
      return cached[1]

    manifests = self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY]
    loaders = [LazyManifestLoader(m, content_retriever) for m in manifests]
    self._manifests_cache = (content_retriever, loaders)
    return loaders

  def child_manifests(self, content_retriever):
    """ Alias for `manifests()`. """
    return self.manifests(content_retriever)

  def child_manifest_digests(self):
    """ Returns the digests of all child manifests, in list order. """
    return [m[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY]
            for m in self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY]]

  def get_manifest_labels(self, content_retriever):
    """ Always returns None for a manifest list. """
    return None

  def get_leaf_layer_v1_image_id(self, content_retriever):
    """ Always returns None for a manifest list. """
    return None

  def get_legacy_image_ids(self, content_retriever):
    """ Always returns None for a manifest list. """
    return None

  @property
  def has_legacy_image(self):
    """ Always False for a manifest list. """
    return False

  def get_requires_empty_layer_blob(self, content_retriever):
    """ Always returns False for a manifest list. """
    return False

  def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
    """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
        If none, returns None.
    """
    legacy_manifest = self._get_legacy_manifest(content_retriever)
    if legacy_manifest is None:
      return None

    return legacy_manifest.get_schema1_manifest(namespace_name, repo_name, tag_name,
                                                content_retriever)

  def convert_manifest(self, allowed_mediatypes, namespace_name, repo_name, tag_name,
                       content_retriever):
    """ Returns this list itself if its media type is allowed; otherwise delegates the
        conversion to the amd64/linux child manifest. Returns None if there is no such child.
    """
    if self.media_type in allowed_mediatypes:
      return self

    legacy_manifest = self._get_legacy_manifest(content_retriever)
    if legacy_manifest is None:
      return None

    return legacy_manifest.convert_manifest(allowed_mediatypes, namespace_name, repo_name,
                                            tag_name, content_retriever)

  def _get_legacy_manifest(self, content_retriever):
    """ Returns the manifest under this list with architecture amd64 and os linux, if any, or None
        if none or error.
    """
    for manifest_ref in self.manifests(content_retriever):
      platform = manifest_ref._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY]
      architecture = platform[DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY]
      os = platform[DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY]
      if architecture != 'amd64' or os != 'linux':
        continue

      # Only the first amd64/linux entry is considered; a load failure yields None rather
      # than falling through to later entries.
      try:
        return manifest_ref.manifest_obj
      except (ManifestException, IOError):
        logger.exception('Could not load child manifest')
        return None

    return None

  def unsigned(self):
    """ Returns this manifest list unchanged. """
    return self

  def generate_legacy_layers(self, images_map, content_retriever):
    """ Always returns None for a manifest list. """
    return None
class DockerSchema2ManifestListBuilder(object):
  """
  Collects child manifest references and assembles them into a DockerSchema2ManifestList.
  """
  def __init__(self):
    # Tuples of (digest, size, media type, platform dict), in the order they were added.
    self.manifests = []

  def add_manifest(self, manifest, architecture, os):
    """ Registers the unsigned form of `manifest` for the given architecture and OS. """
    # The list must reference the unsigned version of the manifest.
    unsigned_manifest = manifest.unsigned()
    digest = unsigned_manifest.digest
    encoded_size = len(unsigned_manifest.bytes.as_encoded_str())
    self.add_manifest_digest(digest, encoded_size, unsigned_manifest.media_type, architecture,
                             os)

  def add_manifest_digest(self, manifest_digest, manifest_size, media_type, architecture, os):
    """ Registers a child manifest described by its digest, size and media type. """
    platform = {
      DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY: architecture,
      DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY: os,
    }
    self.manifests.append((manifest_digest, manifest_size, media_type, platform))

  def build(self):
    """ Serializes the registered manifests and returns the resulting
        DockerSchema2ManifestList. At least one manifest must have been added.
    """
    assert self.manifests

    entries = []
    for added in self.manifests:
      entries.append({
        DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY: added[2],
        DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY: added[0],
        DOCKER_SCHEMA2_MANIFESTLIST_SIZE_KEY: added[1],
        DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY: added[3],
      })

    manifest_list_dict = {
      DOCKER_SCHEMA2_MANIFESTLIST_VERSION_KEY: 2,
      DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY: DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE,
      DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY: entries,
    }

    serialized = json.dumps(manifest_list_dict, indent=3)
    return DockerSchema2ManifestList(Bytes.for_string_or_unicode(serialized))