Further fixes for unicode handling in manifests

We were occasionally trying to compute schema 2 version 1 signatures on the *unicode* representation, which was failing the signature check. This PR adds a new wrapper type called `Bytes`, which all manifests must take in, and which handles the unicodes vs encoded utf-8 stuff in a central location. This PR also adds a test for the manifest that was breaking in production.
This commit is contained in:
Joseph Schorr 2019-01-08 20:49:00 -05:00
parent 05fa2bcbe0
commit 171c7e5238
28 changed files with 275 additions and 106 deletions

View file

@ -12,7 +12,7 @@ from image.docker.schema1 import DockerSchema1Manifest
from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE,
DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE)
from image.docker.schema2.manifest import DockerSchema2Manifest
from image.docker.schemautil import ensure_utf8
from util.bytes import Bytes
logger = logging.getLogger(__name__)
@ -67,10 +67,10 @@ class LazyManifestLoader(object):
content_type = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY]
if content_type == DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE:
return DockerSchema2Manifest(manifest_bytes)
return DockerSchema2Manifest(Bytes.for_string_or_unicode(manifest_bytes))
if content_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
return DockerSchema1Manifest(manifest_bytes, validate=False)
return DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=False)
raise MalformedSchema2ManifestList('Unknown manifest content type')
@ -171,11 +171,13 @@ class DockerSchema2ManifestList(ManifestInterface):
}
def __init__(self, manifest_bytes):
assert isinstance(manifest_bytes, Bytes)
self._layers = None
self._manifest_bytes = manifest_bytes
try:
self._parsed = json.loads(ensure_utf8(manifest_bytes))
self._parsed = json.loads(manifest_bytes.as_unicode())
except ValueError as ve:
raise MalformedSchema2ManifestList('malformed manifest data: %s' % ve)
@ -196,7 +198,7 @@ class DockerSchema2ManifestList(ManifestInterface):
@property
def digest(self):
""" The digest of the manifest, including type prefix. """
return digest_tools.sha256_digest(ensure_utf8(self._manifest_bytes))
return digest_tools.sha256_digest(self._manifest_bytes.as_encoded_str())
@property
def media_type(self):
@ -319,7 +321,9 @@ class DockerSchema2ManifestListBuilder(object):
def add_manifest(self, manifest, architecture, os):
""" Adds a manifest to the list. """
manifest = manifest.unsigned() # Make sure we add the unsigned version to the list.
self.add_manifest_digest(manifest.digest, len(manifest.bytes), manifest.media_type,
self.add_manifest_digest(manifest.digest,
len(manifest.bytes.as_encoded_str()),
manifest.media_type,
architecture, os)
def add_manifest_digest(self, manifest_digest, manifest_size, media_type, architecture, os):
@ -345,4 +349,6 @@ class DockerSchema2ManifestListBuilder(object):
} for manifest in self.manifests
],
}
return DockerSchema2ManifestList(json.dumps(manifest_list_dict, indent=3))
json_str = Bytes.for_string_or_unicode(json.dumps(manifest_list_dict, indent=3))
return DockerSchema2ManifestList(json_str)