diff --git a/image/docker/schema1.py b/image/docker/schema1.py index a216d6fb9..2e076e058 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -15,7 +15,7 @@ import dateutil.parser from jsonschema import validate as validate_schema, ValidationError -from jwkest.jws import SIGNER_ALGS, keyrep +from jwkest.jws import SIGNER_ALGS, keyrep, BadSignature from jwt.utils import base64url_encode, base64url_decode from digest import digest_tools @@ -23,6 +23,7 @@ from image.docker import ManifestException from image.docker.types import ManifestImageLayer from image.docker.interfaces import ManifestInterface from image.docker.v1 import DockerV1Metadata +from image.docker.schemautil import ensure_utf8, to_canonical_json logger = logging.getLogger(__name__) @@ -198,14 +199,19 @@ class DockerSchema1Manifest(ManifestInterface): if not self._signatures: return + payload_str = ensure_utf8(self._payload) for signature in self._signatures: - bytes_to_verify = '{0}.{1}'.format(signature['protected'], - base64url_encode(self._payload)) + bytes_to_verify = '{0}.{1}'.format(signature['protected'], base64url_encode(payload_str)) signer = SIGNER_ALGS[signature['header']['alg']] key = keyrep(signature['header']['jwk']) gk = key.get_key() sig = base64url_decode(signature['signature'].encode('utf-8')) - verified = signer.verify(bytes_to_verify, sig, gk) + + try: + verified = signer.verify(bytes_to_verify, sig, gk) + except BadSignature: + raise InvalidSchema1Signature() + if not verified: raise InvalidSchema1Signature() @@ -264,7 +270,7 @@ class DockerSchema1Manifest(ManifestInterface): @property def digest(self): - return digest_tools.sha256_digest(self._payload) + return digest_tools.sha256_digest(ensure_utf8(self._payload)) @property def image_ids(self): @@ -369,7 +375,7 @@ class DockerSchema1Manifest(ManifestInterface): v1_metadata = json.loads(metadata_string) command_list = v1_metadata.get('container_config', {}).get('Cmd', None) - command = json.dumps(command_list) if command_list else None + command = to_canonical_json(command_list) if command_list else None if not 'id' in v1_metadata: raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON') @@ -530,7 +536,7 @@ class DockerSchema1ManifestBuilder(object): return self - def build(self, json_web_key=None): + def build(self, json_web_key=None, ensure_ascii=True): """ Builds a DockerSchema1Manifest object, with optional signature. """ @@ -540,7 +546,7 @@ class DockerSchema1ManifestBuilder(object): DOCKER_SCHEMA1_FS_LAYERS_KEY: self._fs_layer_digests, }) - payload_str = json.dumps(payload, indent=3) + payload_str = json.dumps(payload, indent=3, ensure_ascii=ensure_ascii) if json_web_key is None: return DockerSchema1Manifest(payload_str) @@ -551,9 +557,10 @@ class DockerSchema1ManifestBuilder(object): 'formatLength': split_point, 'time': datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU), } - protected = base64url_encode(json.dumps(protected_payload)) + protected = base64url_encode(json.dumps(protected_payload, ensure_ascii=ensure_ascii)) logger.debug('Generated protected block: %s', protected) + payload_str = ensure_utf8(payload_str) bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str)) signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM] @@ -571,10 +578,8 @@ class DockerSchema1ManifestBuilder(object): } logger.debug('Encoded signature block: %s', json.dumps(signature_block)) - payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]}) - - return DockerSchema1Manifest(json.dumps(payload, indent=3)) + return DockerSchema1Manifest(json.dumps(payload, indent=3, ensure_ascii=ensure_ascii)) def _updated_v1_metadata(v1_metadata_json, updated_id_map): @@ -592,4 +597,4 @@ def _updated_v1_metadata(v1_metadata_json, updated_id_map): if existing_image in updated_id_map: parsed['container_config']['image'] = updated_id_map[existing_image] - return json.dumps(parsed) + return to_canonical_json(parsed) diff --git a/image/docker/schema2/config.py b/image/docker/schema2/config.py index f46c96a71..dc0ee475c 100644 --- a/image/docker/schema2/config.py +++ b/image/docker/schema2/config.py @@ -102,6 +102,7 @@ from dateutil.parser import parse as parse_date from digest import digest_tools from image.docker import ManifestException +from image.docker.schemautil import ensure_utf8 DOCKER_SCHEMA2_CONFIG_HISTORY_KEY = "history" @@ -185,7 +186,7 @@ class DockerSchema2Config(object): self._config_bytes = config_bytes try: - self._parsed = json.loads(config_bytes) + self._parsed = json.loads(ensure_utf8(config_bytes)) except ValueError as ve: raise MalformedSchema2Config('malformed config data: %s' % ve) @@ -197,12 +198,12 @@ class DockerSchema2Config(object): @property def digest(self): """ Returns the digest of this config object. """ - return digest_tools.sha256_digest(self._config_bytes) + return digest_tools.sha256_digest(ensure_utf8(self._config_bytes)) @property def size(self): """ Returns the size of this config object. """ - return len(self._config_bytes) + return len(ensure_utf8(self._config_bytes)) @property def bytes(self): diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py index d964e8792..233ce9f6e 100644 --- a/image/docker/schema2/list.py +++ b/image/docker/schema2/list.py @@ -12,6 +12,7 @@ from image.docker.schema1 import DockerSchema1Manifest from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE, DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE) from image.docker.schema2.manifest import DockerSchema2Manifest +from image.docker.schemautil import ensure_utf8 logger = logging.getLogger(__name__) @@ -174,7 +175,7 @@ class DockerSchema2ManifestList(ManifestInterface): self._manifest_bytes = manifest_bytes try: - self._parsed = json.loads(manifest_bytes) + self._parsed = json.loads(ensure_utf8(manifest_bytes)) except ValueError as ve: raise MalformedSchema2ManifestList('malformed manifest data: %s' % ve) @@ -195,7 +196,7 @@ class DockerSchema2ManifestList(ManifestInterface): @property def digest(self): """ The digest of the manifest, including type prefix. """ - return digest_tools.sha256_digest(self._manifest_bytes) + return digest_tools.sha256_digest(ensure_utf8(self._manifest_bytes)) @property def media_type(self): diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index 53d3f3268..5731fa76d 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -16,6 +16,7 @@ from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_SIZE) from image.docker.schema1 import DockerSchema1ManifestBuilder from image.docker.schema2.config import DockerSchema2Config +from image.docker.schemautil import ensure_utf8 # Keys. DOCKER_SCHEMA2_MANIFEST_VERSION_KEY = 'schemaVersion' @@ -128,12 +129,13 @@ class DockerSchema2Manifest(ManifestInterface): } def __init__(self, manifest_bytes): - self._filesystem_layers = None self._payload = manifest_bytes + + self._filesystem_layers = None self._cached_built_config = None try: - self._parsed = json.loads(manifest_bytes) + self._parsed = json.loads(ensure_utf8(self._payload)) except ValueError as ve: raise MalformedSchema2Manifest('malformed manifest data: %s' % ve) @@ -164,7 +166,7 @@ class DockerSchema2Manifest(ManifestInterface): @property def digest(self): - return digest_tools.sha256_digest(self._payload) + return digest_tools.sha256_digest(ensure_utf8(self._payload)) @property def config(self): @@ -408,7 +410,7 @@ class DockerSchema2ManifestBuilder(object): urls=urls, is_remote=bool(urls))) - def build(self): + def build(self, ensure_ascii=True): """ Builds and returns the DockerSchema2Manifest. """ assert self.filesystem_layers assert self.config @@ -444,4 +446,4 @@ class DockerSchema2ManifestBuilder(object): _build_layer(layer) for layer in self.filesystem_layers ], } - return DockerSchema2Manifest(json.dumps(manifest_dict, indent=3)) + return DockerSchema2Manifest(json.dumps(manifest_dict, ensure_ascii=ensure_ascii, indent=3)) diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index 4f1aec2bd..f8bfea5aa 100644 --- a/image/docker/schema2/test/test_manifest.py +++ b/image/docker/schema2/test/test_manifest.py @@ -1,5 +1,8 @@ +# -*- coding: utf-8 -*- + import json import pytest +import os from app import docker_v2_signing_key from image.docker.schema1 import (DockerSchema1ManifestBuilder, @@ -7,6 +10,7 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder, DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE) from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest, DockerSchema2ManifestBuilder, EMPTY_LAYER_BLOB_DIGEST) +from image.docker.schema2.config import DockerSchema2Config from image.docker.schema2.test.test_config import CONFIG_BYTES from image.docker.schemautil import ContentRetrieverForTesting @@ -351,3 +355,67 @@ def test_remote_layer_manifest(): assert set(manifest.blob_digests) == {'sha256:adef', 'sha256:abcd', 'sha256:1352', 'sha256:1353'} assert set(manifest.local_blob_digests) == {'sha256:abcd', 'sha256:1352', 'sha256:1353'} + + +def test_unencoded_unicode_manifest(): + builder = DockerSchema2ManifestBuilder() + builder.add_layer('sha256:abc123', 123) + builder.set_config_digest('sha256:def456', 2000) + manifest = builder.build() + + retriever = ContentRetrieverForTesting.for_config({ + "config": { + "author": u"Sômé guy", + }, + "rootfs": {"type": "layers", "diff_ids": []}, + "history": [ + { + "created": "2018-04-03T18:37:09.284840891Z", + "created_by": "base", + "author": u"Sômé guy", + }, + ], + }, 'sha256:def456', 2000, ensure_ascii=False) + + layers = list(manifest.get_layers(retriever)) + assert layers[0].author == u"Sômé guy" + + +def test_build_unencoded_unicode_manifest(): + config_json = json.dumps({ + "config": { + "author": u"Sômé guy", + }, + "rootfs": {"type": "layers", "diff_ids": []}, + "history": [ + { + "created": "2018-04-03T18:37:09.284840891Z", + "created_by": "base", + "author": u"Sômé guy", + }, + ], + }, ensure_ascii=False) + + schema2_config = DockerSchema2Config(config_json) + + builder = DockerSchema2ManifestBuilder() + builder.set_config(schema2_config) + builder.add_layer('sha256:abc123', 123) + builder.build() + + +def test_load_unicode_manifest(): + test_dir = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(test_dir, 'unicode_manifest_config.json'), 'r') as f: + retriever = ContentRetrieverForTesting() + retriever.add_digest('sha256:5bdd65cdd055c7f3bbaecdc9fd6c75f155322520f85953aa0e2724cab006d407', + f.read()) + + with open(os.path.join(test_dir, 'unicode_manifest.json'), 'r') as f: + manifest_bytes = f.read() + + manifest = DockerSchema2Manifest(manifest_bytes) + assert manifest.digest == 'sha256:97556fa8c553395bd9d8e19a04acef4716ca287ffbf6bde14dd9966053912613' + + layers = list(manifest.get_layers(retriever)) + assert layers[-1].author == u"Sômé guy" diff --git a/image/docker/schema2/test/unicode_manifest.json b/image/docker/schema2/test/unicode_manifest.json new file mode 100644 index 000000000..c783cce2f --- /dev/null +++ b/image/docker/schema2/test/unicode_manifest.json @@ -0,0 +1,16 @@ +{ + "schemaVersion": 2, + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "config": { + "mediaType": "application/vnd.docker.container.image.v1+json", + "size": 1661, + "digest": "sha256:5bdd65cdd055c7f3bbaecdc9fd6c75f155322520f85953aa0e2724cab006d407" + }, + "layers": [ + { + "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", + "size": 727978, + "digest": "sha256:90e01955edcd85dac7985b72a8374545eac617ccdddcc992b732e43cd42534af" + } + ] +} \ No newline at end of file diff --git a/image/docker/schema2/test/unicode_manifest_config.json b/image/docker/schema2/test/unicode_manifest_config.json new file mode 100644 index 000000000..d7df096a2 --- /dev/null +++ b/image/docker/schema2/test/unicode_manifest_config.json @@ -0,0 +1 @@ +{"architecture":"amd64","author":"Sômé guy","config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"],"Cmd":["sh"],"ArgsEscaped":true,"Image":"sha256:59788edf1f3e78cd0ebe6ce1446e9d10788225db3dedcfd1a59f764bad2b2690","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":[],"Labels":null},"container":"de786c5a14d0622c39dd9639abf60a4ee299ed0ee4ef3848342f46f13a77d2c8","container_config":{"Hostname":"de786c5a14d0","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"],"Cmd":["/bin/sh","-c","#(nop) ","MAINTAINER Sômé guy"],"ArgsEscaped":true,"Image":"sha256:59788edf1f3e78cd0ebe6ce1446e9d10788225db3dedcfd1a59f764bad2b2690","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":[],"Labels":{}},"created":"2018-12-17T19:02:18.9295865Z","docker_version":"17.09.0-ce","history":[{"created":"2018-10-02T17:19:34.03981888Z","created_by":"/bin/sh -c #(nop) ADD file:63eebd629a5f7558c361be0305df5f16baac1d3bbec014b7c486e28812441969 in / "},{"created":"2018-10-02T17:19:34.239926273Z","created_by":"/bin/sh -c #(nop) CMD [\"sh\"]","empty_layer":true},{"created":"2018-12-17T19:02:18.9295865Z","author":"Sômé guy","created_by":"/bin/sh -c #(nop) MAINTAINER Sômé guy","empty_layer":true}],"os":"linux","rootfs":{"type":"layers","diff_ids":["sha256:8a788232037eaf17794408ff3df6b922a1aedf9ef8de36afdae3ed0b0381907b"]}} \ No newline at end of file diff --git a/image/docker/schemas.py b/image/docker/schemas.py index 8bc46051f..f3d48d676 100644 --- a/image/docker/schemas.py +++ b/image/docker/schemas.py @@ -10,6 +10,13 @@ def parse_manifest_from_bytes(manifest_bytes, media_type, validate=True): """ Parses and returns a manifest from the given bytes, for the given media type. Raises a ManifestException if the parse fails for some reason. """ + # NOTE: Docker sometimes pushed manifests encoded as utf-8, so decode them + # if we can. Otherwise, treat the string as already unicode encoded. + try: + manifest_bytes = manifest_bytes.decode('utf-8') + except: + pass + if media_type == DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE: return DockerSchema2Manifest(manifest_bytes) diff --git a/image/docker/schemautil.py b/image/docker/schemautil.py index 1a231c62c..1840a6dba 100644 --- a/image/docker/schemautil.py +++ b/image/docker/schemautil.py @@ -16,9 +16,37 @@ class ContentRetrieverForTesting(ContentRetriever): return self.digests.get(digest) @classmethod - def for_config(cls, config_obj, digest, size): - config_str = json.dumps(config_obj) + def for_config(cls, config_obj, digest, size, ensure_ascii=True): + config_str = json.dumps(config_obj, ensure_ascii=ensure_ascii) padded_string = config_str + ' ' * (size - len(config_str)) digests = {} digests[digest] = padded_string return ContentRetrieverForTesting(digests) + + +def ensure_utf8(unicode_or_str): + """ Ensures the given string is a utf-8 encoded str and not a unicode type. """ + if isinstance(unicode_or_str, unicode): + return unicode_or_str.encode('utf-8') + + return unicode_or_str + + +class _CustomEncoder(json.JSONEncoder): + def encode(self, o): + encoded = super(_CustomEncoder, self).encode(o) + if isinstance(o, basestring): + encoded = encoded.replace('<', '\\u003c') + encoded = encoded.replace('>', '\\u003e') + encoded = encoded.replace('&', '\\u0026') + return encoded + + +def to_canonical_json(value, ensure_ascii=True, indent=None): + """ Returns the canonical JSON string form of the given value, + as per the guidelines in https://github.com/docker/distribution/blob/master/docs/spec/json.md. + + `indent` is allowed only for the purposes of indenting for debugging. + """ + return json.dumps(value, ensure_ascii=ensure_ascii, sort_keys=True, separators=(',', ':'), + cls=_CustomEncoder, indent=indent) diff --git a/image/docker/test/manifest_unencoded_unicode.json b/image/docker/test/manifest_unencoded_unicode.json new file mode 100644 index 000000000..5b3110c27 --- /dev/null +++ b/image/docker/test/manifest_unencoded_unicode.json @@ -0,0 +1,44 @@ +{ + "schemaVersion": 1, + "name": "devtable/testimage", + "tag": "latest", + "architecture": "amd64", + "fsLayers": [ + { + "blobSum": "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4" + }, + { + "blobSum": "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4" + }, + { + "blobSum": "sha256:90e01955edcd85dac7985b72a8374545eac617ccdddcc992b732e43cd42534af" + } + ], + "history": [ + { + "v1Compatibility": "{\"architecture\":\"amd64\",\"author\":\"Sômé guy\",\"config\":{\"Hostname\":\"\",\"Domainname\":\"\",\"User\":\"\",\"AttachStdin\":false,\"AttachStdout\":false,\"AttachStderr\":false,\"Tty\":false,\"OpenStdin\":false,\"StdinOnce\":false,\"Env\":[\"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"],\"Cmd\":[\"sh\"],\"ArgsEscaped\":true,\"Image\":\"sha256:59788edf1f3e78cd0ebe6ce1446e9d10788225db3dedcfd1a59f764bad2b2690\",\"Volumes\":null,\"WorkingDir\":\"\",\"Entrypoint\":null,\"OnBuild\":[],\"Labels\":null},\"container\":\"de786c5a14d0622c39dd9639abf60a4ee299ed0ee4ef3848342f46f13a77d2c8\",\"container_config\":{\"Hostname\":\"de786c5a14d0\",\"Domainname\":\"\",\"User\":\"\",\"AttachStdin\":false,\"AttachStdout\":false,\"AttachStderr\":false,\"Tty\":false,\"OpenStdin\":false,\"StdinOnce\":false,\"Env\":[\"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"],\"Cmd\":[\"/bin/sh\",\"-c\",\"#(nop) \",\"MAINTAINER Sômé guy\"],\"ArgsEscaped\":true,\"Image\":\"sha256:59788edf1f3e78cd0ebe6ce1446e9d10788225db3dedcfd1a59f764bad2b2690\",\"Volumes\":null,\"WorkingDir\":\"\",\"Entrypoint\":null,\"OnBuild\":[],\"Labels\":{}},\"created\":\"2018-12-17T19:02:18.9295865Z\",\"docker_version\":\"17.09.0-ce\",\"id\":\"b68e6d1f5027887177ddf83c2b9566e1f9eb38454af649b2c0806d13c4c2f01d\",\"os\":\"linux\",\"parent\":\"61b2663f44edc9a6af340b9bfd46d17d8ed2574ffe289e0d95c0476da3c6faac\",\"throwaway\":true}" + }, + { + "v1Compatibility": "{\"id\":\"61b2663f44edc9a6af340b9bfd46d17d8ed2574ffe289e0d95c0476da3c6faac\",\"parent\":\"5327db1e651c0f49157ace3ffd8569c7361b1f2e61d0b49ff617e83a42bf78d6\",\"created\":\"2018-10-02T17:19:34.239926273Z\",\"container_config\":{\"Cmd\":[\"/bin/sh -c #(nop) CMD [\\\"sh\\\"]\"]},\"throwaway\":true}" + }, + { + "v1Compatibility": "{\"id\":\"5327db1e651c0f49157ace3ffd8569c7361b1f2e61d0b49ff617e83a42bf78d6\",\"created\":\"2018-10-02T17:19:34.03981888Z\",\"container_config\":{\"Cmd\":[\"/bin/sh -c #(nop) ADD file:63eebd629a5f7558c361be0305df5f16baac1d3bbec014b7c486e28812441969 in / \"]}}" + } + ], + "signatures": [ + { + "header": { + "jwk": { + "crv": "P-256", + "kid": "AARA:PFUD:3V54:7F2S:2P7E:WMCU:WRE7:KUYD:CFKH:UHZ7:AZ4I:UQEX", + "kty": "EC", + "x": "34N4h_uM7FedPw4k3_VabKlt7qoBWpHgpko7zE0RkeY", + "y": "LhxxtCYh_b1EwUbl3-tQFTbg1mTu34vMxj4UaKjWZk8" + }, + "alg": "ES256" + }, + "signature": "eAhgOTAxmWLK25O5lfpJA9ZuTvEdm-E-8qS4pbaYkKwWq9Nc0iLmJ9tKy3QBWP0QtXmK8dz2J0CpCvV0xCheSw", + "protected": "eyJmb3JtYXRMZW5ndGgiOjI2MTQsImZvcm1hdFRhaWwiOiJDbjAiLCJ0aW1lIjoiMjAxOC0xMi0xN1QxOToxMDo1M1oifQ" + } + ] +} \ No newline at end of file diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index 950de0049..f2f9cfcf5 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -1,10 +1,13 @@ +# -*- coding: utf-8 -*- + import os -import hashlib import json import pytest -from image.docker.schema1 import MalformedSchema1Manifest, DockerSchema1Manifest +from app import docker_v2_signing_key +from image.docker.schema1 import (MalformedSchema1Manifest, DockerSchema1Manifest, + DockerSchema1ManifestBuilder) @pytest.mark.parametrize('json_data', [ '', @@ -130,3 +133,32 @@ def test_validate_manifest_with_unicode_encoded(): digest = manifest.digest assert digest == 'sha256:dde3714ce7e23edc6413aa85c0b42792e4f2f79e9ea36afc154d63ff3d04e86c' assert manifest.created_datetime + + +def test_validate_manifest_with_unencoded_unicode(): + test_dir = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(test_dir, 'manifest_unencoded_unicode.json'), 'r') as f: + manifest_bytes = f.read() + + manifest = DockerSchema1Manifest(manifest_bytes) + digest = manifest.digest + assert digest == 'sha256:5d8a0f34744a39bf566ba430251adc0cc86587f86aed3ac2acfb897f349777bc' + assert manifest.created_datetime + + layers = list(manifest.get_layers(None)) + assert layers[-1].author == u'Sômé guy' + + +@pytest.mark.parametrize('with_key', [ + None, + docker_v2_signing_key, +]) +def test_build_unencoded_unicode_manifest(with_key): + builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag') + builder.add_layer('sha256:abcde', json.dumps({ + 'id': 'someid', + 'author': u'Sômé guy', + }, ensure_ascii=False)) + + built = builder.build(with_key, ensure_ascii=False) + built._validate() diff --git a/image/docker/test/test_schemautil.py b/image/docker/test/test_schemautil.py new file mode 100644 index 000000000..360a74bb7 --- /dev/null +++ b/image/docker/test/test_schemautil.py @@ -0,0 +1,23 @@ +import pytest + +from image.docker.schemautil import to_canonical_json + +@pytest.mark.parametrize('input, expected_output', [ + pytest.param({}, '{}', id='empty object'), + pytest.param({'b': 2, 'a': 1}, '{"a":1,"b":2}', id='object with sorted keys'), + pytest.param('hello world', '"hello world"', id='basic string'), + pytest.param('hey & hi', '"hey \\u0026 hi"', id='string with &'), + pytest.param('', '"\\u003chey\\u003e"', id='string with brackets'), + pytest.param({ + "zxcv": [{}, True, 1000000000, 'tyui'], + "asdf": 1, + "qwer": [], + }, '{"asdf":1,"qwer":[],"zxcv":[{},true,1000000000,"tyui"]}', id='example canonical'), +]) +def test_to_canonical_json(input, expected_output): + result = to_canonical_json(input) + assert result == expected_output + + # Ensure the result is utf-8. + assert isinstance(result, str) + result.decode('utf-8') diff --git a/test/registry/protocol_fixtures.py b/test/registry/protocol_fixtures.py index 0b7fd2b44..f9ec7e0bd 100644 --- a/test/registry/protocol_fixtures.py +++ b/test/registry/protocol_fixtures.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import random import string @@ -24,6 +26,20 @@ def basic_images(): ] +@pytest.fixture(scope="session") +def unicode_images(): + """ Returns basic images for push and pull testing that contain unicode in the image metadata. """ + # Note: order is from base layer down to leaf. + parent_bytes = layer_bytes_for_contents('parent contents') + image_bytes = layer_bytes_for_contents('some contents') + return [ + Image(id='parentid', bytes=parent_bytes, parent_id=None), + Image(id='someid', bytes=image_bytes, parent_id='parentid', + config={'comment': u'the Pawe\xc5\x82 Kami\xc5\x84ski image', + 'author': u'Sômé guy'}), + ] + + @pytest.fixture(scope="session") def different_images(): """ Returns different basic images for push and pull testing. """ diff --git a/test/registry/protocol_v2.py b/test/registry/protocol_v2.py index e449f8bdf..c0176f5e4 100644 --- a/test/registry/protocol_v2.py +++ b/test/registry/protocol_v2.py @@ -281,12 +281,12 @@ class V2Protocol(RegistryProtocol): if images[-1].config: config['config'] = images[-1].config - config_json = json.dumps(config) + config_json = json.dumps(config, ensure_ascii=options.ensure_ascii) schema2_config = DockerSchema2Config(config_json) builder.set_config(schema2_config) - blobs[schema2_config.digest] = schema2_config.bytes - return builder.build() + blobs[schema2_config.digest] = schema2_config.bytes.encode('utf-8') + return builder.build(ensure_ascii=options.ensure_ascii) def build_schema1(self, namespace, repo_name, tag_name, images, blobs, options): builder = DockerSchema1ManifestBuilder(namespace, repo_name, tag_name) @@ -311,10 +311,14 @@ class V2Protocol(RegistryProtocol): if image.created is not None: layer_dict['created'] = image.created - builder.add_layer(checksum, json.dumps(layer_dict)) + builder.add_layer(checksum, json.dumps(layer_dict, ensure_ascii=options.ensure_ascii)) # Build the manifest. - return builder.build(self.jwk) + built = builder.build(self.jwk, ensure_ascii=options.ensure_ascii) + + # Validate it before we send it. + DockerSchema1Manifest(built.bytes) + return built def push(self, session, namespace, repo_name, tag_names, images, credentials=None, expected_failure=None, options=None): @@ -368,7 +372,7 @@ class V2Protocol(RegistryProtocol): tag_or_digest = tag_name if not options.push_by_manifest_digest else manifest.digest self.conduct(session, 'PUT', '/v2/%s/manifests/%s' % (self.repo_name(namespace, repo_name), tag_or_digest), - data=manifest.bytes, + data=manifest.bytes.encode('utf-8'), expected_status=(put_code, expected_failure, V2ProtocolSteps.PUT_MANIFEST), headers=manifest_headers) diff --git a/test/registry/protocols.py b/test/registry/protocols.py index f3a834e19..c28875240 100644 --- a/test/registry/protocols.py +++ b/test/registry/protocols.py @@ -81,6 +81,7 @@ class ProtocolOptions(object): self.push_by_manifest_digest = False self.request_addr = None self.skip_blob_push_checks = False + self.ensure_ascii = True @add_metaclass(ABCMeta) @@ -120,7 +121,7 @@ class RegistryProtocol(object): def conduct(self, session, method, url, expected_status=200, params=None, data=None, json_data=None, headers=None, auth=None, options=None): if json_data is not None: - data = json.dumps(json_data) + data = json.dumps(json_data).encode('utf-8') headers = headers or {} headers['Content-Type'] = 'application/json' diff --git a/test/registry/registry_tests.py b/test/registry/registry_tests.py index 176c8a295..afb5878e8 100644 --- a/test/registry/registry_tests.py +++ b/test/registry/registry_tests.py @@ -1773,3 +1773,33 @@ def test_pull_manifest_list_schema2_only(v22_protocol, basic_images, different_i if has_amd64_linux: assert result.manifests['latest'].media_type == DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE + + +def test_push_pull_unicode(pusher, puller, unicode_images, liveserver_session, app_reloader): + """ Test: Push an image with unicode inside and then pull it. """ + credentials = ('devtable', 'password') + + # Push a new repository. + pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_images, + credentials=credentials) + + # Pull the repository to verify. + puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_images, + credentials=credentials) + + +def test_push_pull_unicode_direct(pusher, puller, unicode_images, liveserver_session, app_reloader): + """ Test: Push an image with *unescaped* unicode inside and then pull it. """ + credentials = ('devtable', 'password') + + # Turn off automatic unicode encoding when building the manifests. + options = ProtocolOptions() + options.ensure_ascii = False + + # Push a new repository. + pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_images, + credentials=credentials, options=options) + + # Pull the repository to verify. + puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_images, + credentials=credentials, options=options)