From feee49be9e71561a6eea3bd6f37cc2d8da59b949 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 18 Dec 2018 14:52:19 -0500 Subject: [PATCH] Ensure we encode the config in manifest schema 2 via the canonical JSON format --- image/docker/schema1.py | 6 +++--- image/docker/schema2/manifest.py | 2 +- image/docker/schemautil.py | 22 +++++++++++++++++++++- image/docker/test/test_schemautil.py | 23 +++++++++++++++++++++++ 4 files changed, 48 insertions(+), 5 deletions(-) create mode 100644 image/docker/test/test_schemautil.py diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 8c8b4cb85..2e076e058 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -23,7 +23,7 @@ from image.docker import ManifestException from image.docker.types import ManifestImageLayer from image.docker.interfaces import ManifestInterface from image.docker.v1 import DockerV1Metadata -from image.docker.schemautil import ensure_utf8 +from image.docker.schemautil import ensure_utf8, to_canonical_json logger = logging.getLogger(__name__) @@ -375,7 +375,7 @@ class DockerSchema1Manifest(ManifestInterface): v1_metadata = json.loads(metadata_string) command_list = v1_metadata.get('container_config', {}).get('Cmd', None) - command = json.dumps(command_list) if command_list else None + command = to_canonical_json(command_list) if command_list else None if not 'id' in v1_metadata: raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON') @@ -597,4 +597,4 @@ def _updated_v1_metadata(v1_metadata_json, updated_id_map): if existing_image in updated_id_map: parsed['container_config']['image'] = updated_id_map[existing_image] - return json.dumps(parsed) + return to_canonical_json(parsed) diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index 25b9fb476..5731fa76d 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -446,4 +446,4 @@ class DockerSchema2ManifestBuilder(object): _build_layer(layer) 
for layer in self.filesystem_layers ], } - return DockerSchema2Manifest(json.dumps(manifest_dict, indent=3, ensure_ascii=ensure_ascii)) + return DockerSchema2Manifest(json.dumps(manifest_dict, ensure_ascii=ensure_ascii, indent=3)) diff --git a/image/docker/schemautil.py b/image/docker/schemautil.py index 05a6ad5d2..1840a6dba 100644 --- a/image/docker/schemautil.py +++ b/image/docker/schemautil.py @@ -25,8 +25,28 @@ class ContentRetrieverForTesting(ContentRetriever): def ensure_utf8(unicode_or_str): - """ Ensures the given string is utf-8 encoded and not unicode. """ + """ Ensures the given string is a utf-8 encoded str and not a unicode type. """ if isinstance(unicode_or_str, unicode): return unicode_or_str.encode('utf-8') return unicode_or_str + + +class _CustomEncoder(json.JSONEncoder): + def encode(self, o): + encoded = super(_CustomEncoder, self).encode(o) + if isinstance(o, basestring): + encoded = encoded.replace('<', '\\u003c') + encoded = encoded.replace('>', '\\u003e') + encoded = encoded.replace('&', '\\u0026') + return encoded + + +def to_canonical_json(value, ensure_ascii=True, indent=None): + """ Returns the canonical JSON string form of the given value, + as per the guidelines in https://github.com/docker/distribution/blob/master/docs/spec/json.md. + + `indent` is allowed only for the purposes of indenting for debugging. 
+ """ + return json.dumps(value, ensure_ascii=ensure_ascii, sort_keys=True, separators=(',', ':'), + cls=_CustomEncoder, indent=indent) diff --git a/image/docker/test/test_schemautil.py b/image/docker/test/test_schemautil.py new file mode 100644 index 000000000..360a74bb7 --- /dev/null +++ b/image/docker/test/test_schemautil.py @@ -0,0 +1,23 @@ +import pytest + +from image.docker.schemautil import to_canonical_json + +@pytest.mark.parametrize('input, expected_output', [ + pytest.param({}, '{}', id='empty object'), + pytest.param({'b': 2, 'a': 1}, '{"a":1,"b":2}', id='object with sorted keys'), + pytest.param('hello world', '"hello world"', id='basic string'), + pytest.param('hey & hi', '"hey \\u0026 hi"', id='string with &'), + pytest.param('<hey>', '"\\u003chey\\u003e"', id='string with brackets'), + pytest.param({ + "zxcv": [{}, True, 1000000000, 'tyui'], + "asdf": 1, + "qwer": [], + }, '{"asdf":1,"qwer":[],"zxcv":[{},true,1000000000,"tyui"]}', id='example canonical'), +]) +def test_to_canonical_json(input, expected_output): + result = to_canonical_json(input) + assert result == expected_output + + # Ensure the result is utf-8. + assert isinstance(result, str) + result.decode('utf-8')