Ensure we encode the config in manifest schema 2 via the canonical JSON format

This commit is contained in:
Joseph Schorr 2018-12-18 14:52:19 -05:00
parent 48e584905a
commit feee49be9e
4 changed files with 48 additions and 5 deletions

View file

@ -23,7 +23,7 @@ from image.docker import ManifestException
from image.docker.types import ManifestImageLayer from image.docker.types import ManifestImageLayer
from image.docker.interfaces import ManifestInterface from image.docker.interfaces import ManifestInterface
from image.docker.v1 import DockerV1Metadata from image.docker.v1 import DockerV1Metadata
from image.docker.schemautil import ensure_utf8 from image.docker.schemautil import ensure_utf8, to_canonical_json
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -375,7 +375,7 @@ class DockerSchema1Manifest(ManifestInterface):
v1_metadata = json.loads(metadata_string) v1_metadata = json.loads(metadata_string)
command_list = v1_metadata.get('container_config', {}).get('Cmd', None) command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
command = json.dumps(command_list) if command_list else None command = to_canonical_json(command_list) if command_list else None
if not 'id' in v1_metadata: if not 'id' in v1_metadata:
raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON') raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON')
@ -597,4 +597,4 @@ def _updated_v1_metadata(v1_metadata_json, updated_id_map):
if existing_image in updated_id_map: if existing_image in updated_id_map:
parsed['container_config']['image'] = updated_id_map[existing_image] parsed['container_config']['image'] = updated_id_map[existing_image]
return json.dumps(parsed) return to_canonical_json(parsed)

View file

@ -446,4 +446,4 @@ class DockerSchema2ManifestBuilder(object):
_build_layer(layer) for layer in self.filesystem_layers _build_layer(layer) for layer in self.filesystem_layers
], ],
} }
return DockerSchema2Manifest(json.dumps(manifest_dict, indent=3, ensure_ascii=ensure_ascii)) return DockerSchema2Manifest(json.dumps(manifest_dict, ensure_ascii=ensure_ascii, indent=3))

View file

@ -25,8 +25,28 @@ class ContentRetrieverForTesting(ContentRetriever):
def ensure_utf8(unicode_or_str):
    """ Returns the given string as a utf-8 encoded str.

        Values of the unicode type are encoded to utf-8; any other value is handed
        back untouched.
    """
    if not isinstance(unicode_or_str, unicode):
        return unicode_or_str

    return unicode_or_str.encode('utf-8')
class _CustomEncoder(json.JSONEncoder):
def encode(self, o):
encoded = super(_CustomEncoder, self).encode(o)
if isinstance(o, basestring):
encoded = encoded.replace('<', '\\u003c')
encoded = encoded.replace('>', '\\u003e')
encoded = encoded.replace('&', '\\u0026')
return encoded
def to_canonical_json(value, ensure_ascii=True, indent=None):
    """ Serializes the given value into its canonical JSON string form, following the
        guidelines in https://github.com/docker/distribution/blob/master/docs/spec/json.md.

        Keys are written in sorted order and the separators carry no padding.
        `indent` exists solely so the output can be indented while debugging.
    """
    dump_options = {
        'ensure_ascii': ensure_ascii,
        'sort_keys': True,
        'separators': (',', ':'),
        'cls': _CustomEncoder,
        'indent': indent,
    }
    return json.dumps(value, **dump_options)

View file

@ -0,0 +1,23 @@
import pytest
from image.docker.schemautil import to_canonical_json
@pytest.mark.parametrize('input, expected_output', [
    pytest.param({}, '{}', id='empty object'),
    pytest.param({'b': 2, 'a': 1}, '{"a":1,"b":2}', id='object with sorted keys'),
    pytest.param('hello world', '"hello world"', id='basic string'),
    pytest.param('hey & hi', '"hey \\u0026 hi"', id='string with &'),
    pytest.param('<hey>', '"\\u003chey\\u003e"', id='string with brackets'),
    pytest.param(
        {'zxcv': [{}, True, 1000000000, 'tyui'], 'asdf': 1, 'qwer': []},
        '{"asdf":1,"qwer":[],"zxcv":[{},true,1000000000,"tyui"]}',
        id='example canonical',
    ),
])
def test_to_canonical_json(input, expected_output):
    """ Checks that each parametrized value encodes to its expected canonical JSON text. """
    encoded = to_canonical_json(input)
    assert encoded == expected_output

    # The encoder must hand back a str whose bytes decode cleanly as utf-8.
    assert isinstance(encoded, str)
    encoded.decode('utf-8')