Ensure we encode the config in manifest schema 2 via the canonical JSON format

This commit is contained in:
Joseph Schorr 2018-12-18 14:52:19 -05:00
parent 48e584905a
commit feee49be9e
4 changed files with 48 additions and 5 deletions

View file

@ -23,7 +23,7 @@ from image.docker import ManifestException
from image.docker.types import ManifestImageLayer
from image.docker.interfaces import ManifestInterface
from image.docker.v1 import DockerV1Metadata
from image.docker.schemautil import ensure_utf8
from image.docker.schemautil import ensure_utf8, to_canonical_json
logger = logging.getLogger(__name__)
@ -375,7 +375,7 @@ class DockerSchema1Manifest(ManifestInterface):
v1_metadata = json.loads(metadata_string)
command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
command = json.dumps(command_list) if command_list else None
command = to_canonical_json(command_list) if command_list else None
if not 'id' in v1_metadata:
raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON')
@ -597,4 +597,4 @@ def _updated_v1_metadata(v1_metadata_json, updated_id_map):
if existing_image in updated_id_map:
parsed['container_config']['image'] = updated_id_map[existing_image]
return json.dumps(parsed)
return to_canonical_json(parsed)

View file

@ -446,4 +446,4 @@ class DockerSchema2ManifestBuilder(object):
_build_layer(layer) for layer in self.filesystem_layers
],
}
return DockerSchema2Manifest(json.dumps(manifest_dict, indent=3, ensure_ascii=ensure_ascii))
return DockerSchema2Manifest(json.dumps(manifest_dict, ensure_ascii=ensure_ascii, indent=3))

View file

@ -25,8 +25,28 @@ class ContentRetrieverForTesting(ContentRetriever):
def ensure_utf8(unicode_or_str):
    """ Returns the given value as a utf-8 encoded str rather than a unicode object.

        Only values of the `unicode` type are encoded; any other value is handed
        back exactly as it was received.
    """
    needs_encoding = isinstance(unicode_or_str, unicode)
    return unicode_or_str.encode('utf-8') if needs_encoding else unicode_or_str
class _CustomEncoder(json.JSONEncoder):
def encode(self, o):
encoded = super(_CustomEncoder, self).encode(o)
if isinstance(o, basestring):
encoded = encoded.replace('<', '\\u003c')
encoded = encoded.replace('>', '\\u003e')
encoded = encoded.replace('&', '\\u0026')
return encoded
def to_canonical_json(value, ensure_ascii=True, indent=None):
    """ Serializes the given value into its canonical JSON string form, following the
        guidelines in https://github.com/docker/distribution/blob/master/docs/spec/json.md:
        keys are emitted in sorted order and no whitespace surrounds the separators.

        `ensure_ascii` is forwarded unchanged to the JSON encoder. `indent` should only
        be supplied when pretty-printing for debugging purposes.
    """
    dump_options = {
        'ensure_ascii': ensure_ascii,
        'sort_keys': True,
        'separators': (',', ':'),
        'cls': _CustomEncoder,
        'indent': indent,
    }
    return json.dumps(value, **dump_options)

View file

@ -0,0 +1,23 @@
import pytest
from image.docker.schemautil import to_canonical_json
# Inputs paired with the exact canonical JSON string each one must serialize to.
_CANONICAL_JSON_CASES = [
    pytest.param({}, '{}', id='empty object'),
    pytest.param({'b': 2, 'a': 1}, '{"a":1,"b":2}', id='object with sorted keys'),
    pytest.param('hello world', '"hello world"', id='basic string'),
    pytest.param('hey & hi', '"hey \\u0026 hi"', id='string with &'),
    pytest.param('<hey>', '"\\u003chey\\u003e"', id='string with brackets'),
    pytest.param(
        {
            "zxcv": [{}, True, 1000000000, 'tyui'],
            "asdf": 1,
            "qwer": [],
        },
        '{"asdf":1,"qwer":[],"zxcv":[{},true,1000000000,"tyui"]}',
        id='example canonical',
    ),
]


@pytest.mark.parametrize('input, expected_output', _CANONICAL_JSON_CASES)
def test_to_canonical_json(input, expected_output):
    """ Checks that `to_canonical_json` yields the expected canonical string for each case
        and that the returned value is a byte string which decodes as utf-8.
    """
    canonical = to_canonical_json(input)
    assert canonical == expected_output

    # The serialized form must be a plain str holding valid utf-8; decode() raises otherwise.
    assert isinstance(canonical, str)
    canonical.decode('utf-8')