Fix handling of manifests containing four-byte UTF-8 characters

- Adds the `charset: utf-8` parameter to all manifest responses
- Makes sure we connect to MySQL in utf8mb4 mode, to ensure we can properly read and write 4-byte utf8 strings
- Adds tests for all of the above
This commit is contained in:
Joseph Schorr 2019-01-10 16:34:56 -05:00
parent 62609fce3e
commit eb9ca8e8a8
8 changed files with 120 additions and 11 deletions

View file

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
import hashlib
import json
import uuid
@ -864,3 +866,33 @@ def test_known_issue_schema1(registry_model):
assert found.digest == digest
assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
assert found.get_parsed_manifest().digest == digest
def test_unicode_emoji(registry_model):
    """Round-trip a schema1 manifest whose layer metadata contains an emoji.

    The manifest is built with ``ensure_ascii=False``, written through the
    registry model, and then looked up again by digest. Both the created
    manifest and the looked-up manifest must report the same digest and the
    same encoded bytes as the manifest that was built.
    """
    # Serialize without ASCII escaping so the emoji stays a raw character.
    layer_metadata = json.dumps({
        'id': 'someid',
        'author': u'😱',
    }, ensure_ascii=False)

    schema1_builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'latest')
    schema1_builder.add_layer('sha256:abcde', layer_metadata)

    manifest = schema1_builder.build(ensure_ascii=False)
    manifest._validate()

    # Populate every blob the manifest references locally before creating it.
    for local_digest in manifest.local_blob_digests:
        _populate_blob(local_digest)

    # Write the manifest and point the 'latest' tag at it.
    repo_ref = registry_model.lookup_repository('devtable', 'simple')
    created, _ = registry_model.create_manifest_and_retarget_tag(
        repo_ref, manifest, 'latest', storage)

    assert created
    assert created.digest == manifest.digest
    assert (created.internal_manifest_bytes.as_encoded_str() ==
            manifest.bytes.as_encoded_str())

    # Fetch it back by digest and check that nothing changed in the round trip.
    looked_up = registry_model.lookup_manifest_by_digest(
        repo_ref, manifest.digest, allow_dead=True)

    assert looked_up
    assert looked_up.digest == manifest.digest
    assert (looked_up.internal_manifest_bytes.as_encoded_str() ==
            manifest.bytes.as_encoded_str())
    assert looked_up.get_parsed_manifest().digest == manifest.digest