Fix handling of four-byte UTF-8 manifests

- Adds the charset: utf-8 to all the manifest responses
- Makes sure we connect to MySQL in utf8mb4 mode, to ensure we can properly read and write 4-byte utf8 strings
- Adds tests for all of the above
This commit is contained in:
Joseph Schorr 2019-01-10 16:34:56 -05:00
parent 62609fce3e
commit eb9ca8e8a8
8 changed files with 120 additions and 11 deletions

View file

@ -70,6 +70,12 @@ SCHEME_RANDOM_FUNCTION = {
}
# Extra connection keyword arguments, keyed by the database URL's driver name. Each entry is
# applied only when the caller has not already supplied that key. MySQL drivers connect with
# the utf8mb4 charset so that four-byte UTF-8 strings can be read and written.
_EXTRA_ARGS = {
  'mysql': {'charset': 'utf8mb4'},
  'mysql+pymysql': {'charset': 'utf8mb4'},
}
def pipes_concat(arg1, arg2, *extra_args):
""" Concat function for sqlite, since it doesn't support fn.Concat.
Concatenates clauses with || characters.
@ -315,6 +321,10 @@ def _db_from_url(url, db_kwargs, connect_timeout=DEFAULT_DB_CONNECT_TIMEOUT,
db_kwargs.pop('stale_timeout', None)
db_kwargs.pop('max_connections', None)
for key, value in _EXTRA_ARGS.get(parsed_url.drivername, {}).iteritems():
if key not in db_kwargs:
db_kwargs[key] = value
if allow_retry:
driver = _wrap_for_retry(driver)

View file

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
import hashlib
import json
import uuid
@ -864,3 +866,33 @@ def test_known_issue_schema1(registry_model):
assert found.digest == digest
assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
assert found.get_parsed_manifest().digest == digest
def test_unicode_emoji(registry_model):
  """ Verifies that a schema1 manifest whose layer metadata contains an emoji (a four-byte
      UTF-8 character) can be created and looked up again with its digest and encoded bytes
      unchanged.
  """
  builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'latest')

  # ensure_ascii=False leaves the emoji unescaped in the serialized layer metadata.
  layer_metadata = json.dumps({
    'id': 'someid',
    'author': u'😱',
  }, ensure_ascii=False)
  builder.add_layer('sha256:abcde', layer_metadata)

  manifest = builder.build(ensure_ascii=False)
  manifest._validate()

  # Run _populate_blob for each of the manifest's local blob digests.
  for digest in manifest.local_blob_digests:
    _populate_blob(digest)

  # Create the manifest in the database.
  repository_ref = registry_model.lookup_repository('devtable', 'simple')
  created_manifest, _ = registry_model.create_manifest_and_retarget_tag(
    repository_ref, manifest, 'latest', storage)
  assert created_manifest
  assert created_manifest.digest == manifest.digest

  created_bytes = created_manifest.internal_manifest_bytes.as_encoded_str()
  assert created_bytes == manifest.bytes.as_encoded_str()

  # Look it up again by digest and validate what comes back.
  looked_up = registry_model.lookup_manifest_by_digest(repository_ref, manifest.digest,
                                                       allow_dead=True)
  assert looked_up
  assert looked_up.digest == manifest.digest

  looked_up_bytes = looked_up.internal_manifest_bytes.as_encoded_str()
  assert looked_up_bytes == manifest.bytes.as_encoded_str()

  parsed_manifest = looked_up.get_parsed_manifest()
  assert parsed_manifest.digest == manifest.digest