Fix handling of four byte utf8 manifests

- Adds the charset: utf-8 to all the manifest responses
- Makes sure we connect to MySQL in utf8mb4 mode, to ensure we can properly read and write 4-byte utf8 strings
- Adds tests for all of the above
This commit is contained in:
Joseph Schorr 2019-01-10 16:34:56 -05:00
parent 62609fce3e
commit eb9ca8e8a8
8 changed files with 120 additions and 11 deletions

View file

@ -70,6 +70,12 @@ SCHEME_RANDOM_FUNCTION = {
}
_EXTRA_ARGS = {
'mysql': dict(charset='utf8mb4'),
'mysql+pymysql': dict(charset='utf8mb4'),
}
def pipes_concat(arg1, arg2, *extra_args):
""" Concat function for sqlite, since it doesn't support fn.Concat.
Concatenates clauses with || characters.
@ -315,6 +321,10 @@ def _db_from_url(url, db_kwargs, connect_timeout=DEFAULT_DB_CONNECT_TIMEOUT,
db_kwargs.pop('stale_timeout', None)
db_kwargs.pop('max_connections', None)
for key, value in _EXTRA_ARGS.get(parsed_url.drivername, {}).iteritems():
if key not in db_kwargs:
db_kwargs[key] = value
if allow_retry:
driver = _wrap_for_retry(driver)

View file

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
import hashlib
import json
import uuid
@ -864,3 +866,33 @@ def test_known_issue_schema1(registry_model):
assert found.digest == digest
assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
assert found.get_parsed_manifest().digest == digest
def test_unicode_emoji(registry_model):
builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'latest')
builder.add_layer('sha256:abcde', json.dumps({
'id': 'someid',
'author': u'😱',
}, ensure_ascii=False))
manifest = builder.build(ensure_ascii=False)
manifest._validate()
for blob_digest in manifest.local_blob_digests:
_populate_blob(blob_digest)
# Create the manifest in the database.
repository_ref = registry_model.lookup_repository('devtable', 'simple')
created_manifest, _ = registry_model.create_manifest_and_retarget_tag(repository_ref, manifest,
'latest', storage)
assert created_manifest
assert created_manifest.digest == manifest.digest
assert (created_manifest.internal_manifest_bytes.as_encoded_str() ==
manifest.bytes.as_encoded_str())
# Look it up again and validate.
found = registry_model.lookup_manifest_by_digest(repository_ref, manifest.digest, allow_dead=True)
assert found
assert found.digest == manifest.digest
assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
assert found.get_parsed_manifest().digest == manifest.digest

View file

@ -76,7 +76,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
supported.bytes.as_unicode(),
status=200,
headers={
'Content-Type': supported.media_type,
'Content-Type': '%s; charset=utf-8' % supported.media_type,
'Docker-Content-Digest': supported.digest,
},
)
@ -111,7 +111,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True])
return Response(supported.bytes.as_unicode(), status=200, headers={
'Content-Type': supported.media_type,
'Content-Type': '%s; charset=utf-8' % supported.media_type,
'Docker-Content-Digest': supported.digest,
})

View file

@ -178,3 +178,22 @@ def test_validate_manifest_known_issue():
layers = list(manifest.get_layers(None))
assert layers[-1].author is None
@pytest.mark.parametrize('with_key', [
None,
docker_v2_signing_key,
])
def test_validate_manifest_with_emoji(with_key):
builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag')
builder.add_layer('sha256:abcde', json.dumps({
'id': 'someid',
'author': u'😱',
}, ensure_ascii=False))
built = builder.build(with_key, ensure_ascii=False)
built._validate()
# Ensure the manifest can be reloaded.
built_bytes = built.bytes.as_encoded_str()
DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes))

View file

@ -83,6 +83,7 @@ def _init_db_path_sqlite(tmpdir_factory):
initialize_database()
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
db.obj.execute_sql('PRAGMA encoding="UTF-8";')
populate_database()
close_db_filter(None)
@ -157,7 +158,9 @@ def initialized_db(appconfig):
if not under_test_real_database:
# Make absolutely sure foreign key constraints are on.
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
db.obj.execute_sql('PRAGMA encoding="UTF-8";')
assert db.obj.execute_sql('PRAGMA foreign_keys;').fetchone()[0] == 1
assert db.obj.execute_sql('PRAGMA encoding;').fetchone()[0] == 'UTF-8'
# If under a test *real* database, setup a savepoint.
if under_test_real_database:

View file

@ -138,6 +138,20 @@ def images_with_empty_layer():
]
@pytest.fixture(scope="session")
def unicode_emoji_images():
""" Returns basic images for push and pull testing that contain unicode in the image metadata. """
# Note: order is from base layer down to leaf.
parent_bytes = layer_bytes_for_contents('parent contents')
image_bytes = layer_bytes_for_contents('some contents')
return [
Image(id='parentid', bytes=parent_bytes, parent_id=None),
Image(id='someid', bytes=image_bytes, parent_id='parentid',
config={'comment': u'😱',
'author': u'Sômé guy'}),
]
@pytest.fixture(scope="session")
def jwk():
return RSAKey(key=RSA.generate(2048))

View file

@ -168,9 +168,9 @@ class V2Protocol(RegistryProtocol):
return None
# Parse the returned manifest list and ensure it matches.
assert response.headers['Content-Type'] == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
retrieved = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
response.headers['Content-Type'])
ct, _ = response.headers['Content-Type'].split(';', 1)
assert ct == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
retrieved = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
assert retrieved.schema_version == 2
assert retrieved.is_manifest_list
assert retrieved.digest == manifestlist.digest
@ -185,9 +185,8 @@ class V2Protocol(RegistryProtocol):
headers=headers)
if expected_failure is not None:
return None
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
response.headers['Content-Type'])
ct, _ = response.headers['Content-Type'].split(';', 1)
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
assert not manifest.is_manifest_list
assert manifest.digest == manifest_digest
@ -546,11 +545,11 @@ class V2Protocol(RegistryProtocol):
return None
# Ensure the manifest returned by us is valid.
ct, _ = response.headers['Content-Type'].split(';', 1)
if not self.schema2:
assert response.headers['Content-Type'] in DOCKER_SCHEMA1_CONTENT_TYPES
assert ct in DOCKER_SCHEMA1_CONTENT_TYPES
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
response.headers['Content-Type'])
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
manifests[tag_name] = manifest
if manifest.schema_version == 1:

View file

@ -1822,3 +1822,35 @@ def test_push_legacy_pull_not_allowed(v22_protocol, v1_protocol, remote_images,
# Attempt to pull. Should fail with a 404.
v1_protocol.pull(liveserver_session, 'devtable', 'newrepo', 'latest', remote_images,
credentials=credentials, expected_failure=Failures.UNKNOWN_TAG)
def test_push_pull_emoji_unicode(pusher, puller, unicode_emoji_images, liveserver_session,
app_reloader):
""" Test: Push an image with unicode inside and then pull it. """
credentials = ('devtable', 'password')
# Push a new repository.
pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images,
credentials=credentials)
# Pull the repository to verify.
puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images,
credentials=credentials)
def test_push_pull_emoji_unicode_direct(pusher, puller, unicode_emoji_images, liveserver_session,
app_reloader):
""" Test: Push an image with *unescaped* unicode inside and then pull it. """
credentials = ('devtable', 'password')
# Turn off automatic unicode encoding when building the manifests.
options = ProtocolOptions()
options.ensure_ascii = False
# Push a new repository.
pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images,
credentials=credentials, options=options)
# Pull the repository to verify.
puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images,
credentials=credentials, options=options)