Fix handling of four-byte UTF-8 manifests

- Adds a utf-8 charset to the Content-Type of all manifest responses
- Makes sure we connect to MySQL in utf8mb4 mode, so that 4-byte UTF-8 strings can be read and written correctly
- Adds tests for all of the above
This commit is contained in:
Joseph Schorr 2019-01-10 16:34:56 -05:00
parent 62609fce3e
commit eb9ca8e8a8
8 changed files with 120 additions and 11 deletions

View file

@ -83,6 +83,7 @@ def _init_db_path_sqlite(tmpdir_factory):
initialize_database()
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
db.obj.execute_sql('PRAGMA encoding="UTF-8";')
populate_database()
close_db_filter(None)
@ -157,7 +158,9 @@ def initialized_db(appconfig):
if not under_test_real_database:
# Make absolutely sure foreign key constraints are on.
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
db.obj.execute_sql('PRAGMA encoding="UTF-8";')
assert db.obj.execute_sql('PRAGMA foreign_keys;').fetchone()[0] == 1
assert db.obj.execute_sql('PRAGMA encoding;').fetchone()[0] == 'UTF-8'
# If under a test *real* database, setup a savepoint.
if under_test_real_database:

View file

@ -138,6 +138,20 @@ def images_with_empty_layer():
]
@pytest.fixture(scope="session")
def unicode_emoji_images():
    """ Builds the two-image chain used by the unicode push and pull tests.

        The leaf image's config carries non-ASCII metadata: an emoji comment (a four-byte
        UTF-8 character) and an accented author name. The parent image has no config.
    """
    base_layer = layer_bytes_for_contents('parent contents')
    leaf_layer = layer_bytes_for_contents('some contents')

    unicode_config = {
        'comment': u'😱',
        'author': u'Sômé guy',
    }

    base_image = Image(id='parentid', bytes=base_layer, parent_id=None)
    leaf_image = Image(id='someid', bytes=leaf_layer, parent_id='parentid',
                       config=unicode_config)

    # The list is ordered with the base layer first and the leaf last.
    return [base_image, leaf_image]
@pytest.fixture(scope="session")
def jwk():
    """ Returns an RSAKey wrapping the key produced by RSA.generate(2048).

        Session-scoped, so the key is generated once per test session.
    """
    generated_key = RSA.generate(2048)
    return RSAKey(key=generated_key)

View file

@ -168,9 +168,9 @@ class V2Protocol(RegistryProtocol):
return None
# Parse the returned manifest list and ensure it matches.
assert response.headers['Content-Type'] == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
retrieved = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
response.headers['Content-Type'])
ct, _ = response.headers['Content-Type'].split(';', 1)
assert ct == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
retrieved = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
assert retrieved.schema_version == 2
assert retrieved.is_manifest_list
assert retrieved.digest == manifestlist.digest
@ -185,9 +185,8 @@ class V2Protocol(RegistryProtocol):
headers=headers)
if expected_failure is not None:
return None
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
response.headers['Content-Type'])
ct, _ = response.headers['Content-Type'].split(';', 1)
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
assert not manifest.is_manifest_list
assert manifest.digest == manifest_digest
@ -546,11 +545,11 @@ class V2Protocol(RegistryProtocol):
return None
# Ensure the manifest returned by us is valid.
ct, _ = response.headers['Content-Type'].split(';', 1)
if not self.schema2:
assert response.headers['Content-Type'] in DOCKER_SCHEMA1_CONTENT_TYPES
assert ct in DOCKER_SCHEMA1_CONTENT_TYPES
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
response.headers['Content-Type'])
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
manifests[tag_name] = manifest
if manifest.schema_version == 1:

View file

@ -1822,3 +1822,35 @@ def test_push_legacy_pull_not_allowed(v22_protocol, v1_protocol, remote_images,
# Attempt to pull. Should fail with a 404.
v1_protocol.pull(liveserver_session, 'devtable', 'newrepo', 'latest', remote_images,
credentials=credentials, expected_failure=Failures.UNKNOWN_TAG)
def test_push_pull_emoji_unicode(pusher, puller, unicode_emoji_images, liveserver_session,
                                 app_reloader):
    """ Test: Round-trip an image containing unicode through a push followed by a pull. """
    creds = ('devtable', 'password')

    # The push and the pull address the same repository and tag, with the same images.
    target = (liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images)

    # Push into a new repository.
    pusher.push(*target, credentials=creds)

    # Pull it back to verify.
    puller.pull(*target, credentials=creds)
def test_push_pull_emoji_unicode_direct(pusher, puller, unicode_emoji_images, liveserver_session,
                                        app_reloader):
    """ Test: Round-trip an image containing *unescaped* unicode through a push and a pull. """
    # Disable ensure_ascii so the unicode is not escaped when the manifests are built.
    unescaped = ProtocolOptions()
    unescaped.ensure_ascii = False

    # The push and the pull share the same target, credentials and protocol options.
    target = (liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images)
    shared = {
        'credentials': ('devtable', 'password'),
        'options': unescaped,
    }

    # Push into a new repository.
    pusher.push(*target, **shared)

    # Pull it back to verify.
    puller.pull(*target, **shared)