Fix handling of four-byte UTF-8 manifests
- Adds `charset=utf-8` to the Content-Type of all manifest responses.
- Makes sure we connect to MySQL in utf8mb4 mode, so that we can properly read and write four-byte UTF-8 strings.
- Adds tests for all of the above.
This commit is contained in:
parent
62609fce3e
commit
eb9ca8e8a8
8 changed files with 120 additions and 11 deletions
|
@ -70,6 +70,12 @@ SCHEME_RANDOM_FUNCTION = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_EXTRA_ARGS = {
|
||||||
|
'mysql': dict(charset='utf8mb4'),
|
||||||
|
'mysql+pymysql': dict(charset='utf8mb4'),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def pipes_concat(arg1, arg2, *extra_args):
|
def pipes_concat(arg1, arg2, *extra_args):
|
||||||
""" Concat function for sqlite, since it doesn't support fn.Concat.
|
""" Concat function for sqlite, since it doesn't support fn.Concat.
|
||||||
Concatenates clauses with || characters.
|
Concatenates clauses with || characters.
|
||||||
|
@ -315,6 +321,10 @@ def _db_from_url(url, db_kwargs, connect_timeout=DEFAULT_DB_CONNECT_TIMEOUT,
|
||||||
db_kwargs.pop('stale_timeout', None)
|
db_kwargs.pop('stale_timeout', None)
|
||||||
db_kwargs.pop('max_connections', None)
|
db_kwargs.pop('max_connections', None)
|
||||||
|
|
||||||
|
for key, value in _EXTRA_ARGS.get(parsed_url.drivername, {}).iteritems():
|
||||||
|
if key not in db_kwargs:
|
||||||
|
db_kwargs[key] = value
|
||||||
|
|
||||||
if allow_retry:
|
if allow_retry:
|
||||||
driver = _wrap_for_retry(driver)
|
driver = _wrap_for_retry(driver)
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import uuid
|
import uuid
|
||||||
|
@ -864,3 +866,33 @@ def test_known_issue_schema1(registry_model):
|
||||||
assert found.digest == digest
|
assert found.digest == digest
|
||||||
assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
|
assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
|
||||||
assert found.get_parsed_manifest().digest == digest
|
assert found.get_parsed_manifest().digest == digest
|
||||||
|
|
||||||
|
|
||||||
|
def test_unicode_emoji(registry_model):
  """ Test: Create a schema 1 manifest whose layer metadata contains an emoji, store it via the
      registry model, then look it up again by digest and check the bytes are unchanged.
  """
  manifest_builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'latest')

  # Serialize the layer metadata without escaping, so the emoji is kept as raw unicode.
  layer_metadata = {
    'id': 'someid',
    'author': u'😱',
  }
  manifest_builder.add_layer('sha256:abcde', json.dumps(layer_metadata, ensure_ascii=False))

  manifest = manifest_builder.build(ensure_ascii=False)
  manifest._validate()

  # Make sure every blob referenced by the manifest is populated before creating it.
  for digest in manifest.local_blob_digests:
    _populate_blob(digest)

  # Create the manifest in the database and point the tag at it.
  repo_ref = registry_model.lookup_repository('devtable', 'simple')
  created, _ = registry_model.create_manifest_and_retarget_tag(repo_ref, manifest, 'latest',
                                                               storage)
  assert created
  assert created.digest == manifest.digest

  created_encoded = created.internal_manifest_bytes.as_encoded_str()
  assert created_encoded == manifest.bytes.as_encoded_str()

  # Fetch the manifest back by digest and check that nothing was altered on the way.
  looked_up = registry_model.lookup_manifest_by_digest(repo_ref, manifest.digest,
                                                       allow_dead=True)
  assert looked_up
  assert looked_up.digest == manifest.digest
  assert looked_up.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
  assert looked_up.get_parsed_manifest().digest == manifest.digest
|
||||||
|
|
|
@ -76,7 +76,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
|
||||||
supported.bytes.as_unicode(),
|
supported.bytes.as_unicode(),
|
||||||
status=200,
|
status=200,
|
||||||
headers={
|
headers={
|
||||||
'Content-Type': supported.media_type,
|
'Content-Type': '%s; charset=utf-8' % supported.media_type,
|
||||||
'Docker-Content-Digest': supported.digest,
|
'Docker-Content-Digest': supported.digest,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
@ -111,7 +111,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
|
||||||
metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True])
|
metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True])
|
||||||
|
|
||||||
return Response(supported.bytes.as_unicode(), status=200, headers={
|
return Response(supported.bytes.as_unicode(), status=200, headers={
|
||||||
'Content-Type': supported.media_type,
|
'Content-Type': '%s; charset=utf-8' % supported.media_type,
|
||||||
'Docker-Content-Digest': supported.digest,
|
'Docker-Content-Digest': supported.digest,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
@ -178,3 +178,22 @@ def test_validate_manifest_known_issue():
|
||||||
|
|
||||||
layers = list(manifest.get_layers(None))
|
layers = list(manifest.get_layers(None))
|
||||||
assert layers[-1].author is None
|
assert layers[-1].author is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('with_key', [None, docker_v2_signing_key])
def test_validate_manifest_with_emoji(with_key):
  """ Test: Build a schema 1 manifest containing an emoji in its layer metadata (once with no key
      and once with `docker_v2_signing_key`), validate it and reload it from its encoded bytes.
  """
  manifest_builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag')

  # Keep the emoji unescaped in the serialized layer metadata.
  layer_metadata = {
    'id': 'someid',
    'author': u'😱',
  }
  manifest_builder.add_layer('sha256:abcde', json.dumps(layer_metadata, ensure_ascii=False))

  manifest = manifest_builder.build(with_key, ensure_ascii=False)
  manifest._validate()

  # Reparsing the encoded bytes must succeed for the manifest to be considered reloadable.
  encoded = manifest.bytes.as_encoded_str()
  reloadable_bytes = Bytes.for_string_or_unicode(encoded)
  DockerSchema1Manifest(reloadable_bytes)
|
||||||
|
|
|
@ -83,6 +83,7 @@ def _init_db_path_sqlite(tmpdir_factory):
|
||||||
initialize_database()
|
initialize_database()
|
||||||
|
|
||||||
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
|
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
|
||||||
|
db.obj.execute_sql('PRAGMA encoding="UTF-8";')
|
||||||
|
|
||||||
populate_database()
|
populate_database()
|
||||||
close_db_filter(None)
|
close_db_filter(None)
|
||||||
|
@ -157,7 +158,9 @@ def initialized_db(appconfig):
|
||||||
if not under_test_real_database:
|
if not under_test_real_database:
|
||||||
# Make absolutely sure foreign key constraints are on.
|
# Make absolutely sure foreign key constraints are on.
|
||||||
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
|
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
|
||||||
|
db.obj.execute_sql('PRAGMA encoding="UTF-8";')
|
||||||
assert db.obj.execute_sql('PRAGMA foreign_keys;').fetchone()[0] == 1
|
assert db.obj.execute_sql('PRAGMA foreign_keys;').fetchone()[0] == 1
|
||||||
|
assert db.obj.execute_sql('PRAGMA encoding;').fetchone()[0] == 'UTF-8'
|
||||||
|
|
||||||
# If under a test *real* database, setup a savepoint.
|
# If under a test *real* database, setup a savepoint.
|
||||||
if under_test_real_database:
|
if under_test_real_database:
|
||||||
|
|
|
@ -138,6 +138,20 @@ def images_with_empty_layer():
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def unicode_emoji_images():
  """ Returns a parent/child pair of images for push and pull testing, where the child image's
      config carries unicode metadata (including an emoji).
  """
  parent_layer = layer_bytes_for_contents('parent contents')
  child_layer = layer_bytes_for_contents('some contents')

  # Unicode metadata attached to the leaf image.
  unicode_config = {
    'comment': u'😱',
    'author': u'Sômé guy',
  }

  parent_image = Image(id='parentid', bytes=parent_layer, parent_id=None)
  child_image = Image(id='someid', bytes=child_layer, parent_id='parentid',
                      config=unicode_config)

  # Note: order is from base layer down to leaf.
  return [parent_image, child_image]
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def jwk():
|
def jwk():
|
||||||
return RSAKey(key=RSA.generate(2048))
|
return RSAKey(key=RSA.generate(2048))
|
||||||
|
|
|
@ -168,9 +168,9 @@ class V2Protocol(RegistryProtocol):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Parse the returned manifest list and ensure it matches.
|
# Parse the returned manifest list and ensure it matches.
|
||||||
assert response.headers['Content-Type'] == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
|
ct, _ = response.headers['Content-Type'].split(';', 1)
|
||||||
retrieved = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
|
assert ct == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
|
||||||
response.headers['Content-Type'])
|
retrieved = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
|
||||||
assert retrieved.schema_version == 2
|
assert retrieved.schema_version == 2
|
||||||
assert retrieved.is_manifest_list
|
assert retrieved.is_manifest_list
|
||||||
assert retrieved.digest == manifestlist.digest
|
assert retrieved.digest == manifestlist.digest
|
||||||
|
@ -185,9 +185,8 @@ class V2Protocol(RegistryProtocol):
|
||||||
headers=headers)
|
headers=headers)
|
||||||
if expected_failure is not None:
|
if expected_failure is not None:
|
||||||
return None
|
return None
|
||||||
|
ct, _ = response.headers['Content-Type'].split(';', 1)
|
||||||
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
|
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
|
||||||
response.headers['Content-Type'])
|
|
||||||
assert not manifest.is_manifest_list
|
assert not manifest.is_manifest_list
|
||||||
assert manifest.digest == manifest_digest
|
assert manifest.digest == manifest_digest
|
||||||
|
|
||||||
|
@ -546,11 +545,11 @@ class V2Protocol(RegistryProtocol):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Ensure the manifest returned by us is valid.
|
# Ensure the manifest returned by us is valid.
|
||||||
|
ct, _ = response.headers['Content-Type'].split(';', 1)
|
||||||
if not self.schema2:
|
if not self.schema2:
|
||||||
assert response.headers['Content-Type'] in DOCKER_SCHEMA1_CONTENT_TYPES
|
assert ct in DOCKER_SCHEMA1_CONTENT_TYPES
|
||||||
|
|
||||||
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
|
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
|
||||||
response.headers['Content-Type'])
|
|
||||||
manifests[tag_name] = manifest
|
manifests[tag_name] = manifest
|
||||||
|
|
||||||
if manifest.schema_version == 1:
|
if manifest.schema_version == 1:
|
||||||
|
|
|
@ -1822,3 +1822,35 @@ def test_push_legacy_pull_not_allowed(v22_protocol, v1_protocol, remote_images,
|
||||||
# Attempt to pull. Should fail with a 404.
|
# Attempt to pull. Should fail with a 404.
|
||||||
v1_protocol.pull(liveserver_session, 'devtable', 'newrepo', 'latest', remote_images,
|
v1_protocol.pull(liveserver_session, 'devtable', 'newrepo', 'latest', remote_images,
|
||||||
credentials=credentials, expected_failure=Failures.UNKNOWN_TAG)
|
credentials=credentials, expected_failure=Failures.UNKNOWN_TAG)
|
||||||
|
|
||||||
|
|
||||||
|
def test_push_pull_emoji_unicode(pusher, puller, unicode_emoji_images, liveserver_session,
                                 app_reloader):
  """ Test: Push the unicode/emoji images to a new repository and pull them back to verify. """
  auth = ('devtable', 'password')

  # Positional arguments shared by the push and the pull: session, namespace, repository,
  # tag and the images themselves.
  target = (liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images)

  # Create the repository by pushing to it.
  pusher.push(*target, credentials=auth)

  # Pulling the same images back verifies the push.
  puller.pull(*target, credentials=auth)
|
||||||
|
|
||||||
|
|
||||||
|
def test_push_pull_emoji_unicode_direct(pusher, puller, unicode_emoji_images, liveserver_session,
                                        app_reloader):
  """ Test: Push the unicode/emoji images with *unescaped* unicode in the manifests, then pull
      them back to verify.
  """
  auth = ('devtable', 'password')

  # Disable ASCII escaping so the manifests are built with the raw unicode characters.
  protocol_options = ProtocolOptions()
  protocol_options.ensure_ascii = False

  # Positional arguments shared by the push and the pull: session, namespace, repository,
  # tag and the images themselves.
  target = (liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images)

  # Create the repository by pushing to it.
  pusher.push(*target, credentials=auth, options=protocol_options)

  # Pulling the same images back verifies the push.
  puller.pull(*target, credentials=auth, options=protocol_options)
|
||||||
|
|
Reference in a new issue