Merge pull request #3325 from quay/four-byte-utf8-fix
Fix handling of four byte utf8 manifests
This commit is contained in:
commit
396ce21020
8 changed files with 120 additions and 11 deletions
|
@ -70,6 +70,12 @@ SCHEME_RANDOM_FUNCTION = {
|
|||
}
|
||||
|
||||
|
||||
# Default driver keyword arguments, keyed by the driver name of the database URL.
# MySQL connections default to the `utf8mb4` charset so that four-byte UTF-8
# characters (such as emoji) can be handled.
_EXTRA_ARGS = {
  'mysql': {'charset': 'utf8mb4'},
  'mysql+pymysql': {'charset': 'utf8mb4'},
}
|
||||
|
||||
|
||||
def pipes_concat(arg1, arg2, *extra_args):
|
||||
""" Concat function for sqlite, since it doesn't support fn.Concat.
|
||||
Concatenates clauses with || characters.
|
||||
|
@ -315,6 +321,10 @@ def _db_from_url(url, db_kwargs, connect_timeout=DEFAULT_DB_CONNECT_TIMEOUT,
|
|||
db_kwargs.pop('stale_timeout', None)
|
||||
db_kwargs.pop('max_connections', None)
|
||||
|
||||
for key, value in _EXTRA_ARGS.get(parsed_url.drivername, {}).iteritems():
|
||||
if key not in db_kwargs:
|
||||
db_kwargs[key] = value
|
||||
|
||||
if allow_retry:
|
||||
driver = _wrap_for_retry(driver)
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import uuid
|
||||
|
@ -864,3 +866,33 @@ def test_known_issue_schema1(registry_model):
|
|||
assert found.digest == digest
|
||||
assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
|
||||
assert found.get_parsed_manifest().digest == digest
|
||||
|
||||
|
||||
def test_unicode_emoji(registry_model):
  """ Builds a schema 1 manifest whose layer metadata contains an unescaped emoji, stores it
      via the registry model and checks that it can be looked up again, byte-for-byte.
  """
  schema1_builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'latest')

  # Serialize the layer metadata without ASCII-escaping, so the emoji is kept as-is.
  layer_metadata = {
    'id': 'someid',
    'author': u'😱',
  }
  schema1_builder.add_layer('sha256:abcde', json.dumps(layer_metadata, ensure_ascii=False))

  manifest = schema1_builder.build(ensure_ascii=False)
  manifest._validate()

  # Populate every blob the manifest references locally.
  for local_digest in manifest.local_blob_digests:
    _populate_blob(local_digest)

  # Write the manifest to the database, pointing the `latest` tag at it.
  repository_ref = registry_model.lookup_repository('devtable', 'simple')
  creation_result = registry_model.create_manifest_and_retarget_tag(repository_ref, manifest,
                                                                    'latest', storage)
  created_manifest, _ = creation_result

  assert created_manifest
  assert created_manifest.digest == manifest.digest

  created_bytes = created_manifest.internal_manifest_bytes.as_encoded_str()
  assert created_bytes == manifest.bytes.as_encoded_str()

  # Load the manifest back by digest and check that it matches what was built.
  found = registry_model.lookup_manifest_by_digest(repository_ref, manifest.digest,
                                                   allow_dead=True)
  assert found
  assert found.digest == manifest.digest

  found_bytes = found.internal_manifest_bytes.as_encoded_str()
  assert found_bytes == manifest.bytes.as_encoded_str()
  assert found.get_parsed_manifest().digest == manifest.digest
|
||||
|
|
|
@ -76,7 +76,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
|
|||
supported.bytes.as_unicode(),
|
||||
status=200,
|
||||
headers={
|
||||
'Content-Type': supported.media_type,
|
||||
'Content-Type': '%s; charset=utf-8' % supported.media_type,
|
||||
'Docker-Content-Digest': supported.digest,
|
||||
},
|
||||
)
|
||||
|
@ -111,7 +111,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
|
|||
metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True])
|
||||
|
||||
return Response(supported.bytes.as_unicode(), status=200, headers={
|
||||
'Content-Type': supported.media_type,
|
||||
'Content-Type': '%s; charset=utf-8' % supported.media_type,
|
||||
'Docker-Content-Digest': supported.digest,
|
||||
})
|
||||
|
||||
|
|
|
@ -178,3 +178,22 @@ def test_validate_manifest_known_issue():
|
|||
|
||||
layers = list(manifest.get_layers(None))
|
||||
assert layers[-1].author is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize('with_key', [None, docker_v2_signing_key])
def test_validate_manifest_with_emoji(with_key):
  """ Builds a schema 1 manifest containing an unescaped emoji (once with `None` as the key and
      once with `docker_v2_signing_key`), validates it and checks that its encoded bytes can be
      loaded again.
  """
  manifest_builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag')

  # Serialize the layer metadata without ASCII-escaping, so the emoji is kept as-is.
  layer_metadata = {
    'id': 'someid',
    'author': u'😱',
  }
  manifest_builder.add_layer('sha256:abcde', json.dumps(layer_metadata, ensure_ascii=False))

  built_manifest = manifest_builder.build(with_key, ensure_ascii=False)
  built_manifest._validate()

  # Ensure the manifest can be reloaded from its encoded form.
  encoded = built_manifest.bytes.as_encoded_str()
  DockerSchema1Manifest(Bytes.for_string_or_unicode(encoded))
|
||||
|
|
|
@ -83,6 +83,7 @@ def _init_db_path_sqlite(tmpdir_factory):
|
|||
initialize_database()
|
||||
|
||||
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
|
||||
db.obj.execute_sql('PRAGMA encoding="UTF-8";')
|
||||
|
||||
populate_database()
|
||||
close_db_filter(None)
|
||||
|
@ -157,7 +158,9 @@ def initialized_db(appconfig):
|
|||
if not under_test_real_database:
|
||||
# Make absolutely sure foreign key constraints are on.
|
||||
db.obj.execute_sql('PRAGMA foreign_keys = ON;')
|
||||
db.obj.execute_sql('PRAGMA encoding="UTF-8";')
|
||||
assert db.obj.execute_sql('PRAGMA foreign_keys;').fetchone()[0] == 1
|
||||
assert db.obj.execute_sql('PRAGMA encoding;').fetchone()[0] == 'UTF-8'
|
||||
|
||||
# If under a test *real* database, setup a savepoint.
|
||||
if under_test_real_database:
|
||||
|
|
|
@ -138,6 +138,20 @@ def images_with_empty_layer():
|
|||
]
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def unicode_emoji_images():
  """ Session-scoped fixture: a two-image chain (parent, then child) for push and pull testing,
      where the child image's config contains non-ASCII text, including an emoji.
  """
  parent_bytes = layer_bytes_for_contents('parent contents')
  image_bytes = layer_bytes_for_contents('some contents')

  base_image = Image(id='parentid', bytes=parent_bytes, parent_id=None)

  unicode_config = {
    'comment': u'😱',
    'author': u'Sômé guy',
  }
  leaf_image = Image(id='someid', bytes=image_bytes, parent_id='parentid', config=unicode_config)

  # Note: order is from base layer down to leaf.
  return [base_image, leaf_image]
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def jwk():
  """ Session-scoped fixture: an `RSAKey` wrapping a newly generated 2048-bit RSA key. """
  generated_key = RSA.generate(2048)
  return RSAKey(key=generated_key)
|
||||
|
|
|
@ -168,9 +168,9 @@ class V2Protocol(RegistryProtocol):
|
|||
return None
|
||||
|
||||
# Parse the returned manifest list and ensure it matches.
|
||||
assert response.headers['Content-Type'] == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
|
||||
retrieved = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
|
||||
response.headers['Content-Type'])
|
||||
ct, _ = response.headers['Content-Type'].split(';', 1)
|
||||
assert ct == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
|
||||
retrieved = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
|
||||
assert retrieved.schema_version == 2
|
||||
assert retrieved.is_manifest_list
|
||||
assert retrieved.digest == manifestlist.digest
|
||||
|
@ -185,9 +185,8 @@ class V2Protocol(RegistryProtocol):
|
|||
headers=headers)
|
||||
if expected_failure is not None:
|
||||
return None
|
||||
|
||||
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
|
||||
response.headers['Content-Type'])
|
||||
ct, _ = response.headers['Content-Type'].split(';', 1)
|
||||
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
|
||||
assert not manifest.is_manifest_list
|
||||
assert manifest.digest == manifest_digest
|
||||
|
||||
|
@ -546,11 +545,11 @@ class V2Protocol(RegistryProtocol):
|
|||
return None
|
||||
|
||||
# Ensure the manifest returned by us is valid.
|
||||
ct, _ = response.headers['Content-Type'].split(';', 1)
|
||||
if not self.schema2:
|
||||
assert response.headers['Content-Type'] in DOCKER_SCHEMA1_CONTENT_TYPES
|
||||
assert ct in DOCKER_SCHEMA1_CONTENT_TYPES
|
||||
|
||||
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text),
|
||||
response.headers['Content-Type'])
|
||||
manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), ct)
|
||||
manifests[tag_name] = manifest
|
||||
|
||||
if manifest.schema_version == 1:
|
||||
|
|
|
@ -1822,3 +1822,35 @@ def test_push_legacy_pull_not_allowed(v22_protocol, v1_protocol, remote_images,
|
|||
# Attempt to pull. Should fail with a 404.
|
||||
v1_protocol.pull(liveserver_session, 'devtable', 'newrepo', 'latest', remote_images,
|
||||
credentials=credentials, expected_failure=Failures.UNKNOWN_TAG)
|
||||
|
||||
|
||||
def test_push_pull_emoji_unicode(pusher, puller, unicode_emoji_images, liveserver_session,
                                 app_reloader):
  """ Test: Push an image whose metadata contains unicode (including an emoji), then pull it
      back.
  """
  # Keyword arguments shared by the push and the pull.
  auth_kwargs = {'credentials': ('devtable', 'password')}

  # Push to a new repository.
  pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images,
              **auth_kwargs)

  # Pull the same repository to verify the push.
  puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images,
              **auth_kwargs)
|
||||
|
||||
|
||||
def test_push_pull_emoji_unicode_direct(pusher, puller, unicode_emoji_images, liveserver_session,
                                        app_reloader):
  """ Test: Push an image with *unescaped* unicode in its metadata (`ensure_ascii` turned off),
      then pull it back.
  """
  # Turn off automatic unicode encoding when building the manifests.
  direct_options = ProtocolOptions()
  direct_options.ensure_ascii = False

  # Keyword arguments shared by the push and the pull.
  call_kwargs = {
    'credentials': ('devtable', 'password'),
    'options': direct_options,
  }

  # Push to a new repository.
  pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images,
              **call_kwargs)

  # Pull the same repository to verify the push.
  puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', unicode_emoji_images,
              **call_kwargs)
|
||||
|
|
Reference in a new issue