Fix rendering <a> without href when scheme unsupported (#13040)

- Disallow links with relative paths
- Disallow iframes with non-http protocols and relative paths

Close #13037
This commit is contained in:
Eugen Rochko 2020-02-08 21:22:38 +01:00 committed by GitHub
parent b686e275e7
commit b1349342d2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 55 additions and 6 deletions

View file

@ -2,7 +2,23 @@
class Sanitize class Sanitize
module Config module Config
# Plain web schemes only (http and https).
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze HTTP_PROTOCOLS = %w(
http
https
).freeze
# URI schemes that UNSUPPORTED_HREF_TRANSFORMER accepts in an <a href>.
# The :relative marker is deliberately absent, so relative links are rejected.
LINK_PROTOCOLS = %w(http https dat dweb ipfs ipns ssb gopher xmpp magnet).freeze
CLASS_WHITELIST_TRANSFORMER = lambda do |env| CLASS_WHITELIST_TRANSFORMER = lambda do |env|
node = env[:node] node = env[:node]
@ -19,19 +35,37 @@ class Sanitize
node['class'] = class_list.join(' ') node['class'] = class_list.join(' ')
end end
# Sanitize transformer that unwraps <a> elements whose href is missing,
# relative, or uses a scheme that is not listed in LINK_PROTOCOLS.
#
# env - the transformer environment hash; only env[:node_name] and
#       env[:node] are read here.
#
# Nodes other than <a> are left untouched. An offending <a> is replaced by
# a text node carrying its text content.
UNSUPPORTED_HREF_TRANSFORMER = lambda do |env|
  return unless env[:node_name] == 'a'

  current_node = env[:node]

  # A missing href (nil) or one without a scheme does not match the
  # protocol regex and is treated as a relative link.
  scheme = if current_node['href'] =~ Sanitize::REGEX_PROTOCOL
             Regexp.last_match(1).downcase
           else
             :relative
           end

  return if LINK_PROTOCOLS.include?(scheme)

  # Node#replace parses a String argument as markup, so handing it the
  # decoded text would turn "&lt;b&gt;" in the link text back into a real
  # element. An explicit text node keeps the content inert.
  current_node.replace(Nokogiri::XML::Text.new(current_node.text, current_node.document))
end
# Transformer for h1-h6, blockquote, pre, ul, ol and li nodes; all other nodes are ignored.
# For an li, every p/ul/ol/li node visited by the traversal gets a <br> inserted after it
# when it has a next sibling, and is then replaced by its own children.
# Every other matched element is renamed to <p>.
UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env| UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env|
return unless %w(h1 h2 h3 h4 h5 h6 blockquote pre ul ol li).include?(env[:node_name]) return unless %w(h1 h2 h3 h4 h5 h6 blockquote pre ul ol li).include?(env[:node_name])
current_node = env[:node]
case env[:node_name] case env[:node_name]
when 'li' when 'li'
env[:node].traverse do |node| current_node.traverse do |node|
next unless %w(p ul ol li).include?(node.name) next unless %w(p ul ol li).include?(node.name)
node.add_next_sibling('<br>') if node.next_sibling node.add_next_sibling('<br>') if node.next_sibling
node.replace(node.children) unless node.text? node.replace(node.children) unless node.text?
end end
else else
env[:node].name = 'p' current_node.name = 'p'
end end
end end
@ -50,13 +84,12 @@ class Sanitize
}, },
}, },
protocols: { protocols: {},
'a' => { 'href' => HTTP_PROTOCOLS },
},
transformers: [ transformers: [
CLASS_WHITELIST_TRANSFORMER, CLASS_WHITELIST_TRANSFORMER,
UNSUPPORTED_ELEMENTS_TRANSFORMER, UNSUPPORTED_ELEMENTS_TRANSFORMER,
UNSUPPORTED_HREF_TRANSFORMER,
] ]
) )

View file

@ -26,5 +26,21 @@ describe Sanitize::Config do
# A link inside a list item is kept; the <ul>/<li> wrappers are flattened into one <p> with <br> between items.
it 'keep links in lists' do it 'keep links in lists' do
expect(Sanitize.fragment('<p>Check out:</p><ul><li><a href="https://joinmastodon.org" rel="nofollow noopener noreferrer" target="_blank">joinmastodon.org</a></li><li>Bar</li></ul>', subject)).to eq '<p>Check out:</p><p><a href="https://joinmastodon.org" rel="nofollow noopener noreferrer" target="_blank">joinmastodon.org</a><br>Bar</p>' expect(Sanitize.fragment('<p>Check out:</p><ul><li><a href="https://joinmastodon.org" rel="nofollow noopener noreferrer" target="_blank">joinmastodon.org</a></li><li>Bar</li></ul>', subject)).to eq '<p>Check out:</p><p><a href="https://joinmastodon.org" rel="nofollow noopener noreferrer" target="_blank">joinmastodon.org</a><br>Bar</p>'
end end
# An anchor that has no href attribute is unwrapped to its bare text.
it 'removes a without href' do
  sanitized = Sanitize.fragment('<a>Test</a>', subject)

  expect(sanitized).to eq 'Test'
end
# Nested markup inside an href-less anchor is dropped; only the (escaped) text remains.
it 'removes a without href and only keeps text content' do
  sanitized = Sanitize.fragment('<a><span class="invisible">foo&amp;</span><span>Test</span></a>', subject)

  expect(sanitized).to eq 'foo&amp;Test'
end
# An href whose scheme is not in the allowed list causes the anchor to be unwrapped.
it 'removes a with unsupported scheme in href' do
  sanitized = Sanitize.fragment('<a href="foo://bar">Test</a>', subject)

  expect(sanitized).to eq 'Test'
end
# An http link is kept and comes out with the rel and target attributes added.
it 'keeps a with href' do
  sanitized = Sanitize.fragment('<a href="http://example.com">Test</a>', subject)

  expect(sanitized).to eq '<a href="http://example.com" rel="nofollow noopener noreferrer" target="_blank">Test</a>'
end
end end
end end