Refactor formatter (#17828)

* Refactor formatter

* Move custom emoji pre-rendering logic to view helpers

* Move more methods out of Formatter

* Fix code style issues

* Remove Formatter

* Add inline poll options to RSS feeds

* Remove unused helper method

* Fix code style issues

* Various fixes and improvements

* Fix test
This commit is contained in:
Eugen Rochko 2022-03-26 02:53:34 +01:00 committed by GitHub
parent 2dd30804b6
commit cefa526c6d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
44 changed files with 932 additions and 1024 deletions

View file

@ -57,7 +57,7 @@ class StatusesIndex < Chewy::Index
field :id, type: 'long'
field :account_id, type: 'long'
field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.ordered_media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
field :text, type: 'text', value: ->(status) { [status.spoiler_text, PlainTextFormatter.new(status.text, status.local?).to_s].concat(status.ordered_media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
field :stemmed, type: 'text', analyzer: 'content'
end

View file

@ -15,7 +15,7 @@ class Api::Web::EmbedsController < Api::Web::BaseController
return not_found if oembed.nil?
begin
oembed[:html] = Formatter.instance.sanitize(oembed[:html], Sanitize::Config::MASTODON_OEMBED)
oembed[:html] = Sanitize.fragment(oembed[:html], Sanitize::Config::MASTODON_OEMBED)
rescue ArgumentError
return not_found
end

View file

@ -2,10 +2,12 @@
module AccountsHelper
def display_name(account, **options)
str = account.display_name.presence || account.username
if options[:custom_emojify]
Formatter.instance.format_display_name(account, **options)
prerender_custom_emojis(h(str), account.emojis)
else
account.display_name.presence || account.username
str
end
end

View file

@ -12,9 +12,6 @@ module Admin::Trends::StatusesHelper
return '' if text.blank?
html = Formatter.instance.send(:encode, text)
html = Formatter.instance.send(:encode_custom_emojis, html, status.emojis, prefers_autoplay?)
html.html_safe # rubocop:disable Rails/OutputSafety
prerender_custom_emojis(h(text), status.emojis)
end
end

View file

@ -239,4 +239,8 @@ module ApplicationHelper
end
end.values
end
def prerender_custom_emojis(html, custom_emojis)
EmojiFormatter.new(html, custom_emojis, animate: prefers_autoplay?).to_s
end
end

View file

@ -0,0 +1,19 @@
# frozen_string_literal: true
module FormattingHelper
def html_aware_format(text, local, options = {})
HtmlAwareFormatter.new(text, local, options).to_s
end
def linkify(text, options = {})
TextFormatter.new(text, options).to_s
end
def extract_plain_text(text, local)
PlainTextFormatter.new(text, local).to_s
end
def status_content_format(status)
html_aware_format(status.text, status.local?, preloaded_accounts: [status.account] + (status.respond_to?(:active_mentions) ? status.active_mentions.map(&:account) : []))
end
end

View file

@ -2,6 +2,7 @@
module RoutingHelper
extend ActiveSupport::Concern
include Rails.application.routes.url_helpers
include ActionView::Helpers::AssetTagHelper
include Webpacker::Helper
@ -22,8 +23,6 @@ module RoutingHelper
full_asset_url(asset_pack_path(source, **options))
end
private
def use_storage?
Rails.configuration.x.use_s3 || Rails.configuration.x.use_swift
end

View file

@ -113,20 +113,6 @@ module StatusesHelper
end
end
private
def simplified_text(text)
text.dup.tap do |new_text|
URI.extract(new_text).each do |url|
new_text.gsub!(url, '')
end
new_text.gsub!(Account::MENTION_RE, '')
new_text.gsub!(Tag::HASHTAG_RE, '')
new_text.gsub!(/\s+/, '')
end
end
def embedded_view?
params[:controller] == EMBEDDED_CONTROLLER && params[:action] == EMBEDDED_ACTION
end

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class ActivityPub::Activity::Create < ActivityPub::Activity
include FormattingHelper
def perform
dereference_object!
@ -367,7 +369,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
end
def converted_text
Formatter.instance.linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n"))
linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n"))
end
def unsupported_media_type?(mime_type)

View file

@ -0,0 +1,98 @@
# frozen_string_literal: true
class EmojiFormatter
include RoutingHelper
DISALLOWED_BOUNDING_REGEX = /[[:alnum:]:]/.freeze
attr_reader :html, :custom_emojis, :options
# @param [ActiveSupport::SafeBuffer] html
# @param [Array<CustomEmoji>] custom_emojis
# @param [Hash] options
# @option options [Boolean] :animate
def initialize(html, custom_emojis, options = {})
raise ArgumentError unless html.html_safe?
@html = html
@custom_emojis = custom_emojis
@options = options
end
def to_s
return html if custom_emojis.empty? || html.blank?
i = -1
tag_open_index = nil
inside_shortname = false
shortname_start_index = -1
invisible_depth = 0
last_index = 0
result = ''.dup
while i + 1 < html.size
i += 1
if invisible_depth.zero? && inside_shortname && html[i] == ':'
inside_shortname = false
shortcode = html[shortname_start_index + 1..i - 1]
char_after = html[i + 1]
next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode])
result << html[last_index..shortname_start_index - 1] if shortname_start_index.positive?
result << image_for_emoji(shortcode, emoji)
last_index = i + 1
elsif tag_open_index && html[i] == '>'
tag = html[tag_open_index..i]
tag_open_index = nil
if invisible_depth.positive?
invisible_depth += count_tag_nesting(tag)
elsif tag == '<span class="invisible">'
invisible_depth = 1
end
elsif html[i] == '<'
tag_open_index = i
inside_shortname = false
elsif !tag_open_index && html[i] == ':' && (i.zero? || !DISALLOWED_BOUNDING_REGEX.match?(html[i - 1]))
inside_shortname = true
shortname_start_index = i
end
end
result << html[last_index..-1]
result.html_safe # rubocop:disable Rails/OutputSafety
end
private
def emoji_map
@emoji_map ||= custom_emojis.each_with_object({}) { |e, h| h[e.shortcode] = [full_asset_url(e.image.url), full_asset_url(e.image.url(:static))] }
end
def count_tag_nesting(tag)
if tag[1] == '/'
-1
elsif tag[-2] == '/'
0
else
1
end
end
def image_for_emoji(shortcode, emoji)
original_url, static_url = emoji
if animate?
image_tag(original_url, draggable: false, class: 'emojione', alt: ":#{shortcode}:", title: ":#{shortcode}:")
else
image_tag(original_url, draggable: false, class: 'emojione custom-emoji', alt: ":#{shortcode}:", title: ":#{shortcode}:", data: { original: original_url, static: static_url })
end
end
def animate?
@options[:animate]
end
end

View file

@ -5,18 +5,34 @@ module Extractor
module_function
# :yields: username, list_slug, start, end
def extract_entities_with_indices(text, options = {}, &block)
entities = begin
extract_urls_with_indices(text, options) +
extract_hashtags_with_indices(text, check_url_overlap: false) +
extract_mentions_or_lists_with_indices(text) +
extract_extra_uris_with_indices(text)
end
return [] if entities.empty?
entities = remove_overlapping_entities(entities)
entities.each(&block) if block_given?
entities
end
def extract_mentions_or_lists_with_indices(text)
return [] unless Twitter::TwitterText::Regex[:at_signs].match?(text)
return [] unless text && Twitter::TwitterText::Regex[:at_signs].match?(text)
possible_entries = []
text.to_s.scan(Account::MENTION_RE) do |screen_name, _|
text.scan(Account::MENTION_RE) do |screen_name, _|
match_data = $LAST_MATCH_INFO
after = $'
after = $'
unless Twitter::TwitterText::Regex[:end_mention_match].match?(after)
start_position = match_data.char_begin(1) - 1
end_position = match_data.char_end(1)
end_position = match_data.char_end(1)
possible_entries << {
screen_name: screen_name,
indices: [start_position, end_position],
@ -29,36 +45,70 @@ module Extractor
yield mention[:screen_name], mention[:indices].first, mention[:indices].last
end
end
possible_entries
end
def extract_hashtags_with_indices(text, **)
return [] unless /#/.match?(text)
def extract_hashtags_with_indices(text, _options = {})
return [] unless text&.index('#')
possible_entries = []
tags = []
text.scan(Tag::HASHTAG_RE) do |hash_text, _|
match_data = $LAST_MATCH_INFO
match_data = $LAST_MATCH_INFO
start_position = match_data.char_begin(1) - 1
end_position = match_data.char_end(1)
after = $'
end_position = match_data.char_end(1)
after = $'
if %r{\A://}.match?(after)
hash_text.match(/(.+)(https?\Z)/) do |matched|
hash_text = matched[1]
hash_text = matched[1]
end_position -= matched[2].codepoint_length
end
end
tags << {
possible_entries << {
hashtag: hash_text,
indices: [start_position, end_position],
}
end
tags.each { |tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given?
tags
if block_given?
possible_entries.each do |tag|
yield tag[:hashtag], tag[:indices].first, tag[:indices].last
end
end
possible_entries
end
def extract_cashtags_with_indices(_text)
[] # always returns empty array
[]
end
def extract_extra_uris_with_indices(text)
return [] unless text&.index(':')
possible_entries = []
text.scan(Twitter::TwitterText::Regex[:valid_extended_uri]) do
valid_uri_match_data = $LAST_MATCH_INFO
start_position = valid_uri_match_data.char_begin(3)
end_position = valid_uri_match_data.char_end(3)
possible_entries << {
url: valid_uri_match_data[3],
indices: [start_position, end_position],
}
end
if block_given?
possible_entries.each do |url|
yield url[:url], url[:indices].first, url[:indices].last
end
end
possible_entries
end
end

View file

@ -5,6 +5,7 @@ require 'singleton'
class FeedManager
include Singleton
include Redisable
include FormattingHelper
# Maximum number of items stored in a single feed
MAX_ITEMS = 400
@ -445,7 +446,7 @@ class FeedManager
status = status.reblog if status.reblog?
combined_text = [
Formatter.instance.plaintext(status),
extract_plain_text(status.text, status.local?),
status.spoiler_text,
status.preloadable_poll ? status.preloadable_poll.options.join("\n\n") : nil,
status.ordered_media_attachments.map(&:description).join("\n\n"),

View file

@ -1,294 +0,0 @@
# frozen_string_literal: true
require 'singleton'
class Formatter
include Singleton
include RoutingHelper
include ActionView::Helpers::TextHelper
def format(status, **options)
if status.respond_to?(:reblog?) && status.reblog?
prepend_reblog = status.reblog.account.acct
status = status.proper
else
prepend_reblog = false
end
raw_content = status.text
if options[:inline_poll_options] && status.preloadable_poll
raw_content = raw_content + "\n\n" + status.preloadable_poll.options.map { |title| "[ ] #{title}" }.join("\n")
end
return '' if raw_content.blank?
unless status.local?
html = reformat(raw_content)
html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
return html.html_safe # rubocop:disable Rails/OutputSafety
end
linkable_accounts = status.respond_to?(:active_mentions) ? status.active_mentions.map(&:account) : []
linkable_accounts << status.account
html = raw_content
html = "RT @#{prepend_reblog} #{html}" if prepend_reblog
html = encode_and_link_urls(html, linkable_accounts)
html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
html = simple_format(html, {}, sanitize: false)
html = html.delete("\n")
html.html_safe # rubocop:disable Rails/OutputSafety
end
def reformat(html)
sanitize(html, Sanitize::Config::MASTODON_STRICT)
rescue ArgumentError
''
end
def plaintext(status)
return status.text if status.local?
text = status.text.gsub(/(<br \/>|<br>|<\/p>)+/) { |match| "#{match}\n" }
strip_tags(text)
end
def simplified_format(account, **options)
return '' if account.note.blank?
html = account.local? ? linkify(account.note) : reformat(account.note)
html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
html.html_safe # rubocop:disable Rails/OutputSafety
end
def sanitize(html, config)
Sanitize.fragment(html, config)
end
def format_spoiler(status, **options)
html = encode(status.spoiler_text)
html = encode_custom_emojis(html, status.emojis, options[:autoplay])
html.html_safe # rubocop:disable Rails/OutputSafety
end
def format_poll_option(status, option, **options)
html = encode(option.title)
html = encode_custom_emojis(html, status.emojis, options[:autoplay])
html.html_safe # rubocop:disable Rails/OutputSafety
end
def format_display_name(account, **options)
html = encode(account.display_name.presence || account.username)
html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
html.html_safe # rubocop:disable Rails/OutputSafety
end
def format_field(account, str, **options)
html = account.local? ? encode_and_link_urls(str, me: true, with_domain: true) : reformat(str)
html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
html.html_safe # rubocop:disable Rails/OutputSafety
end
def linkify(text)
html = encode_and_link_urls(text)
html = simple_format(html, {}, sanitize: false)
html = html.delete("\n")
html.html_safe # rubocop:disable Rails/OutputSafety
end
private
def html_entities
@html_entities ||= HTMLEntities.new
end
def encode(html)
html_entities.encode(html)
end
def encode_and_link_urls(html, accounts = nil, options = {})
entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)
if accounts.is_a?(Hash)
options = accounts
accounts = nil
end
rewrite(html.dup, entities) do |entity|
if entity[:url]
link_to_url(entity, options)
elsif entity[:hashtag]
link_to_hashtag(entity)
elsif entity[:screen_name]
link_to_mention(entity, accounts, options)
end
end
end
def count_tag_nesting(tag)
if tag[1] == '/' then -1
elsif tag[-2] == '/' then 0
else 1
end
end
# rubocop:disable Metrics/BlockNesting
def encode_custom_emojis(html, emojis, animate = false)
return html if emojis.empty?
emoji_map = emojis.each_with_object({}) { |e, h| h[e.shortcode] = [full_asset_url(e.image.url), full_asset_url(e.image.url(:static))] }
i = -1
tag_open_index = nil
inside_shortname = false
shortname_start_index = -1
invisible_depth = 0
while i + 1 < html.size
i += 1
if invisible_depth.zero? && inside_shortname && html[i] == ':'
shortcode = html[shortname_start_index + 1..i - 1]
emoji = emoji_map[shortcode]
if emoji
original_url, static_url = emoji
replacement = begin
if animate
image_tag(original_url, draggable: false, class: 'emojione', alt: ":#{shortcode}:", title: ":#{shortcode}:")
else
image_tag(original_url, draggable: false, class: 'emojione custom-emoji', alt: ":#{shortcode}:", title: ":#{shortcode}:", data: { original: original_url, static: static_url })
end
end
before_html = shortname_start_index.positive? ? html[0..shortname_start_index - 1] : ''
html = before_html + replacement + html[i + 1..-1]
i += replacement.size - (shortcode.size + 2) - 1
else
i -= 1
end
inside_shortname = false
elsif tag_open_index && html[i] == '>'
tag = html[tag_open_index..i]
tag_open_index = nil
if invisible_depth.positive?
invisible_depth += count_tag_nesting(tag)
elsif tag == '<span class="invisible">'
invisible_depth = 1
end
elsif html[i] == '<'
tag_open_index = i
inside_shortname = false
elsif !tag_open_index && html[i] == ':'
inside_shortname = true
shortname_start_index = i
end
end
html
end
# rubocop:enable Metrics/BlockNesting
def rewrite(text, entities)
text = text.to_s
# Sort by start index
entities = entities.sort_by do |entity|
indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
indices.first
end
result = []
last_index = entities.reduce(0) do |index, entity|
indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
result << encode(text[index...indices.first])
result << yield(entity)
indices.last
end
result << encode(text[last_index..-1])
result.flatten.join
end
def utf8_friendly_extractor(text, options = {})
# Note: I couldn't obtain list_slug with @user/list-name format
# for mention so this requires additional check
special = Extractor.extract_urls_with_indices(text, options)
standard = Extractor.extract_entities_with_indices(text, options)
extra = Extractor.extract_extra_uris_with_indices(text, options)
Extractor.remove_overlapping_entities(special + standard + extra)
end
def link_to_url(entity, options = {})
url = Addressable::URI.parse(entity[:url])
html_attrs = { target: '_blank', rel: 'nofollow noopener noreferrer' }
html_attrs[:rel] = "me #{html_attrs[:rel]}" if options[:me]
Twitter::TwitterText::Autolink.send(:link_to_text, entity, link_html(entity[:url]), url, html_attrs)
rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
encode(entity[:url])
end
def link_to_mention(entity, linkable_accounts, options = {})
acct = entity[:screen_name]
return link_to_account(acct, options) unless linkable_accounts
same_username_hits = 0
account = nil
username, domain = acct.split('@')
domain = nil if TagManager.instance.local_domain?(domain)
linkable_accounts.each do |item|
same_username = item.username.casecmp(username).zero?
same_domain = item.domain.nil? ? domain.nil? : item.domain.casecmp(domain)&.zero?
if same_username && !same_domain
same_username_hits += 1
elsif same_username && same_domain
account = item
end
end
account ? mention_html(account, with_domain: same_username_hits.positive? || options[:with_domain]) : "@#{encode(acct)}"
end
def link_to_account(acct, options = {})
username, domain = acct.split('@')
domain = nil if TagManager.instance.local_domain?(domain)
account = EntityCache.instance.mention(username, domain)
account ? mention_html(account, with_domain: options[:with_domain]) : "@#{encode(acct)}"
end
def link_to_hashtag(entity)
hashtag_html(entity[:hashtag])
end
def link_html(url)
url = Addressable::URI.parse(url).to_s
prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s
text = url[prefix.length, 30]
suffix = url[prefix.length + 30..-1]
cutoff = url[prefix.length..-1].length > 30
"<span class=\"invisible\">#{encode(prefix)}</span><span class=\"#{cutoff ? 'ellipsis' : ''}\">#{encode(text)}</span><span class=\"invisible\">#{encode(suffix)}</span>"
end
def hashtag_html(tag)
"<a href=\"#{encode(tag_url(tag))}\" class=\"mention hashtag\" rel=\"tag\">#<span>#{encode(tag)}</span></a>"
end
def mention_html(account, with_domain: false)
"<span class=\"h-card\"><a href=\"#{encode(ActivityPub::TagManager.instance.url_for(account))}\" class=\"u-url mention\">@<span>#{encode(with_domain ? account.pretty_acct : account.username)}</span></a></span>"
end
end

View file

@ -0,0 +1,38 @@
# frozen_string_literal: true
class HtmlAwareFormatter
attr_reader :text, :local, :options
alias local? local
# @param [String] text
# @param [Boolean] local
# @param [Hash] options
def initialize(text, local, options = {})
@text = text
@local = local
@options = options
end
def to_s
return ''.html_safe if text.blank?
if local?
linkify
else
reformat.html_safe # rubocop:disable Rails/OutputSafety
end
rescue ArgumentError
''.html_safe
end
private
def reformat
Sanitize.fragment(text, Sanitize::Config::MASTODON_STRICT)
end
def linkify
TextFormatter.new(text, options).to_s
end
end

View file

@ -0,0 +1,30 @@
# frozen_string_literal: true
class PlainTextFormatter
include ActionView::Helpers::TextHelper
NEWLINE_TAGS_RE = /(<br \/>|<br>|<\/p>)+/.freeze
attr_reader :text, :local
alias local? local
def initialize(text, local)
@text = text
@local = local
end
def to_s
if local?
text
else
strip_tags(insert_newlines).chomp
end
end
private
def insert_newlines
text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" }
end
end

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class RSS::Serializer
include FormattingHelper
private
def render_statuses(builder, statuses)
@ -9,7 +11,7 @@ class RSS::Serializer
item.title(status_title(status))
.link(ActivityPub::TagManager.instance.url_for(status))
.pub_date(status.created_at)
.description(status.spoiler_text.presence || Formatter.instance.format(status, inline_poll_options: true).to_str)
.description(status_description(status))
status.ordered_media_attachments.each do |media|
item.enclosure(full_asset_url(media.file.url(:original, false)), media.file.content_type, media.file.size)
@ -19,9 +21,8 @@ class RSS::Serializer
end
def status_title(status)
return "#{status.account.acct} deleted status" if status.destroyed?
preview = status.proper.spoiler_text.presence || status.proper.text
if preview.length > 30 || preview[0, 30].include?("\n")
preview = preview[0, 30]
preview = preview[0, preview.index("\n").presence || 30] + '…'
@ -35,4 +36,20 @@ class RSS::Serializer
"#{status.account.acct}: #{preview}"
end
end
def status_description(status)
if status.proper.spoiler_text?
status.proper.spoiler_text
else
html = status_content_format(status.proper).to_str
after_html = ''
if status.proper.preloadable_poll
poll_options_html = status.proper.preloadable_poll.options.map { |o| "[ ] #{o}" }.join('<br />')
after_html = "<p>#{poll_options_html}</p>"
end
"#{html}#{after_html}"
end
end
end

158
app/lib/text_formatter.rb Normal file
View file

@ -0,0 +1,158 @@
# frozen_string_literal: true
class TextFormatter
include ActionView::Helpers::TextHelper
include ERB::Util
include RoutingHelper
URL_PREFIX_REGEX = /\A(https?:\/\/(www\.)?|xmpp:)/.freeze
DEFAULT_REL = %w(nofollow noopener noreferrer).freeze
DEFAULT_OPTIONS = {
multiline: true,
}.freeze
attr_reader :text, :options
# @param [String] text
# @param [Hash] options
# @option options [Boolean] :multiline
# @option options [Boolean] :with_domains
# @option options [Boolean] :with_rel_me
# @option options [Array<Account>] :preloaded_accounts
def initialize(text, options = {})
@text = text
@options = DEFAULT_OPTIONS.merge(options)
end
def entities
@entities ||= Extractor.extract_entities_with_indices(text, extract_url_without_protocol: false)
end
def to_s
return ''.html_safe if text.blank?
html = rewrite do |entity|
if entity[:url]
link_to_url(entity)
elsif entity[:hashtag]
link_to_hashtag(entity)
elsif entity[:screen_name]
link_to_mention(entity)
end
end
html = simple_format(html, {}, sanitize: false).delete("\n") if multiline?
html.html_safe # rubocop:disable Rails/OutputSafety
end
private
def rewrite
entities.sort_by! do |entity|
entity[:indices].first
end
result = ''.dup
last_index = entities.reduce(0) do |index, entity|
indices = entity[:indices]
result << h(text[index...indices.first])
result << yield(entity)
indices.last
end
result << h(text[last_index..-1])
result
end
def link_to_url(entity)
url = Addressable::URI.parse(entity[:url]).to_s
rel = with_rel_me? ? (DEFAULT_REL + %w(me)) : DEFAULT_REL
prefix = url.match(URL_PREFIX_REGEX).to_s
display_url = url[prefix.length, 30]
suffix = url[prefix.length + 30..-1]
cutoff = url[prefix.length..-1].length > 30
<<~HTML.squish
<a href="#{h(url)}" target="_blank" rel="#{rel.join(' ')}"><span class="invisible">#{h(prefix)}</span><span class="#{cutoff ? 'ellipsis' : ''}">#{h(display_url)}</span><span class="invisible">#{h(suffix)}</span></a>
HTML
rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
h(entity[:url])
end
def link_to_hashtag(entity)
hashtag = entity[:hashtag]
url = tag_url(hashtag)
<<~HTML.squish
<a href="#{h(url)}" class="mention hashtag" rel="tag">#<span>#{h(hashtag)}</span></a>
HTML
end
def link_to_mention(entity)
username, domain = entity[:screen_name].split('@')
domain = nil if local_domain?(domain)
account = nil
if preloaded_accounts?
same_username_hits = 0
preloaded_accounts.each do |other_account|
same_username = other_account.username.casecmp(username).zero?
same_domain = other_account.domain.nil? ? domain.nil? : other_account.domain.casecmp(domain)&.zero?
if same_username && !same_domain
same_username_hits += 1
elsif same_username && same_domain
account = other_account
end
end
else
account = entity_cache.mention(username, domain)
end
return "@#{h(entity[:screen_name])}" if account.nil?
url = ActivityPub::TagManager.instance.url_for(account)
display_username = same_username_hits&.positive? || with_domains? ? account.pretty_acct : account.username
<<~HTML.squish
<span class="h-card"><a href="#{h(url)}" class="u-url mention">@<span>#{h(display_username)}</span></a></span>
HTML
end
def entity_cache
@entity_cache ||= EntityCache.instance
end
def tag_manager
@tag_manager ||= TagManager.instance
end
delegate :local_domain?, to: :tag_manager
def multiline?
options[:multiline]
end
def with_domains?
options[:with_domains]
end
def with_rel_me?
options[:with_rel_me]
end
def preloaded_accounts
options[:preloaded_accounts]
end
def preloaded_accounts?
preloaded_accounts.present?
end
end

View file

@ -5,6 +5,7 @@ class ApplicationMailer < ActionMailer::Base
helper :application
helper :instance
helper :formatting
protected

View file

@ -2,6 +2,7 @@
class ActivityPub::ActorSerializer < ActivityPub::Serializer
include RoutingHelper
include FormattingHelper
context :security
@ -102,7 +103,7 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer
end
def summary
object.suspended? ? '' : Formatter.instance.simplified_format(object)
object.suspended? ? '' : html_aware_format(object.note, object.local?)
end
def icon
@ -185,6 +186,8 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer
end
class Account::FieldSerializer < ActivityPub::Serializer
include FormattingHelper
attributes :type, :name, :value
def type
@ -192,7 +195,7 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer
end
def value
Formatter.instance.format_field(object.account, object.value)
html_aware_format(object.value, object.account.value?, with_rel_me: true, with_domains: true, multiline: false)
end
end

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class ActivityPub::NoteSerializer < ActivityPub::Serializer
include FormattingHelper
context_extensions :atom_uri, :conversation, :sensitive, :voters_count
attributes :id, :type, :summary,
@ -39,11 +41,11 @@ class ActivityPub::NoteSerializer < ActivityPub::Serializer
end
def content
Formatter.instance.format(object)
status_content_format(object)
end
def content_map
{ object.language => Formatter.instance.format(object) }
{ object.language => content }
end
def replies

View file

@ -2,6 +2,7 @@
class REST::AccountSerializer < ActiveModel::Serializer
include RoutingHelper
include FormattingHelper
attributes :id, :username, :acct, :display_name, :locked, :bot, :discoverable, :group, :created_at,
:note, :url, :avatar, :avatar_static, :header, :header_static,
@ -14,10 +15,12 @@ class REST::AccountSerializer < ActiveModel::Serializer
attribute :suspended, if: :suspended?
class FieldSerializer < ActiveModel::Serializer
include FormattingHelper
attributes :name, :value, :verified_at
def value
Formatter.instance.format_field(object.account, object.value)
html_aware_format(object.value, object.account.local?, with_rel_me: true, with_domains: true, multiline: false)
end
end
@ -32,7 +35,7 @@ class REST::AccountSerializer < ActiveModel::Serializer
end
def note
object.suspended? ? '' : Formatter.instance.simplified_format(object)
object.suspended? ? '' : html_aware_format(object.note, object.local?)
end
def url

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class REST::AnnouncementSerializer < ActiveModel::Serializer
include FormattingHelper
attributes :id, :content, :starts_at, :ends_at, :all_day,
:published_at, :updated_at
@ -25,7 +27,7 @@ class REST::AnnouncementSerializer < ActiveModel::Serializer
end
def content
Formatter.instance.linkify(object.text)
linkify(object.text)
end
def reactions

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class REST::StatusEditSerializer < ActiveModel::Serializer
include FormattingHelper
has_one :account, serializer: REST::AccountSerializer
attributes :content, :spoiler_text, :sensitive, :created_at
@ -11,7 +13,7 @@ class REST::StatusEditSerializer < ActiveModel::Serializer
attribute :poll, if: -> { object.poll_options.present? }
def content
Formatter.instance.format(object)
status_content_format(object)
end
def poll

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class REST::StatusSerializer < ActiveModel::Serializer
include FormattingHelper
attributes :id, :created_at, :in_reply_to_id, :in_reply_to_account_id,
:sensitive, :spoiler_text, :visibility, :language,
:uri, :url, :replies_count, :reblogs_count,
@ -71,7 +73,7 @@ class REST::StatusSerializer < ActiveModel::Serializer
end
def content
Formatter.instance.format(object)
status_content_format(object)
end
def url

View file

@ -134,7 +134,7 @@ class FetchLinkCardService < BaseService
when 'video'
@card.width = embed[:width].presence || 0
@card.height = embed[:height].presence || 0
@card.html = Formatter.instance.sanitize(embed[:html], Sanitize::Config::MASTODON_OEMBED)
@card.html = Sanitize.fragment(embed[:html], Sanitize::Config::MASTODON_OEMBED)
@card.image_remote_url = (url + embed[:thumbnail_url]).to_s if embed[:thumbnail_url].present?
when 'rich'
# Most providers rely on <script> tags, which is a no-no

View file

@ -5,17 +5,17 @@
.account__header__fields
- fields.each do |field|
%dl
%dt.emojify{ title: field.name }= Formatter.instance.format_field(account, field.name, custom_emojify: true)
%dt.emojify{ title: field.name }= prerender_custom_emojis(h(field.name), account.emojis)
%dd{ title: field.value, class: custom_field_classes(field) }
- if field.verified?
%span.verified__mark{ title: t('accounts.link_verified_on', date: l(field.verified_at)) }
= fa_icon 'check'
= Formatter.instance.format_field(account, field.value, custom_emojify: true)
= prerender_custom_emojis(html_aware_format(field.value, account.local?, with_rel_me: true, with_domains: true, multiline: false), account.emojis)
= account_badge(account)
- if account.note.present?
.account__header__content.emojify= Formatter.instance.simplified_format(account, custom_emojify: true)
.account__header__content.emojify= prerender_custom_emojis(html_aware_format(account.note, account.local?), account.emojis)
.public-account-bio__extra
= t 'accounts.joined', date: l(account.created_at, format: :month)

View file

@ -16,16 +16,16 @@
.account__header__fields
- fields.each do |field|
%dl
%dt.emojify{ title: field.name }= Formatter.instance.format_field(account, field.name, custom_emojify: true)
%dt.emojify{ title: field.name }= prerender_custom_emojis(h(field.name), account.emojis)
%dd{ title: field.value, class: custom_field_classes(field) }
- if field.verified?
%span.verified__mark{ title: t('accounts.link_verified_on', date: l(field.verified_at)) }
= fa_icon 'check'
= Formatter.instance.format_field(account, field.value, custom_emojify: true)
= prerender_custom_emojis(html_aware_format(field.value, account.local?, with_rel_me: true, with_domains: true, multiline: false), account.emojis)
- if account.note.present?
%div
.account__header__content.emojify= Formatter.instance.simplified_format(account, custom_emojify: true)
.account__header__content.emojify= prerender_custom_emojis(html_aware_format(account.note, account.local?), account.emojis)
.dashboard__counters.admin-account-counters
%div

View file

@ -4,12 +4,12 @@
.batch-table__row__content
.status__content><
- if status.proper.spoiler_text.blank?
= Formatter.instance.format(status.proper, custom_emojify: true)
= prerender_custom_emojis(status_content_format(status.proper), status.proper.emojis)
- else
%details<
%summary><
%strong> Content warning: #{Formatter.instance.format_spoiler(status.proper)}
= Formatter.instance.format(status.proper, custom_emojify: true)
%strong> Content warning: #{prerender_custom_emojis(h(status.proper.spoiler_text), status.proper.emojis)}
= prerender_custom_emojis(status_content_format(status.proper), status.proper.emojis)
- unless status.proper.ordered_media_attachments.empty?
- if status.proper.ordered_media_attachments.first.video?

View file

@ -27,7 +27,7 @@
= fa_icon('lock') if @report.target_account.locked?
- if @report.target_account.note.present?
.account-card__bio.emojify
= Formatter.instance.simplified_format(@report.target_account, custom_emojify: true)
= prerender_custom_emojis(html_aware_format(@report.target_account.note, @report.target_account.local?), @report.target_account.emojis)
.account-card__actions
.account-card__counters
.account-card__counters__item

View file

@ -34,7 +34,7 @@
= fa_icon('lock') if account.locked?
- if account.note.present?
.account-card__bio.emojify
= Formatter.instance.simplified_format(account, custom_emojify: true)
= prerender_custom_emojis(html_aware_format(account.note, account.local?), account.emojis)
- else
.flex-spacer
.account-card__actions

View file

@ -26,7 +26,7 @@
%p= t "user_mailer.warning.explanation.#{@strike.action}", instance: Rails.configuration.x.local_domain
- unless @strike.text.blank?
= Formatter.instance.linkify(@strike.text)
= linkify(@strike.text)
- if @strike.report && !@strike.report.other?
%p

View file

@ -28,10 +28,10 @@
- if status.spoiler_text?
%div.auto-dir
%p
= Formatter.instance.format_spoiler(status)
= status.spoiler_text
%div.auto-dir
= Formatter.instance.format(status)
= status_content_format(status)
- if status.ordered_media_attachments.size > 0
%p

View file

@ -3,6 +3,6 @@
> ----
>
<% end %>
> <%= raw word_wrap(Formatter.instance.plaintext(status), break_sequence: "\n> ") %>
> <%= raw word_wrap(extract_plain_text(status.text, status.local?), break_sequence: "\n> ") %>
<%= raw t('application_mailer.view')%> <%= web_url("statuses/#{status.id}") %>

View file

@ -5,7 +5,7 @@
* <%= raw t('notification_mailer.digest.mention', name: notification.from_account.pretty_acct) %>
<%= raw Formatter.instance.plaintext(notification.target_status) %>
<%= raw extract_plain_text(notification.target_status.text, notification.target_status.local?) %>
<%= raw t('application_mailer.view')%> <%= web_url("statuses/#{notification.target_status.id}") %>
<% end %>

View file

@ -18,10 +18,11 @@
.status__content.emojify{ :data => ({ spoiler: current_account&.user&.setting_expand_spoilers ? 'expanded' : 'folded' } if status.spoiler_text?) }<
- if status.spoiler_text?
%p<
%span.p-summary> #{Formatter.instance.format_spoiler(status, autoplay: prefers_autoplay?)}&nbsp;
%span.p-summary> #{prerender_custom_emojis(h(status.spoiler_text), status.emojis)}&nbsp;
%button.status__content__spoiler-link= t('statuses.show_more')
.e-content
= Formatter.instance.format(status, custom_emojify: true, autoplay: prefers_autoplay?)
= prerender_custom_emojis(status_content_format(status), status.emojis)
- if status.preloadable_poll
= render_poll_component(status)

View file

@ -12,7 +12,7 @@
%span.poll__number><
= "#{percent.round}%"
%span.poll__option__text
= Formatter.instance.format_poll_option(status, option, autoplay: prefers_autoplay?)
= prerender_custom_emojis(h(option.title), status.emojis)
- if own_votes.include?(index)
%span.poll__voted
%i.poll__voted__mark.fa.fa-check
@ -23,7 +23,7 @@
%label.poll__option><
%span.poll__input{ class: poll.multiple? ? 'checkbox' : nil}><
%span.poll__option__text
= Formatter.instance.format_poll_option(status, option, autoplay: prefers_autoplay?)
= prerender_custom_emojis(h(option.title), status.emojis)
.poll__footer
- unless show_results
%button.button.button-secondary{ disabled: true }

View file

@ -30,10 +30,11 @@
.status__content.emojify{ :data => ({ spoiler: current_account&.user&.setting_expand_spoilers ? 'expanded' : 'folded' } if status.spoiler_text?) }<
- if status.spoiler_text?
%p<
%span.p-summary> #{Formatter.instance.format_spoiler(status, autoplay: prefers_autoplay?)}&nbsp;
%span.p-summary> #{prerender_custom_emojis(h(status.spoiler_text), status.emojis)}&nbsp;
%button.status__content__spoiler-link= t('statuses.show_more')
.e-content
= Formatter.instance.format(status, custom_emojify: true, autoplay: prefers_autoplay?)
= prerender_custom_emojis(status_content_format(status), status.emojis)
- if status.preloadable_poll
= render_poll_component(status)

View file

@ -40,7 +40,7 @@
%p= t "user_mailer.warning.explanation.#{@warning.action}", instance: @instance
- unless @warning.text.blank?
= Formatter.instance.linkify(@warning.text)
= linkify(@warning.text)
- if @warning.report && !@warning.report.other?
%p

View file

@ -75,30 +75,4 @@ module Twitter::TwitterText
)
}iox
end
module Extractor
# Extracts a list of all XMPP and magnet URIs included in the Toot <tt>text</tt> along
# with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
# XMPP or magnet URIs an empty array will be returned.
#
# If a block is given then it will be called for each XMPP URI.
def extract_extra_uris_with_indices(text, _options = {}) # :yields: uri, start, end
return [] unless text && text.index(":")
urls = []
text.to_s.scan(Twitter::TwitterText::Regex[:valid_extended_uri]) do
valid_uri_match_data = $~
start_position = valid_uri_match_data.char_begin(3)
end_position = valid_uri_match_data.char_end(3)
urls << {
:url => valid_uri_match_data[3],
:indices => [start_position, end_position]
}
end
urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given?
urls
end
end
end

View file

@ -0,0 +1,55 @@
require 'rails_helper'
RSpec.describe EmojiFormatter do
let!(:emoji) { Fabricate(:custom_emoji, shortcode: 'coolcat') }
def preformat_text(str)
TextFormatter.new(str).to_s
end
describe '#to_s' do
subject { described_class.new(text, emojis).to_s }
let(:emojis) { [emoji] }
context 'given text that is not marked as html-safe' do
let(:text) { 'Foo' }
it 'raises an argument error' do
expect { subject }.to raise_error ArgumentError
end
end
context 'given text with an emoji shortcode at the start' do
let(:text) { preformat_text(':coolcat: Beep boop') }
it 'converts the shortcode to an image tag' do
is_expected.to match(/<img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given text with an emoji shortcode in the middle' do
let(:text) { preformat_text('Beep :coolcat: boop') }
it 'converts the shortcode to an image tag' do
is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given text with concatenated emoji shortcodes' do
let(:text) { preformat_text(':coolcat::coolcat:') }
it 'does not touch the shortcodes' do
is_expected.to match(/:coolcat::coolcat:/)
end
end
context 'given text with an emoji shortcode at the end' do
let(:text) { preformat_text('Beep boop :coolcat:') }
it 'converts the shortcode to an image tag' do
is_expected.to match(/boop <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
end
end

View file

@ -1,626 +0,0 @@
require 'rails_helper'
RSpec.describe Formatter do
let(:local_account) { Fabricate(:account, domain: nil, username: 'alice') }
let(:remote_account) { Fabricate(:account, domain: 'remote.test', username: 'bob', url: 'https://remote.test/') }
shared_examples 'encode and link URLs' do
context 'given a stand-alone medium URL' do
let(:text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' }
it 'matches the full URL' do
is_expected.to include 'href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"'
end
end
context 'given a stand-alone google URL' do
let(:text) { 'http://google.com' }
it 'matches the full URL' do
is_expected.to include 'href="http://google.com"'
end
end
context 'given a stand-alone URL with a newer TLD' do
let(:text) { 'http://example.gay' }
it 'matches the full URL' do
is_expected.to include 'href="http://example.gay"'
end
end
context 'given a stand-alone IDN URL' do
let(:text) { 'https://nic.みんな/' }
it 'matches the full URL' do
is_expected.to include 'href="https://nic.みんな/"'
end
it 'has display URL' do
is_expected.to include '<span class="">nic.みんな/</span>'
end
end
context 'given a URL with a trailing period' do
let(:text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ' }
it 'matches the full URL but not the period' do
is_expected.to include 'href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"'
end
end
context 'given a URL enclosed with parentheses' do
let(:text) { '(http://google.com/)' }
it 'matches the full URL but not the parentheses' do
is_expected.to include 'href="http://google.com/"'
end
end
context 'given a URL with a trailing exclamation point' do
let(:text) { 'http://www.google.com!' }
it 'matches the full URL but not the exclamation point' do
is_expected.to include 'href="http://www.google.com"'
end
end
context 'given a URL with a trailing single quote' do
let(:text) { "http://www.google.com'" }
it 'matches the full URL but not the single quote' do
is_expected.to include 'href="http://www.google.com"'
end
end
context 'given a URL with a trailing angle bracket' do
let(:text) { 'http://www.google.com>' }
it 'matches the full URL but not the angle bracket' do
is_expected.to include 'href="http://www.google.com"'
end
end
context 'given a URL with a query string' do
context 'with escaped unicode character' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' }
it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;q=autolink"'
end
end
context 'with unicode character' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓&q=autolink' }
it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓&amp;q=autolink"'
end
end
context 'with unicode character at the end' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓' }
it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓"'
end
end
context 'with escaped and not escaped unicode characters' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink' }
it 'preserves escaped unicode characters' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;utf81=✓&amp;q=autolink"'
end
end
end
context 'given a URL with parentheses in it' do
let(:text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' }
it 'matches the full URL' do
is_expected.to include 'href="https://en.wikipedia.org/wiki/Diaspora_(software)"'
end
end
context 'given a URL in quotation marks' do
let(:text) { '"https://example.com/"' }
it 'does not match the quotation marks' do
is_expected.to include 'href="https://example.com/"'
end
end
context 'given a URL in angle brackets' do
let(:text) { '<https://example.com/>' }
it 'does not match the angle brackets' do
is_expected.to include 'href="https://example.com/"'
end
end
context 'given a URL with Japanese path string' do
let(:text) { 'https://ja.wikipedia.org/wiki/日本' }
it 'matches the full URL' do
is_expected.to include 'href="https://ja.wikipedia.org/wiki/日本"'
end
end
context 'given a URL with Korean path string' do
let(:text) { 'https://ko.wikipedia.org/wiki/대한민국' }
it 'matches the full URL' do
is_expected.to include 'href="https://ko.wikipedia.org/wiki/대한민국"'
end
end
context 'given a URL with a full-width space' do
let(:text) { 'https://example.com/ abc123' }
it 'does not match the full-width space' do
is_expected.to include 'href="https://example.com/"'
end
end
context 'given a URL in Japanese quotation marks' do
let(:text) { '「[https://example.org/」' }
it 'does not match the quotation marks' do
is_expected.to include 'href="https://example.org/"'
end
end
context 'given a URL with Simplified Chinese path string' do
let(:text) { 'https://baike.baidu.com/item/中华人民共和国' }
it 'matches the full URL' do
is_expected.to include 'href="https://baike.baidu.com/item/中华人民共和国"'
end
end
context 'given a URL with Traditional Chinese path string' do
let(:text) { 'https://zh.wikipedia.org/wiki/臺灣' }
it 'matches the full URL' do
is_expected.to include 'href="https://zh.wikipedia.org/wiki/臺灣"'
end
end
context 'given a URL containing unsafe code (XSS attack, visible part)' do
let(:text) { %q{http://example.com/b<del>b</del>} }
it 'does not include the HTML in the URL' do
is_expected.to include '"http://example.com/b"'
end
it 'escapes the HTML' do
is_expected.to include '&lt;del&gt;b&lt;/del&gt;'
end
end
context 'given a URL containing unsafe code (XSS attack, invisible part)' do
let(:text) { %q{http://example.com/blahblahblahblah/a<script>alert("Hello")</script>} }
it 'does not include the HTML in the URL' do
is_expected.to include '"http://example.com/blahblahblahblah/a"'
end
it 'escapes the HTML' do
is_expected.to include '&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;'
end
end
context 'given text containing HTML code (script tag)' do
let(:text) { '<script>alert("Hello")</script>' }
it 'escapes the HTML' do
is_expected.to include '<p>&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;</p>'
end
end
context 'given text containing HTML (XSS attack)' do
let(:text) { %q{<img src="javascript:alert('XSS');">} }
it 'escapes the HTML' do
is_expected.to include '<p>&lt;img src=&quot;javascript:alert(&apos;XSS&apos;);&quot;&gt;</p>'
end
end
context 'given an invalid URL' do
let(:text) { 'http://www\.google\.com' }
it 'outputs the raw URL' do
is_expected.to eq '<p>http://www\.google\.com</p>'
end
end
context 'given text containing a hashtag' do
let(:text) { '#hashtag' }
it 'creates a hashtag link' do
is_expected.to include '/tags/hashtag" class="mention hashtag" rel="tag">#<span>hashtag</span></a>'
end
end
context 'given text containing a hashtag with Unicode chars' do
let(:text) { '#hashtagタグ' }
it 'creates a hashtag link' do
is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
end
end
context 'given a stand-alone xmpp: URI' do
let(:text) { 'xmpp:user@instance.com' }
it 'matches the full URI' do
is_expected.to include 'href="xmpp:user@instance.com"'
end
end
context 'given a an xmpp: URI with a query-string' do
let(:text) { 'please join xmpp:muc@instance.com?join right now' }
it 'matches the full URI' do
is_expected.to include 'href="xmpp:muc@instance.com?join"'
end
end
context 'given text containing a magnet: URI' do
let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
it 'matches the full URI' do
is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
end
end
end
describe '#format_spoiler' do
subject { Formatter.instance.format_spoiler(status) }
context 'given a post containing plain text' do
let(:status) { Fabricate(:status, text: 'text', spoiler_text: 'Secret!', uri: nil) }
it 'Returns the spoiler text' do
is_expected.to eq 'Secret!'
end
end
context 'given a post with an emoji shortcode at the start' do
let!(:emoji) { Fabricate(:custom_emoji) }
let(:status) { Fabricate(:status, text: 'text', spoiler_text: ':coolcat: Secret!', uri: nil) }
let(:text) { ':coolcat: Beep boop' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/<img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
end
describe '#format' do
subject { Formatter.instance.format(status) }
context 'given a post with local status' do
context 'given a reblogged post' do
let(:reblog) { Fabricate(:status, account: local_account, text: 'Hello world', uri: nil) }
let(:status) { Fabricate(:status, reblog: reblog) }
it 'returns original status with credit to its author' do
is_expected.to include 'RT <span class="h-card"><a href="https://cb6e6126.ngrok.io/@alice" class="u-url mention">@<span>alice</span></a></span> Hello world'
end
end
context 'given a post containing plain text' do
let(:status) { Fabricate(:status, text: 'text', uri: nil) }
it 'paragraphizes the text' do
is_expected.to eq '<p>text</p>'
end
end
context 'given a post containing line feeds' do
let(:status) { Fabricate(:status, text: "line\nfeed", uri: nil) }
it 'removes line feeds' do
is_expected.not_to include "\n"
end
end
context 'given a post containing linkable mentions' do
let(:status) { Fabricate(:status, mentions: [ Fabricate(:mention, account: local_account) ], text: '@alice') }
it 'creates a mention link' do
is_expected.to include '<a href="https://cb6e6126.ngrok.io/@alice" class="u-url mention">@<span>alice</span></a></span>'
end
end
context 'given a post containing unlinkable mentions' do
let(:status) { Fabricate(:status, text: '@alice', uri: nil) }
it 'does not create a mention link' do
is_expected.to include '@alice'
end
end
context do
subject do
status = Fabricate(:status, text: text, uri: nil)
Formatter.instance.format(status)
end
include_examples 'encode and link URLs'
end
context 'given a post with custom_emojify option' do
let!(:emoji) { Fabricate(:custom_emoji) }
let(:status) { Fabricate(:status, account: local_account, text: text) }
subject { Formatter.instance.format(status, custom_emojify: true) }
context 'given a post with an emoji shortcode at the start' do
let(:text) { ':coolcat: Beep boop' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/<p><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given a post with an emoji shortcode in the middle' do
let(:text) { 'Beep :coolcat: boop' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given a post with concatenated emoji shortcodes' do
let(:text) { ':coolcat::coolcat:' }
it 'does not touch the shortcodes' do
is_expected.to match(/:coolcat::coolcat:/)
end
end
context 'given a post with an emoji shortcode at the end' do
let(:text) { 'Beep boop :coolcat:' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/boop <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
end
end
context 'given a post with remote status' do
let(:status) { Fabricate(:status, account: remote_account, text: 'Beep boop') }
it 'reformats the post' do
is_expected.to eq 'Beep boop'
end
context 'given a post with custom_emojify option' do
let!(:emoji) { Fabricate(:custom_emoji, domain: remote_account.domain) }
let(:status) { Fabricate(:status, account: remote_account, text: text) }
subject { Formatter.instance.format(status, custom_emojify: true) }
context 'given a post with an emoji shortcode at the start' do
let(:text) { '<p>:coolcat: Beep boop<br />' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/<p><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given a post with an emoji shortcode in the middle' do
let(:text) { '<p>Beep :coolcat: boop</p>' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given a post with concatenated emoji' do
let(:text) { '<p>:coolcat::coolcat:</p>' }
it 'does not touch the shortcodes' do
is_expected.to match(/<p>:coolcat::coolcat:<\/p>/)
end
end
context 'given a post with an emoji shortcode at the end' do
let(:text) { '<p>Beep boop<br />:coolcat:</p>' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/<br><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
end
end
end
describe '#reformat' do
subject { Formatter.instance.reformat(text) }
context 'given a post containing plain text' do
let(:text) { 'Beep boop' }
it 'keeps the plain text' do
is_expected.to include 'Beep boop'
end
end
context 'given a post containing script tags' do
let(:text) { '<script>alert("Hello")</script>' }
it 'strips the scripts' do
is_expected.to_not include '<script>alert("Hello")</script>'
end
end
context 'given a post containing malicious classes' do
let(:text) { '<span class="mention status__content__spoiler-link">Show more</span>' }
it 'strips the malicious classes' do
is_expected.to_not include 'status__content__spoiler-link'
end
end
end
describe '#plaintext' do
subject { Formatter.instance.plaintext(status) }
context 'given a post with local status' do
let(:status) { Fabricate(:status, text: '<p>a text by a nerd who uses an HTML tag in text</p>', uri: nil) }
it 'returns the raw text' do
is_expected.to eq '<p>a text by a nerd who uses an HTML tag in text</p>'
end
end
context 'given a post with remote status' do
let(:status) { Fabricate(:status, account: remote_account, text: '<script>alert("Hello")</script>') }
it 'returns tag-stripped text' do
is_expected.to eq ''
end
end
end
describe '#simplified_format' do
subject { Formatter.instance.simplified_format(account) }
context 'given a post with local status' do
let(:account) { Fabricate(:account, domain: nil, note: text) }
context 'given a post containing linkable mentions for local accounts' do
let(:text) { '@alice' }
before { local_account }
it 'creates a mention link' do
is_expected.to eq '<p><span class="h-card"><a href="https://cb6e6126.ngrok.io/@alice" class="u-url mention">@<span>alice</span></a></span></p>'
end
end
context 'given a post containing linkable mentions for remote accounts' do
let(:text) { '@bob@remote.test' }
before { remote_account }
it 'creates a mention link' do
is_expected.to eq '<p><span class="h-card"><a href="https://remote.test/" class="u-url mention">@<span>bob</span></a></span></p>'
end
end
context 'given a post containing unlinkable mentions' do
let(:text) { '@alice' }
it 'does not create a mention link' do
is_expected.to eq '<p>@alice</p>'
end
end
context 'given a post with custom_emojify option' do
let!(:emoji) { Fabricate(:custom_emoji) }
before { account.note = text }
subject { Formatter.instance.simplified_format(account, custom_emojify: true) }
context 'given a post with an emoji shortcode at the start' do
let(:text) { ':coolcat: Beep boop' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/<p><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given a post with an emoji shortcode in the middle' do
let(:text) { 'Beep :coolcat: boop' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given a post with concatenated emoji shortcodes' do
let(:text) { ':coolcat::coolcat:' }
it 'does not touch the shortcodes' do
is_expected.to match(/:coolcat::coolcat:/)
end
end
context 'given a post with an emoji shortcode at the end' do
let(:text) { 'Beep boop :coolcat:' }
it 'converts the shortcode to an image tag' do
is_expected.to match(/boop <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
end
include_examples 'encode and link URLs'
end
context 'given a post with remote status' do
let(:text) { '<script>alert("Hello")</script>' }
let(:account) { Fabricate(:account, domain: 'remote', note: text) }
it 'reformats' do
is_expected.to_not include '<script>alert("Hello")</script>'
end
context 'with custom_emojify option' do
let!(:emoji) { Fabricate(:custom_emoji, domain: remote_account.domain) }
before { remote_account.note = text }
subject { Formatter.instance.simplified_format(remote_account, custom_emojify: true) }
context 'given a post with an emoji shortcode at the start' do
let(:text) { '<p>:coolcat: Beep boop<br />' }
it 'converts shortcode to image tag' do
is_expected.to match(/<p><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given a post with an emoji shortcode in the middle' do
let(:text) { '<p>Beep :coolcat: boop</p>' }
it 'converts shortcode to image tag' do
is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
context 'given a post with concatenated emoji shortcodes' do
let(:text) { '<p>:coolcat::coolcat:</p>' }
it 'does not touch the shortcodes' do
is_expected.to match(/<p>:coolcat::coolcat:<\/p>/)
end
end
context 'given a post with an emoji shortcode at the end' do
let(:text) { '<p>Beep boop<br />:coolcat:</p>' }
it 'converts shortcode to image tag' do
is_expected.to match(/<br><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/)
end
end
end
end
end
describe '#sanitize' do
let(:html) { '<script>alert("Hello")</script>' }
subject { Formatter.instance.sanitize(html, Sanitize::Config::MASTODON_STRICT) }
it 'sanitizes' do
is_expected.to eq ''
end
end
end

View file

@ -0,0 +1,44 @@
require 'rails_helper'
RSpec.describe HtmlAwareFormatter do
describe '#to_s' do
subject { described_class.new(text, local).to_s }
context 'when local' do
let(:local) { true }
let(:text) { 'Foo bar' }
it 'returns formatted text' do
is_expected.to eq '<p>Foo bar</p>'
end
end
context 'when remote' do
let(:local) { false }
context 'given plain text' do
let(:text) { 'Beep boop' }
it 'keeps the plain text' do
is_expected.to include 'Beep boop'
end
end
context 'given text containing script tags' do
let(:text) { '<script>alert("Hello")</script>' }
it 'strips the scripts' do
is_expected.to_not include '<script>alert("Hello")</script>'
end
end
context 'given text containing malicious classes' do
let(:text) { '<span class="mention status__content__spoiler-link">Show more</span>' }
it 'strips the malicious classes' do
is_expected.to_not include 'status__content__spoiler-link'
end
end
end
end
end

View file

@ -0,0 +1,24 @@
require 'rails_helper'
RSpec.describe PlainTextFormatter do
describe '#to_s' do
subject { described_class.new(status.text, status.local?).to_s }
context 'given a post with local status' do
let(:status) { Fabricate(:status, text: '<p>a text by a nerd who uses an HTML tag in text</p>', uri: nil) }
it 'returns the raw text' do
is_expected.to eq '<p>a text by a nerd who uses an HTML tag in text</p>'
end
end
context 'given a post with remote status' do
let(:remote_account) { Fabricate(:account, domain: 'remote.test', username: 'bob', url: 'https://remote.test/') }
let(:status) { Fabricate(:status, account: remote_account, text: '<p>Hello</p><script>alert("Hello")</script>') }
it 'returns tag-stripped text' do
is_expected.to eq 'Hello'
end
end
end
end

View file

@ -0,0 +1,313 @@
require 'rails_helper'
RSpec.describe TextFormatter do
describe '#to_s' do
let(:preloaded_accounts) { nil }
subject { described_class.new(text, preloaded_accounts: preloaded_accounts).to_s }
context 'given text containing plain text' do
let(:text) { 'text' }
it 'paragraphizes the text' do
is_expected.to eq '<p>text</p>'
end
end
context 'given text containing line feeds' do
let(:text) { "line\nfeed" }
it 'removes line feeds' do
is_expected.not_to include "\n"
end
end
context 'given text containing linkable mentions' do
let(:preloaded_accounts) { [Fabricate(:account, username: 'alice')] }
let(:text) { '@alice' }
it 'creates a mention link' do
is_expected.to include '<a href="https://cb6e6126.ngrok.io/@alice" class="u-url mention">@<span>alice</span></a></span>'
end
end
context 'given text containing unlinkable mentions' do
let(:preloaded_accounts) { [] }
let(:text) { '@alice' }
it 'does not create a mention link' do
is_expected.to include '@alice'
end
end
context 'given a stand-alone medium URL' do
let(:text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' }
it 'matches the full URL' do
is_expected.to include 'href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"'
end
end
context 'given a stand-alone google URL' do
let(:text) { 'http://google.com' }
it 'matches the full URL' do
is_expected.to include 'href="http://google.com"'
end
end
context 'given a stand-alone URL with a newer TLD' do
let(:text) { 'http://example.gay' }
it 'matches the full URL' do
is_expected.to include 'href="http://example.gay"'
end
end
context 'given a stand-alone IDN URL' do
let(:text) { 'https://nic.みんな/' }
it 'matches the full URL' do
is_expected.to include 'href="https://nic.みんな/"'
end
it 'has display URL' do
is_expected.to include '<span class="">nic.みんな/</span>'
end
end
context 'given a URL with a trailing period' do
let(:text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ' }
it 'matches the full URL but not the period' do
is_expected.to include 'href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"'
end
end
context 'given a URL enclosed with parentheses' do
let(:text) { '(http://google.com/)' }
it 'matches the full URL but not the parentheses' do
is_expected.to include 'href="http://google.com/"'
end
end
context 'given a URL with a trailing exclamation point' do
let(:text) { 'http://www.google.com!' }
it 'matches the full URL but not the exclamation point' do
is_expected.to include 'href="http://www.google.com"'
end
end
context 'given a URL with a trailing single quote' do
let(:text) { "http://www.google.com'" }
it 'matches the full URL but not the single quote' do
is_expected.to include 'href="http://www.google.com"'
end
end
context 'given a URL with a trailing angle bracket' do
let(:text) { 'http://www.google.com>' }
it 'matches the full URL but not the angle bracket' do
is_expected.to include 'href="http://www.google.com"'
end
end
context 'given a URL with a query string' do
context 'with escaped unicode character' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' }
it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;q=autolink"'
end
end
context 'with unicode character' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓&q=autolink' }
it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓&amp;q=autolink"'
end
end
context 'with unicode character at the end' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓' }
it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓"'
end
end
context 'with escaped and not escaped unicode characters' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink' }
it 'preserves escaped unicode characters' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;utf81=✓&amp;q=autolink"'
end
end
end
context 'given a URL with parentheses in it' do
let(:text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' }
it 'matches the full URL' do
is_expected.to include 'href="https://en.wikipedia.org/wiki/Diaspora_(software)"'
end
end
context 'given a URL in quotation marks' do
let(:text) { '"https://example.com/"' }
it 'does not match the quotation marks' do
is_expected.to include 'href="https://example.com/"'
end
end
context 'given a URL in angle brackets' do
let(:text) { '<https://example.com/>' }
it 'does not match the angle brackets' do
is_expected.to include 'href="https://example.com/"'
end
end
context 'given a URL with Japanese path string' do
let(:text) { 'https://ja.wikipedia.org/wiki/日本' }
it 'matches the full URL' do
is_expected.to include 'href="https://ja.wikipedia.org/wiki/日本"'
end
end
context 'given a URL with Korean path string' do
let(:text) { 'https://ko.wikipedia.org/wiki/대한민국' }
it 'matches the full URL' do
is_expected.to include 'href="https://ko.wikipedia.org/wiki/대한민국"'
end
end
context 'given a URL with a full-width space' do
let(:text) { 'https://example.com/ abc123' }
it 'does not match the full-width space' do
is_expected.to include 'href="https://example.com/"'
end
end
context 'given a URL in Japanese quotation marks' do
let(:text) { '「[https://example.org/」' }
it 'does not match the quotation marks' do
is_expected.to include 'href="https://example.org/"'
end
end
context 'given a URL with Simplified Chinese path string' do
let(:text) { 'https://baike.baidu.com/item/中华人民共和国' }
it 'matches the full URL' do
is_expected.to include 'href="https://baike.baidu.com/item/中华人民共和国"'
end
end
context 'given a URL with Traditional Chinese path string' do
let(:text) { 'https://zh.wikipedia.org/wiki/臺灣' }
it 'matches the full URL' do
is_expected.to include 'href="https://zh.wikipedia.org/wiki/臺灣"'
end
end
context 'given a URL containing unsafe code (XSS attack, visible part)' do
let(:text) { %q{http://example.com/b<del>b</del>} }
it 'does not include the HTML in the URL' do
is_expected.to include '"http://example.com/b"'
end
it 'escapes the HTML' do
is_expected.to include '&lt;del&gt;b&lt;/del&gt;'
end
end
context 'given a URL containing unsafe code (XSS attack, invisible part)' do
let(:text) { %q{http://example.com/blahblahblahblah/a<script>alert("Hello")</script>} }
it 'does not include the HTML in the URL' do
is_expected.to include '"http://example.com/blahblahblahblah/a"'
end
it 'escapes the HTML' do
is_expected.to include '&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;'
end
end
context 'given text containing HTML code (script tag)' do
let(:text) { '<script>alert("Hello")</script>' }
it 'escapes the HTML' do
is_expected.to include '<p>&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;</p>'
end
end
context 'given text containing HTML (XSS attack)' do
let(:text) { %q{<img src="javascript:alert('XSS');">} }
it 'escapes the HTML' do
is_expected.to include '<p>&lt;img src=&quot;javascript:alert(&#39;XSS&#39;);&quot;&gt;</p>'
end
end
context 'given an invalid URL' do
let(:text) { 'http://www\.google\.com' }
it 'outputs the raw URL' do
is_expected.to eq '<p>http://www\.google\.com</p>'
end
end
context 'given text containing a hashtag' do
let(:text) { '#hashtag' }
it 'creates a hashtag link' do
is_expected.to include '/tags/hashtag" class="mention hashtag" rel="tag">#<span>hashtag</span></a>'
end
end
context 'given text containing a hashtag with Unicode chars' do
let(:text) { '#hashtagタグ' }
it 'creates a hashtag link' do
is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
end
end
context 'given text with a stand-alone xmpp: URI' do
let(:text) { 'xmpp:user@instance.com' }
it 'matches the full URI' do
is_expected.to include 'href="xmpp:user@instance.com"'
end
end
context 'given text with an xmpp: URI with a query-string' do
let(:text) { 'please join xmpp:muc@instance.com?join right now' }
it 'matches the full URI' do
is_expected.to include 'href="xmpp:muc@instance.com?join"'
end
end
context 'given text containing a magnet: URI' do
let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
it 'matches the full URI' do
is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
end
end
end
end