Fix being able to post URLs longer than 4096 characters
This commit is contained in:
parent
ec1bb2a4fe
commit
cc706b1191
3 changed files with 58 additions and 14 deletions
|
@ -1,6 +1,8 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
module Extractor
|
module Extractor
|
||||||
|
MAX_DOMAIN_LENGTH = 253
|
||||||
|
|
||||||
extend Twitter::TwitterText::Extractor
|
extend Twitter::TwitterText::Extractor
|
||||||
|
|
||||||
module_function
|
module_function
|
||||||
|
@ -14,7 +16,12 @@ module Extractor
|
||||||
text.to_s.scan(Account::MENTION_RE) do |screen_name, _|
|
text.to_s.scan(Account::MENTION_RE) do |screen_name, _|
|
||||||
match_data = $LAST_MATCH_INFO
|
match_data = $LAST_MATCH_INFO
|
||||||
after = $'
|
after = $'
|
||||||
|
|
||||||
unless Twitter::TwitterText::Regex[:end_mention_match].match?(after)
|
unless Twitter::TwitterText::Regex[:end_mention_match].match?(after)
|
||||||
|
username, domain = screen_name.split('@')
|
||||||
|
|
||||||
|
next if domain.present? && domain.length > MAX_DOMAIN_LENGTH
|
||||||
|
|
||||||
start_position = match_data.char_begin(1) - 1
|
start_position = match_data.char_begin(1) - 1
|
||||||
end_position = match_data.char_end(1)
|
end_position = match_data.char_end(1)
|
||||||
possible_entries << {
|
possible_entries << {
|
||||||
|
|
|
@ -3,35 +3,57 @@
|
||||||
class StatusLengthValidator < ActiveModel::Validator
|
class StatusLengthValidator < ActiveModel::Validator
|
||||||
MAX_CHARS = 500
|
MAX_CHARS = 500
|
||||||
URL_PLACEHOLDER_CHARS = 23
|
URL_PLACEHOLDER_CHARS = 23
|
||||||
URL_PLACEHOLDER = "\1#{'x' * URL_PLACEHOLDER_CHARS}"
|
URL_PLACEHOLDER = 'x' * 23
|
||||||
|
|
||||||
def validate(status)
|
def validate(status)
|
||||||
return unless status.local? && !status.reblog?
|
return unless status.local? && !status.reblog?
|
||||||
|
|
||||||
@status = status
|
status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?(status)
|
||||||
status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?
|
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def too_long?
|
def too_long?(status)
|
||||||
countable_length > MAX_CHARS
|
countable_length(combined_text(status)) > MAX_CHARS
|
||||||
end
|
end
|
||||||
|
|
||||||
def countable_length
|
def countable_length(str)
|
||||||
total_text.mb_chars.grapheme_length
|
str.mb_chars.grapheme_length
|
||||||
end
|
end
|
||||||
|
|
||||||
def total_text
|
def combined_text(status)
|
||||||
[@status.spoiler_text, countable_text].join
|
[status.spoiler_text, countable_text(status.text)].join
|
||||||
end
|
end
|
||||||
|
|
||||||
def countable_text
|
def countable_text(str)
|
||||||
return '' if @status.text.nil?
|
return '' if str.blank?
|
||||||
|
|
||||||
@status.text.dup.tap do |new_text|
|
# To ensure that we only give length concessions to entities that
|
||||||
new_text.gsub!(FetchLinkCardService::URL_PATTERN, URL_PLACEHOLDER)
|
# will be correctly parsed during formatting, we go through full
|
||||||
new_text.gsub!(Account::MENTION_RE, '@\2')
|
# entity extraction
|
||||||
|
|
||||||
|
entities = Extractor.remove_overlapping_entities(Extractor.extract_urls_with_indices(str, extract_url_without_protocol: false) + Extractor.extract_mentions_or_lists_with_indices(str))
|
||||||
|
|
||||||
|
rewrite_entities(str, entities) do |entity|
|
||||||
|
if entity[:url]
|
||||||
|
URL_PLACEHOLDER
|
||||||
|
elsif entity[:screen_name]
|
||||||
|
"@#{entity[:screen_name].split('@').first}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def rewrite_entities(str, entities)
|
||||||
|
entities.sort_by! { |entity| entity[:indices].first }
|
||||||
|
result = ''.dup
|
||||||
|
|
||||||
|
last_index = entities.reduce(0) do |index, entity|
|
||||||
|
result << str[index...entity[:indices].first]
|
||||||
|
result << yield(entity)
|
||||||
|
entity[:indices].last
|
||||||
|
end
|
||||||
|
|
||||||
|
result << str[last_index..-1]
|
||||||
|
result
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -50,6 +50,13 @@ describe StatusLengthValidator do
|
||||||
expect(status.errors).to have_received(:add)
|
expect(status.errors).to have_received(:add)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'does not count overly long URLs as 23 characters flat' do
|
||||||
|
text = "http://example.com/valid?#{'#foo?' * 1000}"
|
||||||
|
status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
|
||||||
|
subject.validate(status)
|
||||||
|
expect(status.errors).to have_received(:add)
|
||||||
|
end
|
||||||
|
|
||||||
it 'counts only the front part of remote usernames' do
|
it 'counts only the front part of remote usernames' do
|
||||||
text = ('a' * 475) + " @alice@#{'b' * 30}.com"
|
text = ('a' * 475) + " @alice@#{'b' * 30}.com"
|
||||||
status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
|
status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
|
||||||
|
@ -57,5 +64,13 @@ describe StatusLengthValidator do
|
||||||
subject.validate(status)
|
subject.validate(status)
|
||||||
expect(status.errors).to_not have_received(:add)
|
expect(status.errors).to_not have_received(:add)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'does count both parts of remote usernames for overly long domains' do
|
||||||
|
text = "@alice@#{'b' * 500}.com"
|
||||||
|
status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
|
||||||
|
|
||||||
|
subject.validate(status)
|
||||||
|
expect(status.errors).to have_received(:add)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue