Fix being able to post URLs longer than 4096 characters

2022-03-22 09:51:46 +01:00 · 2022-03-22 09:51:46 +01:00 · cc706b1191
commit cc706b1191
parent ec1bb2a4fe
3 changed files with 58 additions and 14 deletions
--- a/app/lib/extractor.rb
+++ b/app/lib/extractor.rb
@ -1,6 +1,8 @@
 # frozen_string_literal: true
 module Extractor
  MAX_DOMAIN_LENGTH = 253
  extend Twitter::TwitterText::Extractor
  module_function
@ -14,7 +16,12 @@ module Extractor
    text.to_s.scan(Account::MENTION_RE) do |screen_name, _|
      match_data = $LAST_MATCH_INFO
      after = $'
      unless Twitter::TwitterText::Regex[:end_mention_match].match?(after)
        username, domain = screen_name.split('@')
        next if domain.present? && domain.length > MAX_DOMAIN_LENGTH
        start_position = match_data.char_begin(1) - 1
        end_position = match_data.char_end(1)
        possible_entries << {
--- a/app/validators/status_length_validator.rb
+++ b/app/validators/status_length_validator.rb
@ -3,35 +3,57 @@
 class StatusLengthValidator < ActiveModel::Validator
  MAX_CHARS = 500
  URL_PLACEHOLDER_CHARS = 23
-  URL_PLACEHOLDER = "\1#{'x' * URL_PLACEHOLDER_CHARS}"
+  URL_PLACEHOLDER = 'x' * 23
  def validate(status)
    return unless status.local? && !status.reblog?
-    @status = status
+    status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?(status)
    status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?
  end
  private
-  def too_long?
+  def too_long?(status)
-    countable_length > MAX_CHARS
+    countable_length(combined_text(status)) > MAX_CHARS
  end
-  def countable_length
+  def countable_length(str)
-    total_text.mb_chars.grapheme_length
+    str.mb_chars.grapheme_length
  end
-  def total_text
+  def combined_text(status)
-    [@status.spoiler_text, countable_text].join
+    [status.spoiler_text, countable_text(status.text)].join
  end
-  def countable_text
+  def countable_text(str)
-    return '' if @status.text.nil?
+    return '' if str.blank?
-    @status.text.dup.tap do |new_text|
+    # To ensure that we only give length concessions to entities that
-      new_text.gsub!(FetchLinkCardService::URL_PATTERN, URL_PLACEHOLDER)
+    # will be correctly parsed during formatting, we go through full
-      new_text.gsub!(Account::MENTION_RE, '@\2')
+    # entity extraction
    entities = Extractor.remove_overlapping_entities(Extractor.extract_urls_with_indices(str, extract_url_without_protocol: false) + Extractor.extract_mentions_or_lists_with_indices(str))
    rewrite_entities(str, entities) do |entity|
      if entity[:url]
        URL_PLACEHOLDER
      elsif entity[:screen_name]
        "@#{entity[:screen_name].split('@').first}"
      end
    end
  end
  def rewrite_entities(str, entities)
    entities.sort_by! { |entity| entity[:indices].first }
    result = ''.dup
    last_index = entities.reduce(0) do |index, entity|
      result << str[index...entity[:indices].first]
      result << yield(entity)
      entity[:indices].last
    end
    result << str[last_index..-1]
    result
  end
 end
--- a/spec/validators/status_length_validator_spec.rb
+++ b/spec/validators/status_length_validator_spec.rb
@ -50,6 +50,13 @@ describe StatusLengthValidator do
      expect(status.errors).to have_received(:add)
    end
    it 'does not count overly long URLs as 23 characters flat' do
      text = "http://example.com/valid?#{'#foo?' * 1000}"
      status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
      subject.validate(status)
      expect(status.errors).to have_received(:add)
    end
    it 'counts only the front part of remote usernames' do
      text   = ('a' * 475) + " @alice@#{'b' * 30}.com"
      status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
@ -57,5 +64,13 @@ describe StatusLengthValidator do
      subject.validate(status)
      expect(status.errors).to_not have_received(:add)
    end
    it 'does count both parts of remote usernames for overly long domains' do
      text   = "@alice@#{'b' * 500}.com"
      status = double(spoiler_text: '', text: text, errors: double(add: nil), local?: true, reblog?: false)
      subject.validate(status)
      expect(status.errors).to have_received(:add)
    end
  end
 end