Enable to recognize most kinds of characters as URL paths (#4941)
This commit is contained in:
parent
b39d512ade
commit
3816943e6b
5 changed files with 96 additions and 5 deletions
|
@ -131,7 +131,7 @@ class Formatter
|
||||||
end
|
end
|
||||||
|
|
||||||
def link_html(url)
|
def link_html(url)
|
||||||
url = Addressable::URI.parse(url).display_uri.to_s
|
url = Addressable::URI.parse(url).to_s
|
||||||
prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s
|
prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s
|
||||||
text = url[prefix.length, 30]
|
text = url[prefix.length, 30]
|
||||||
suffix = url[prefix.length + 30..-1]
|
suffix = url[prefix.length + 30..-1]
|
||||||
|
|
|
@ -1,9 +1,15 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class FetchLinkCardService < BaseService
|
class FetchLinkCardService < BaseService
|
||||||
include ActionView::Helpers::TagHelper
|
URL_PATTERN = %r{
|
||||||
|
( # $1 URL
|
||||||
URL_PATTERN = %r{https?://\S+}
|
(https?:\/\/)? # $2 Protocol (optional)
|
||||||
|
(#{Twitter::Regex[:valid_domain]}) # $3 Domain(s)
|
||||||
|
(?::(#{Twitter::Regex[:valid_port_number]}))? # $4 Port number (optional)
|
||||||
|
(/#{Twitter::Regex[:valid_url_path]}*)? # $5 URL Path and anchor
|
||||||
|
(\?#{Twitter::Regex[:valid_url_query_chars]}*#{Twitter::Regex[:valid_url_query_ending_chars]})? # $6 Query String
|
||||||
|
)
|
||||||
|
}iox
|
||||||
|
|
||||||
def call(status)
|
def call(status)
|
||||||
@status = status
|
@status = status
|
||||||
|
@ -42,7 +48,7 @@ class FetchLinkCardService < BaseService
|
||||||
|
|
||||||
def parse_urls
|
def parse_urls
|
||||||
if @status.local?
|
if @status.local?
|
||||||
urls = @status.text.match(URL_PATTERN).to_a.map { |uri| Addressable::URI.parse(uri).normalize }
|
urls = @status.text.scan(URL_PATTERN).map { |array| Addressable::URI.parse(array[0]).normalize }
|
||||||
else
|
else
|
||||||
html = Nokogiri::HTML(@status.text)
|
html = Nokogiri::HTML(@status.text)
|
||||||
links = html.css('a')
|
links = html.css('a')
|
||||||
|
|
42
config/initializers/twitter_regex.rb
Normal file
42
config/initializers/twitter_regex.rb
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
module Twitter
|
||||||
|
class Regex
|
||||||
|
|
||||||
|
REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou
|
||||||
|
REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]\p{Pd}_~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou
|
||||||
|
REGEXEN[:valid_url_balanced_parens] = /
|
||||||
|
\(
|
||||||
|
(?:
|
||||||
|
#{REGEXEN[:valid_general_url_path_chars]}+
|
||||||
|
|
|
||||||
|
# allow one nested level of balanced parentheses
|
||||||
|
(?:
|
||||||
|
#{REGEXEN[:valid_general_url_path_chars]}*
|
||||||
|
\(
|
||||||
|
#{REGEXEN[:valid_general_url_path_chars]}+
|
||||||
|
\)
|
||||||
|
#{REGEXEN[:valid_general_url_path_chars]}*
|
||||||
|
)
|
||||||
|
)
|
||||||
|
\)
|
||||||
|
/iox
|
||||||
|
REGEXEN[:valid_url_path] = /(?:
|
||||||
|
(?:
|
||||||
|
#{REGEXEN[:valid_general_url_path_chars]}*
|
||||||
|
(?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
|
||||||
|
#{REGEXEN[:valid_url_path_ending_chars]}
|
||||||
|
)|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
|
||||||
|
)/iox
|
||||||
|
REGEXEN[:valid_url] = %r{
|
||||||
|
( # $1 total match
|
||||||
|
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
|
||||||
|
( # $3 URL
|
||||||
|
(https?:\/\/)? # $4 Protocol (optional)
|
||||||
|
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
|
||||||
|
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
|
||||||
|
(/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
|
||||||
|
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}iox
|
||||||
|
end
|
||||||
|
end
|
|
@ -89,6 +89,38 @@ RSpec.describe Formatter do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context 'matches a URL with Japanese path string' do
|
||||||
|
let(:text) { 'https://ja.wikipedia.org/wiki/日本' }
|
||||||
|
|
||||||
|
it 'has valid URL' do
|
||||||
|
is_expected.to include 'href="https://ja.wikipedia.org/wiki/%E6%97%A5%E6%9C%AC"'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'matches a URL with Korean path string' do
|
||||||
|
let(:text) { 'https://ko.wikipedia.org/wiki/대한민국' }
|
||||||
|
|
||||||
|
it 'has valid URL' do
|
||||||
|
is_expected.to include 'href="https://ko.wikipedia.org/wiki/%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD"'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'matches a URL with Simplified Chinese path string' do
|
||||||
|
let(:text) { 'https://baike.baidu.com/item/中华人民共和国' }
|
||||||
|
|
||||||
|
it 'has valid URL' do
|
||||||
|
is_expected.to include 'href="https://baike.baidu.com/item/%E4%B8%AD%E5%8D%8E%E4%BA%BA%E6%B0%91%E5%85%B1%E5%92%8C%E5%9B%BD"'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'matches a URL with Traditional Chinese path string' do
|
||||||
|
let(:text) { 'https://zh.wikipedia.org/wiki/臺灣' }
|
||||||
|
|
||||||
|
it 'has valid URL' do
|
||||||
|
is_expected.to include 'href="https://zh.wikipedia.org/wiki/%E8%87%BA%E7%81%A3"'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
context 'contains HTML (script tag)' do
|
context 'contains HTML (script tag)' do
|
||||||
let(:text) { '<script>alert("Hello")</script>' }
|
let(:text) { '<script>alert("Hello")</script>' }
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,8 @@ RSpec.describe FetchLinkCardService do
|
||||||
stub_request(:get, 'http://example.com/sjis_with_wrong_charset').to_return(request_fixture('sjis_with_wrong_charset.txt'))
|
stub_request(:get, 'http://example.com/sjis_with_wrong_charset').to_return(request_fixture('sjis_with_wrong_charset.txt'))
|
||||||
stub_request(:head, 'http://example.com/koi8-r').to_return(status: 200, headers: { 'Content-Type' => 'text/html' })
|
stub_request(:head, 'http://example.com/koi8-r').to_return(status: 200, headers: { 'Content-Type' => 'text/html' })
|
||||||
stub_request(:get, 'http://example.com/koi8-r').to_return(request_fixture('koi8-r.txt'))
|
stub_request(:get, 'http://example.com/koi8-r').to_return(request_fixture('koi8-r.txt'))
|
||||||
|
stub_request(:head, 'http://example.com/日本語').to_return(status: 200, headers: { 'Content-Type' => 'text/html' })
|
||||||
|
stub_request(:get, 'http://example.com/日本語').to_return(request_fixture('sjis.txt'))
|
||||||
stub_request(:head, 'https://github.com/qbi/WannaCry').to_return(status: 404)
|
stub_request(:head, 'https://github.com/qbi/WannaCry').to_return(status: 404)
|
||||||
|
|
||||||
subject.call(status)
|
subject.call(status)
|
||||||
|
@ -52,6 +54,15 @@ RSpec.describe FetchLinkCardService do
|
||||||
expect(status.preview_cards.first.title).to eq("Московя начинаетъ только въ XVI ст. привлекать внимане иностранцевъ.")
|
expect(status.preview_cards.first.title).to eq("Московя начинаетъ только въ XVI ст. привлекать внимане иностранцевъ.")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context do
|
||||||
|
let(:status) { Fabricate(:status, text: 'テストhttp://example.com/日本語') }
|
||||||
|
|
||||||
|
it 'works with Japanese path string' do
|
||||||
|
expect(a_request(:get, 'http://example.com/日本語')).to have_been_made.at_least_once
|
||||||
|
expect(status.preview_cards.first.title).to eq("SJISのページ")
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'in a remote status' do
|
context 'in a remote status' do
|
||||||
|
|
Loading…
Reference in a new issue