2016-11-15 15:56:29 +00:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2016-09-26 14:42:38 +00:00
|
|
|
# Discovers and downloads the Atom feed associated with a URL.
#
# Discovery order, as implemented in #call:
#   1. the URL itself, when the response MIME type is Atom;
#   2. an Atom "alternate" link advertised in the HTTP `Link` header(s);
#   3. an Atom `<link rel="alternate">` element in the HTML document.
class FetchAtomService < BaseService
  include HttpHelper

  # MIME type identifying an Atom document.
  ATOM_MIME_TYPE = 'application/atom+xml'

  # @param url [String] address of the resource to inspect
  # @return [Array(String, String), nil] the feed URL paired with the feed
  #   body, or nil when the URL is blank, the probe does not return 200,
  #   no Atom feed is advertised, or an SSL/connection error occurs
  def call(url)
    return if url.blank?

    # NOTE(review): http_client is presumably provided by HttpHelper — confirm.
    response = http_client.head(url)
    Rails.logger.debug "Remote status HEAD request returned code #{response.code}"

    # 405 means the server rejects HEAD; probe again with GET. The GET status
    # is logged only when a GET was actually issued.
    if response.code == 405
      response = http_client.get(url)
      Rails.logger.debug "Remote status GET request returned code #{response.code}"
    end

    return nil if response.code != 200
    return [url, fetch(url)] if response.mime_type == ATOM_MIME_TYPE
    return process_headers(url, response) if response['Link'].present?

    process_html(fetch(url))
  rescue OpenSSL::SSL::SSLError => e
    Rails.logger.debug "SSL error: #{e}"
    nil
  rescue HTTP::ConnectionError => e
    Rails.logger.debug "HTTP ConnectionError: #{e}"
    nil
  end

  private

  # Looks for an Atom alternate link inside an HTML document.
  #
  # @param body [String] HTML source
  # @return [Array(String, String), nil] feed URL and feed body, or nil when
  #   the document has no usable Atom alternate link
  def process_html(body)
    Rails.logger.debug 'Processing HTML'

    page = Nokogiri::HTML(body)
    alternate_link = page.xpath('//link[@rel="alternate"]').find { |link| link['type'] == ATOM_MIME_TYPE }

    return nil if alternate_link.nil?

    # A <link> element without an href cannot be fetched.
    href = alternate_link['href']
    return nil if href.blank?

    [href, fetch(href)]
  end

  # Looks for an Atom alternate link in the response's `Link` header(s),
  # falling back to the HTML document at +url+ when none is advertised.
  #
  # @param url [String] address the response was obtained from
  # @param response [#[]] response whose 'Link' entry is a String or an
  #   Array of Strings
  # @return [Array(String, String), nil] feed URL and feed body, or nil
  def process_headers(url, response)
    Rails.logger.debug 'Processing link header'

    # A response may carry several Link headers; merge them into one
    # comma-separated list so that every advertised link is considered,
    # not just the first header.
    link_header    = LinkHeader.parse(Array(response['Link']).join(', '))
    alternate_link = link_header.find_link(%w[rel alternate], ['type', ATOM_MIME_TYPE])

    return process_html(fetch(url)) if alternate_link.nil?

    [alternate_link.href, fetch(alternate_link.href)]
  end

  # Performs a GET request and returns the response body as a String.
  #
  # @param url [String]
  # @return [String]
  def fetch(url)
    http_client.get(url).to_s
  end
end
|