2019-05-03 06:17:27 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2014-01-01 03:37:43 +08:00
|
|
|
#
|
|
|
|
# Creates and updates topics based on an RSS or Atom feed.
|
|
|
|
#
|
|
|
|
require 'digest/sha1'
|
2017-12-06 07:45:09 +08:00
|
|
|
require 'excon'
|
|
|
|
require_dependency 'final_destination'
|
2014-01-01 03:37:43 +08:00
|
|
|
require_dependency 'post_creator'
|
|
|
|
require_dependency 'post_revisor'
|
2018-07-28 01:41:53 +08:00
|
|
|
require_dependency 'encodings'
|
2014-01-01 03:37:43 +08:00
|
|
|
|
|
|
|
module Jobs
|
|
|
|
class PollFeed < Jobs::Scheduled
|
2017-05-11 02:28:16 +08:00
|
|
|
every 5.minutes
|
2014-02-06 07:14:41 +08:00
|
|
|
|
2014-01-01 03:37:43 +08:00
|
|
|
sidekiq_options retry: false
|
|
|
|
|
|
|
|
# Scheduled entry point: polls the configured feed when polling is enabled,
# a feed URL is set, and no poll has been recorded recently.
#
# @param args [Object] job arguments; not used by this job
# @return [Object, nil] the result of #poll_feed, or nil when polling is skipped
def execute(args)
  return unless SiteSetting.feed_polling_enabled?
  return unless SiteSetting.feed_polling_url.present?
  # Checked last because this check itself writes the "polled recently"
  # marker to Redis.
  return unless not_polled_recently?

  poll_feed
end
|
|
|
|
|
|
|
|
# Builds a key derived from the configured feed URL.
#
# @return [String] "feed-modified:" followed by the SHA1 hex digest of
#   SiteSetting.feed_polling_url
def feed_key
  url_digest = Digest::SHA1.hexdigest(SiteSetting.feed_polling_url)
  "feed-modified:#{url_digest}"
end
|
|
|
|
|
|
|
|
# Loads the feed and imports every topic it yields.
#
# @return [Object] whatever #import_topics returns
def poll_feed
  # The rss library is loaded on first use rather than at boot.
  ensure_rss_loaded

  import_topics(Feed.new.topics)
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
2018-02-15 10:30:34 +08:00
|
|
|
# Tracks whether the rss library and its helper files have been required.
@@rss_loaded = false

# The rss library is very expensive memory-wise, so it is only required the
# first time a feed is actually polled. Later calls are no-ops.
#
# @return [true, nil] true on the call that performs the loading, nil afterwards
def ensure_rss_loaded
  unless @@rss_loaded
    require 'rss'
    require_dependency 'feed_item_accessor'
    require_dependency 'feed_element_installer'
    @@rss_loaded = true
  end
end
|
|
|
|
|
2017-05-11 02:28:16 +08:00
|
|
|
# Writes the 'feed-polled-recently' marker to Redis with `nx: true`, so the
# write only happens when the marker is not already present. The marker
# expires 10 seconds before the configured polling frequency elapses.
#
# @return [Object] the result of the Redis SET call; used by the caller as
#   the "no poll happened recently" predicate
def not_polled_recently?
  marker_ttl = SiteSetting.feed_polling_frequency_mins.minutes - 10.seconds

  $redis.set('feed-polled-recently', "1", ex: marker_ttl, nx: true)
end
|
|
|
|
|
2014-04-25 00:48:45 +08:00
|
|
|
# Imports each topic of the feed, one at a time and in order.
#
# @param feed_topics [Enumerable] the topics to import
# @return [Enumerable] the collection that was passed in
def import_topics(feed_topics)
  feed_topics.each { |feed_topic| import_topic(feed_topic) }
end
|
2014-01-01 03:37:43 +08:00
|
|
|
|
2014-04-25 00:48:45 +08:00
|
|
|
# Imports a single feed topic through TopicEmbed, provided a user can be
# resolved for it.
#
# @param topic [#user, #url, #title, #content] the feed item to import
# @return [Object, nil] the result of TopicEmbed.import, or nil when the
#   topic has no user
def import_topic(topic)
  # Resolve the user once: each call to topic.user performs the lookup again.
  user = topic.user
  return unless user

  # Feed content arrives HTML-escaped; unescape it before handing it over.
  TopicEmbed.import(user, topic.url, topic.title, CGI.unescapeHTML(topic.content))
end
|
|
|
|
|
|
|
|
# Fetches the feed configured in SiteSetting.feed_polling_url and exposes its
# items as FeedTopic objects.
class Feed
  # Captures the charset parameter of a Content-Type header value. The value
  # may be bare (charset=utf-8) or quoted (charset="utf-8").
  CHARSET_PARAM = /charset\s*=\s*["']?(?<charset>[a-z0-9\-]+)/i

  # Reads the feed URL from the site settings, prefixing "http://" when the
  # setting does not already start with an http(s) scheme.
  def initialize
    @feed_url = SiteSetting.feed_polling_url
    @feed_url = "http://#{@feed_url}" if @feed_url !~ /^https?\:\/\//
  end

  # Builds a FeedTopic for every item of the feed and keeps those that have
  # content.
  #
  # @return [Array<FeedTopic>] empty when the feed could not be fetched or parsed
  def topics
    rss = parsed_feed
    return [] unless rss.present?

    rss.items.map { |item| FeedTopic.new(item) }.select(&:content)
  end

  private

  # Downloads the feed, converts it to UTF-8 and parses it.
  #
  # @return [Object, nil] the parsed feed, or nil when the download failed,
  #   the converted body is blank, or the document is not valid RSS
  def parsed_feed
    raw_feed, encoding = fetch_rss
    return nil if raw_feed.nil?

    # Prefer the charset announced by the server; fall back to
    # Encodings.to_utf8 when none was announced or try_utf8 returned nothing.
    encoded_feed = Encodings.try_utf8(raw_feed, encoding) if encoding
    encoded_feed = Encodings.to_utf8(raw_feed) unless encoded_feed

    return nil if encoded_feed.blank?

    if SiteSetting.embed_username_key_from_feed.present?
      FeedElementInstaller.install(SiteSetting.embed_username_key_from_feed, encoded_feed)
    end

    RSS::Parser.parse(encoded_feed)
  rescue RSS::NotWellFormedError, RSS::InvalidRSSError
    nil
  end

  # Resolves the feed URL through FinalDestination and downloads the body.
  #
  # @return [Array(String, Encoding), nil] the response body and the charset
  #   detected from its headers (possibly nil); nil when the URL did not
  #   resolve or Excon raised an HTTP status error
  def fetch_rss
    final_destination = FinalDestination.new(@feed_url, verbose: true)
    feed_final_url = final_destination.resolve
    return nil unless final_destination.status == :resolved

    response = Excon.new(feed_final_url.to_s).request(method: :get, expects: 200)
    [response.body, detect_charset(response)]
  rescue Excon::Error::HTTPStatus
    nil
  end

  # Looks up the encoding named by the charset parameter of the response's
  # Content-Type header.
  #
  # @param response [#headers] the HTTP response
  # @return [Encoding, nil] nil when the header is missing, carries no
  #   charset, or names an encoding Ruby does not know
  def detect_charset(response)
    match = CHARSET_PARAM.match(response.headers['Content-Type'].to_s)
    Encoding.find(match[:charset]) if match
  rescue ArgumentError
    # Encoding.find raises ArgumentError for unknown encoding names.
    nil
  end
end
|
|
|
|
|
|
|
|
# Wraps one feed item and exposes the url, content, title and user needed to
# import it as a topic.
class FeedTopic
  # @param article_rss_item [Object] a parsed feed item; all reads go through
  #   FeedItemAccessor
  def initialize(article_rss_item)
    @accessor = FeedItemAccessor.new(article_rss_item)
  end

  # @return [Object] the item's link when it looks like an http(s) URL,
  #   otherwise the content of the item's :id element
  def url
    link = @accessor.link
    url?(link) ? link : @accessor.element_content(:id)
  end

  # Returns the first available body among the content_encoded, content and
  # description elements, as scrubbed UTF-8.
  #
  # @return [String, nil] nil when none of the elements has content
  def content
    %i[content_encoded content description].each do |element_name|
      value = @accessor.element_content(element_name)
      return utf8(value) if value
    end

    nil
  end

  # @return [String, nil] the item's title as scrubbed UTF-8, or nil when the
  #   item has no title
  def title
    utf8(@accessor.element_content(:title))
  end

  # @return [Object, nil] the user named by the feed item, falling back to
  #   the user configured in SiteSetting.embed_by_username
  def user
    author_user || default_user
  end

  private

  # Tags the text as UTF-8 and replaces invalid byte sequences; nil-safe so
  # that a missing element yields nil instead of raising.
  def utf8(text)
    text&.force_encoding('UTF-8')&.scrub
  end

  # NOTE(review): `^` anchors at the start of any line, not of the string.
  # It is kept as-is because tightening it to `\A` would reject links that
  # begin with a newline.
  def url?(link)
    return false if link.blank?

    !!(link =~ %r{^https?://})
  end

  # Reads the element named by SiteSetting.embed_username_key_from_feed,
  # with its first ':' replaced by '_'.
  def author_username
    @accessor.element_content(SiteSetting.embed_username_key_from_feed.sub(':', '_'))
  end

  def default_user
    find_user(SiteSetting.embed_by_username)
  end

  def author_user
    username = author_username
    return nil if username.blank?

    find_user(username)
  end

  # Looks the user up by lower-cased username. The name is downcased here so
  # that feed-supplied author names match regardless of their case.
  def find_user(user_name)
    User.where(username_lower: user_name.to_s.downcase).first
  end
end
|
|
|
|
end
|
|
|
|
end
|