discourse/app/jobs/scheduled/poll_feed.rb
Sam Saffron 30990006a9 DEV: enable frozen string literal on all files
This reduces chances of errors where consumers of strings mutate inputs
and reduces memory usage of the app.

Test suite passes now, but there may be some stuff left, so we will run
a few sites on a branch prior to merging
2019-05-13 09:31:32 +08:00

192 lines
4.5 KiB
Ruby

# frozen_string_literal: true
#
# Creates and updates topics based on an RSS or Atom feed.
#
require 'digest/sha1'
require 'excon'
require_dependency 'final_destination'
require_dependency 'post_creator'
require_dependency 'post_revisor'
require_dependency 'encodings'
module Jobs
class PollFeed < Jobs::Scheduled
# Scheduling declaration: `every` is not defined in this file (presumably
# provided by Jobs::Scheduled); it is given a five-minute interval here.
# Actual polling is additionally throttled by not_polled_recently?.
every 5.minutes
# Passes `retry: false` to Sidekiq so a failed poll is not retried;
# the next scheduled run simply tries again.
sidekiq_options retry: false
# Job entry point. Polls the feed only when feed polling is enabled, a feed
# URL is configured, and the feed has not been polled recently.
#
# args - job arguments; not used by this job.
#
# Returns the result of poll_feed, or nil when polling is skipped.
def execute(args)
  return unless SiteSetting.feed_polling_enabled?
  return unless SiteSetting.feed_polling_url.present?
  # Checked last on purpose: this call writes the throttle key to Redis.
  return unless not_polled_recently?

  poll_feed
end
# Builds a key string of the form "feed-modified:<sha1>", where <sha1> is the
# SHA1 hex digest of the configured feed polling URL.
def feed_key
  url_digest = Digest::SHA1.hexdigest(SiteSetting.feed_polling_url)
  "feed-modified:#{url_digest}"
end
# Fetches the configured feed and imports every topic found in it.
#
# Returns whatever import_topics returns.
def poll_feed
  # the rss library is loaded lazily, right before the feed is first parsed
  ensure_rss_loaded
  import_topics(Feed.new.topics)
end
private
# Flag recording whether the rss library and the feed helpers below have
# already been required by ensure_rss_loaded.
@@rss_loaded = false
# The rss lib is very expensive memory-wise, so it is not loaded until needed.
# Requires it (plus the feed helper files) the first time this is called and
# does nothing on later calls.
def ensure_rss_loaded
  unless @@rss_loaded
    require 'rss'
    require_dependency 'feed_item_accessor'
    require_dependency 'feed_element_installer'
    @@rss_loaded = true
  end
end
# Throttle check backed by Redis. Tries to set the 'feed-polled-recently' key
# with `nx: true` (set only when the key does not exist yet) and an expiry of
# the configured polling frequency minus ten seconds.
#
# Returns the result of $redis.set — presumably truthy only when the key was
# newly written, i.e. when no poll happened within the expiry window.
def not_polled_recently?
  lock_ttl = SiteSetting.feed_polling_frequency_mins.minutes - 10.seconds
  $redis.set('feed-polled-recently', "1", ex: lock_ttl, nx: true)
end
# Imports each feed topic in order via import_topic.
#
# feed_topics - enumerable of feed topic objects.
#
# Returns feed_topics (the receiver of #each).
def import_topics(feed_topics)
  feed_topics.each { |feed_topic| import_topic(feed_topic) }
end
# Imports a single feed topic through TopicEmbed.import, with HTML entities in
# its content unescaped. Topics for which no user can be resolved are skipped.
#
# topic - object responding to #user, #url, #title and #content.
#
# Returns the result of TopicEmbed.import, or nil when the topic has no user.
def import_topic(topic)
  # Resolve the user once: topic.user may perform lookups on every call, and
  # the value that was checked must be the value that is passed on.
  author = topic.user
  return unless author

  TopicEmbed.import(author, topic.url, topic.title, CGI.unescapeHTML(topic.content))
end
# Downloads the feed configured in SiteSetting.feed_polling_url, parses it
# with the rss library and exposes its items as FeedTopic objects.
class Feed
  # Reads the configured feed URL; when it does not match /^https?:\/\//,
  # "http://" is prepended.
  def initialize
    configured_url = SiteSetting.feed_polling_url

    if configured_url =~ /^https?\:\/\//
      @feed_url = configured_url
    else
      @feed_url = "http://#{configured_url}"
    end
  end

  # Returns an array of FeedTopic objects, one per feed item that has content.
  # Returns an empty array when the feed could not be fetched or parsed.
  def topics
    rss = parsed_feed
    return [] unless rss.present?

    rss.items.each_with_object([]) do |item, collected|
      candidate = FeedTopic.new(item)
      collected << candidate if candidate.content
    end
  end

  private

  # Fetches the raw feed, converts it to UTF-8 and parses it.
  #
  # Returns the parsed feed, or nil when nothing was fetched, the converted
  # body is blank, or the rss parser rejects the document.
  def parsed_feed
    raw_feed, encoding = fetch_rss
    return nil if raw_feed.nil?

    # Prefer the charset announced by the server; fall back to
    # Encodings.to_utf8 when there is none or try_utf8 yields nothing.
    encoded_feed = encoding ? Encodings.try_utf8(raw_feed, encoding) : nil
    encoded_feed ||= Encodings.to_utf8(raw_feed)
    return nil if encoded_feed.blank?

    username_key = SiteSetting.embed_username_key_from_feed
    FeedElementInstaller.install(username_key, encoded_feed) if username_key.present?

    RSS::Parser.parse(encoded_feed)
  rescue RSS::NotWellFormedError, RSS::InvalidRSSError
    nil
  end

  # Resolves the feed URL through FinalDestination and issues a GET request
  # that expects a 200 response.
  #
  # Returns [body, charset] on success; nil when the URL does not resolve or
  # Excon raises an HTTP status error.
  def fetch_rss
    resolver = FinalDestination.new(@feed_url, verbose: true)
    resolved_uri = resolver.resolve
    return nil if resolver.status != :resolved

    connection = Excon.new(resolved_uri.to_s)
    response = connection.request(method: :get, expects: 200)
    [response.body, detect_charset(response)]
  rescue Excon::Error::HTTPStatus
    nil
  end

  # Extracts the charset from the response's Content-Type header.
  #
  # Returns the matching Encoding, or nil when the header carries no charset
  # or names an encoding Ruby does not know.
  def detect_charset(response)
    content_type = response.headers['Content-Type']
    return nil unless content_type =~ /charset\s*=\s*([a-z0-9\-]+)/i

    Encoding.find(Regexp.last_match(1))
  rescue ArgumentError
    nil
  end
end
# Wraps a single parsed feed item and exposes the values needed to import it
# as a topic: url, content, title and the user it should be posted as.
class FeedTopic
  # article_rss_item - a parsed feed item; every read goes through a
  #                    FeedItemAccessor built around it.
  def initialize(article_rss_item)
    @accessor = FeedItemAccessor.new(article_rss_item)
  end

  # Returns the item's link when it looks like an http(s) URL, otherwise the
  # content of the item's :id element.
  def url
    link = @accessor.link
    url?(link) ? link : @accessor.element_content(:id)
  end

  # Returns the first available body among the content_encoded, content and
  # description elements, forced to UTF-8 with invalid bytes scrubbed; nil
  # when the item has none of them.
  def content
    content = nil
    %i[content_encoded content description].each do |content_element_name|
      content ||= @accessor.element_content(content_element_name)
    end
    content&.force_encoding('UTF-8')&.scrub
  end

  # Returns the item's title forced to UTF-8 with invalid bytes scrubbed.
  # NOTE(review): unlike #content this is not nil-safe — it raises
  # NoMethodError if the accessor returns nil for :title; confirm whether
  # title-less items can reach this point.
  def title
    @accessor.element_content(:title).force_encoding('UTF-8').scrub
  end

  # Returns the user named by the feed item when one can be found, otherwise
  # the user configured in SiteSetting.embed_by_username; nil when neither
  # lookup finds a user.
  def user
    author_user || default_user
  end

  private

  # True when link is non-blank and matches /^https?:\/\//.
  def url?(link)
    return false if link.blank?

    !!(link =~ /^https?\:\/\//)
  end

  # Returns the content of the feed element named by
  # SiteSetting.embed_username_key_from_feed (with the first ':' replaced by
  # '_'), or nil when that setting is blank.
  def author_username
    username_key = SiteSetting.embed_username_key_from_feed
    return nil unless username_key.present?

    @accessor.element_content(username_key.sub(':', '_'))
  end

  def default_user
    find_user(SiteSetting.embed_by_username)
  end

  def author_user
    # Read the feed element once and reuse the value for both the check and
    # the lookup.
    username = author_username
    return nil unless username.present?

    find_user(username)
  end

  # Looks a user up by name. The query is against the username_lower column,
  # so the name is downcased first; otherwise a mixed-case author name from
  # the feed would never match.
  def find_user(user_name)
    User.where(username_lower: user_name.downcase).first
  end
end
end
end