mirror of
https://github.com/discourse/discourse.git
synced 2024-11-25 00:43:24 +08:00
394cd43d77
Scrubbing an ASCII-8BIT string isn't ever going to remove anything, because there's no code point that isn't valid 8-bit ASCII. Since we'd really prefer it if everything were UTF-8 anyway, we'll just assume, for now, that whatever comes out of SimpleRSS is probably UTF-8, and just nuke anything that isn't a valid UTF-8 codepoint. Of course, the *real* bug here is that SimpleRSS [unilaterally converts everything to ASCII-8BIT](https://github.com/cardmagic/simple-rss/issues/15). It's presumably *far* too much to ask that it detects the encoding of the source RSS feed and marks the parsed strings with the correct encoding...
137 lines
2.8 KiB
Ruby
137 lines
2.8 KiB
Ruby
#
|
|
# Creates and Updates Topics based on an RSS or ATOM feed.
|
|
#
|
|
require 'digest/sha1'
|
|
require_dependency 'post_creator'
|
|
require_dependency 'post_revisor'
|
|
require 'open-uri'
|
|
|
|
module Jobs
|
|
class PollFeed < Jobs::Scheduled
|
|
every 1.hour
|
|
|
|
sidekiq_options retry: false
|
|
|
|
# Runs the job: polls the feed, but only when feed polling is enabled and a
# feed URL has been configured. Otherwise does nothing and returns nil.
#
# args - job arguments; not read by this method.
def execute(args)
  return unless SiteSetting.feed_polling_enabled?
  return unless SiteSetting.feed_polling_url.present?

  poll_feed
end
|
|
|
|
# Key string for the configured feed: "feed-modified:" followed by the SHA1
# hex digest of the feed URL. Computed on first use and memoized afterwards.
def feed_key
  return @feed_key if @feed_key

  url_digest = Digest::SHA1.hexdigest(SiteSetting.feed_polling_url)
  @feed_key = "feed-modified:#{url_digest}"
end
|
|
|
|
# Builds a Feed and imports every topic it yields.
def poll_feed
  import_topics(Feed.new.topics)
end
|
|
|
|
private
|
|
|
|
# Imports each of the given feed topics, one at a time, in order.
#
# feed_topics - enumerable of topic objects accepted by import_topic.
def import_topics(feed_topics)
  feed_topics.each { |feed_topic| import_topic(feed_topic) }
end
|
|
|
|
# Imports a single feed topic through TopicEmbed.import.
#
# Topics for which no user can be resolved are skipped (returns nil).
#
# topic - object responding to user, url, title and content.
def import_topic(topic)
  # Resolve the user exactly once: FeedTopic#user performs User lookups on
  # every call, so asking twice would repeat that work.
  user = topic.user
  return unless user

  # The content is scrubbed and its HTML entities are unescaped before import.
  TopicEmbed.import(user, topic.url, topic.title, CGI.unescapeHTML(topic.content.scrub))
end
|
|
|
|
# Fetches the configured RSS/Atom feed and exposes its entries as FeedTopics.
class Feed
  require 'simple-rss'

  # Tell SimpleRSS to also extract the item tag named by the
  # embed_username_key_from_feed setting. This runs once, when the class
  # body is evaluated.
  if SiteSetting.embed_username_key_from_feed.present?
    SimpleRSS.item_tags << SiteSetting.embed_username_key_from_feed.to_sym
  end

  # Reads the feed URL from the site settings, prefixing "http://" when the
  # value does not already start with an http(s) scheme.
  def initialize
    @feed_url = SiteSetting.feed_polling_url
    # \A rather than ^: the scheme must be at the very start of the string.
    # With ^, a multi-line value whose *later* line starts with "http://"
    # would be passed to open unchanged.
    @feed_url = "http://#{@feed_url}" if @feed_url !~ /\Ahttps?\:\/\//
  end

  # Returns an Array of FeedTopic, one per feed item that has content.
  def topics
    rss.items.map { |item| FeedTopic.new(item) }.select(&:content)
  end

  private

  # Downloads and parses the feed. The block form of open closes the
  # underlying handle once parsing has finished.
  def rss
    open(@feed_url, allow_redirections: :all) { |io| SimpleRSS.parse(io) }
  end
end
|
|
|
|
# Wraps one parsed feed item and exposes what is needed to import it as a
# topic: url, content, title and the user to post as.
class FeedTopic
  # article_rss_item - feed item responding to link, id, title, content and
  #                    description.
  def initialize(article_rss_item)
    @article_rss_item = article_rss_item
  end

  # The item's link when it is an absolute http(s) URL, otherwise the
  # item's id.
  def url
    link = @article_rss_item.link
    url?(link) ? link : @article_rss_item.id
  end

  # The item's content, falling back to its description. Returned as
  # scrubbed UTF-8; nil when the item has neither.
  def content
    utf8(@article_rss_item.content) || utf8(@article_rss_item.description)
  end

  # The item's title as scrubbed UTF-8, or nil when the item has no title.
  def title
    utf8(@article_rss_item.title)
  end

  # The user named by the feed item if one can be found, otherwise the
  # user named by the embed_by_username setting. May be nil.
  def user
    author_user || default_user
  end

  private

  # Relabels text as UTF-8 and replaces any invalid byte sequences.
  # Returns nil for nil input. Note that force_encoding relabels the
  # receiver in place.
  def utf8(text)
    text.try(:force_encoding, "UTF-8").try(:scrub)
  end

  # True only when link starts with "http://" or "https://".
  def url?(link)
    return false if link.blank?

    # \A rather than ^ so a later line starting with a scheme cannot match.
    !!(link =~ /\Ahttps?\:\/\//)
  end

  # Value of the item field named by the embed_username_key_from_feed
  # setting, or nil when the setting is blank or the field cannot be read.
  def author_username
    key = SiteSetting.embed_username_key_from_feed
    return nil if key.blank?

    # public_send: the key comes from a setting and must never reach
    # private methods of the item.
    @article_rss_item.public_send(key.to_sym)
  rescue StandardError
    # Best effort: an item without that field simply has no author.
    nil
  end

  def default_user
    find_user(SiteSetting.embed_by_username)
  end

  def author_user
    username = author_username
    return nil if username.blank?

    find_user(username)
  end

  # Looks a user up by name, case-insensitively. The name is downcased here
  # because the lookup is against the username_lower column.
  def find_user(user_name)
    return nil if user_name.blank?

    User.where(username_lower: user_name.downcase).first
  end
end
|
|
|
|
end
|
|
|
|
end
|