2017-05-23 04:42:19 +08:00
|
|
|
require_dependency "onebox/discourse_onebox_sanitize_config"
|
|
|
|
require_dependency 'final_destination'
|
|
|
|
|
2016-10-25 07:25:44 +08:00
|
|
|
Dir["#{Rails.root}/lib/onebox/engine/*_onebox.rb"].sort.each { |f| require f }
|
2014-02-26 02:35:08 +08:00
|
|
|
|
2013-02-06 03:16:51 +08:00
|
|
|
module Oneboxer
|
2013-04-30 10:43:21 +08:00
|
|
|
# keep reloaders happy: the struct is only assigned while Oneboxer::Result is
# not defined yet, so evaluating this file again skips the assignment
unless defined?(Oneboxer::Result)
  # Value returned by Oneboxer.apply: the processed document together with a
  # flag recording whether any link was swapped for a onebox.
  Result = Struct.new(:doc, :changed) do
    # Serializes the wrapped document by delegating to doc.to_html.
    def to_html
      doc.to_html
    end

    # Predicate-style reader for the :changed member.
    alias_method :changed?, :changed
  end
end
|
|
|
|
|
2014-01-28 04:09:09 +08:00
|
|
|
# Returns the :preview entry of the onebox data built for +url+.
#
# url     - the URL to look up
# options - optional Hash (nil is treated as empty); when
#           :invalidate_oneboxes is truthy the cached entry for +url+ is
#           invalidated before the lookup
def self.preview(url, options=nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]

  rendered = onebox_raw(url)
  rendered[:preview]
end
|
|
|
|
|
2014-01-28 04:09:09 +08:00
|
|
|
# Returns the :onebox entry of the onebox data built for +url+.
#
# url     - the URL to look up
# options - optional Hash (nil is treated as empty); when
#           :invalidate_oneboxes is truthy the cached entry for +url+ is
#           invalidated before the lookup
def self.onebox(url, options=nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]

  rendered = onebox_raw(url)
  rendered[:onebox]
end
|
|
|
|
|
|
|
|
# Reads the :onebox value for +url+ from the Rails cache; nothing is built
# when the entry is missing.
#
# Returns the cached :onebox value, nil when the cache holds nothing for
# +url+, or "" when reading the entry raised. In the failing case the entry
# is invalidated and a warning is logged.
def self.cached_onebox(url)
  cached = Rails.cache.read(onebox_cache_key(url))
  cached[:onebox] if cached
rescue => error
  invalidate(url)
  Rails.logger.warn("invalid cached onebox for #{url} #{error}")
  ""
end
|
|
|
|
|
|
|
|
# Reads the :preview value for +url+ from the Rails cache; nothing is built
# when the entry is missing.
#
# Returns the cached :preview value, nil when the cache holds nothing for
# +url+, or "" when reading the entry raised. In the failing case the entry
# is invalidated and a warning is logged.
def self.cached_preview(url)
  cached = Rails.cache.read(onebox_cache_key(url))
  cached[:preview] if cached
rescue => error
  invalidate(url)
  Rails.logger.warn("invalid cached preview for #{url} #{error}")
  ""
end
|
|
|
|
|
2014-01-29 02:18:19 +08:00
|
|
|
# Deletes the Rails cache entry stored under the onebox cache key of +url+.
# Returns whatever Rails.cache.delete returns.
def self.invalidate(url)
  cache_key = onebox_cache_key(url)
  Rails.cache.delete(cache_key)
end
|
2014-01-29 02:18:19 +08:00
|
|
|
|
2013-02-06 03:16:51 +08:00
|
|
|
# Parse URLs out of HTML, returning the document when finished.
#
# string_or_doc - an HTML String, which is parsed with
#                 Nokogiri::HTML::fragment, or an already-parsed document
#                 that is searched as it is
#
# Yields the href and the element of every "a.onebox" link whose href is
# present.
def self.each_onebox_link(string_or_doc)
  doc = string_or_doc.is_a?(String) ? Nokogiri::HTML::fragment(string_or_doc) : string_or_doc

  anchors = doc.search("a.onebox")
  if anchors.present?
    anchors.each do |anchor|
      href = anchor['href']
      yield(href, anchor) if href.present?
    end
  end

  doc
end
|
|
|
|
|
2015-09-25 18:14:38 +08:00
|
|
|
# Adds a +source_topic_id+ query parameter to links that point at a topic on
# this site (hack urls to create proper expansions).
#
# url      - the link target
# topic_id - value interpolated into the added parameter
#
# Returns +url+ itself unless all of the following hold: it starts with
# Discourse.base_url (compared case-insensitively), it parses as a URI with
# a path, and that path is routed to the 'topics' controller. Otherwise a
# new String with the parameter added is returned; +url+ is never mutated.
def self.append_source_topic_id(url, topic_id)
  # Regexp.escape neutralizes every metacharacter of the base url (not only
  # "."), and \A anchors at the real start of the string instead of at the
  # start of any line inside it
  return url unless url =~ /\A#{Regexp.escape(Discourse.base_url)}/i

  uri =
    begin
      URI.parse(url)
    rescue StandardError
      nil
    end
  return url unless uri && uri.path

  # an unroutable path raises; treat that the same as "not a topic"
  route =
    begin
      Rails.application.routes.recognize_path(uri.path)
    rescue StandardError
      nil
    end
  return url unless route && route[:controller] == 'topics'

  # the query has to be placed before any "#fragment"; appending to the end
  # of the string would bury the parameter inside the fragment
  location, fragment = url.split("#", 2)
  separator = location.include?("?") ? "&" : "?"
  anchor = fragment ? "##{fragment}" : ""

  "#{location}#{separator}source_topic_id=#{topic_id}#{anchor}"
end
|
|
|
|
|
|
|
|
# Replaces the onebox links found in +string_or_doc+ with the HTML supplied
# by the caller's block.
#
# string_or_doc - an HTML String (parsed with Nokogiri::HTML::fragment) or
#                 an already-parsed document
# args          - optional Hash; when :topic_id is set, each url goes
#                 through append_source_topic_id before it is yielded
#
# Yields (url, element) for every link reported by each_onebox_link. The
# block's result, or its first element when it is an Array, is taken as the
# onebox HTML. A falsy result, or HTML that parses to no nodes, leaves the
# link untouched.
#
# Returns a Result holding the document and whether anything was swapped.
def self.apply(string_or_doc, args=nil)
  doc = string_or_doc.is_a?(String) ? Nokogiri::HTML::fragment(string_or_doc) : string_or_doc
  changed = false

  each_onebox_link(doc) do |url, element|
    url = append_source_topic_id(url, args[:topic_id]) if args && args[:topic_id]

    onebox, _preview = yield(url, element)
    next unless onebox

    replacement = Nokogiri::HTML::fragment(onebox)
    next if replacement.children.count == 0

    # special logic to strip empty p elements: when the link is the only
    # child of a <p>, the whole paragraph is swapped instead of the link
    parent = element.parent
    only_child_of_paragraph =
      parent &&
      parent.node_name &&
      parent.node_name.downcase == "p" &&
      parent.children.count == 1
    element = parent if only_child_of_paragraph

    changed = true
    element.swap replacement.to_html
  end

  Result.new(doc, changed)
end
|
|
|
|
|
2016-12-20 07:31:10 +08:00
|
|
|
# Tells whether the value stored in redis under the preview key of
# +user_id+ is the String "1".
def self.is_previewing?(user_id)
  flag = $redis.get(preview_key(user_id))
  flag == "1"
end
|
|
|
|
|
|
|
|
# Stores "1" in redis under the preview key of +user_id+, with an expiry of
# one minute.
def self.preview_onebox!(user_id)
  key = preview_key(user_id)
  $redis.setex(key, 1.minute, "1")
end
|
|
|
|
|
|
|
|
# Deletes the redis entry stored under the preview key of +user_id+.
def self.onebox_previewed!(user_id)
  key = preview_key(user_id)
  $redis.del(key)
end
|
|
|
|
|
2017-01-06 10:01:14 +08:00
|
|
|
# Returns what Onebox::Matcher reports as `oneboxed` for +url+.
def self.engine(url)
  matcher = Onebox::Matcher.new(url)
  matcher.oneboxed
end
|
|
|
|
|
2014-03-18 10:12:58 +08:00
|
|
|
private
|
|
|
|
|
2016-12-20 07:31:10 +08:00
|
|
|
# Builds the redis key under which the preview flag of +user_id+ is kept.
#
# NOTE(review): the bare `private` preceding this method does not change the
# visibility of `def self.` methods, so this remains publicly callable.
def self.preview_key(user_id)
  redis_key = "onebox:preview:#{user_id}"
  redis_key
end
|
|
|
|
|
2016-10-24 18:46:22 +08:00
|
|
|
# Returns a new Hash with empty :preview and :onebox Strings; used as the
# result when no onebox can be produced.
def self.blank_onebox
  {
    preview: "",
    onebox: ""
  }
end
|
|
|
|
|
2016-10-24 18:46:22 +08:00
|
|
|
# Builds the Rails cache key under which the onebox data of +url+ is stored.
def self.onebox_cache_key(url)
  cache_key = "onebox__#{url}"
  cache_key
end
|
2015-08-24 08:43:07 +08:00
|
|
|
|
2016-10-24 18:46:22 +08:00
|
|
|
# Produces the onebox data for +url+, going through Rails.cache.fetch with a
# one day expiry.
#
# Inside the cache block the url is resolved with FinalDestination and
# handed to Onebox.preview together with the Discourse sanitize config, a
# fixed max width and, when the resolver exposes one, its cookie.
#
# Returns a Hash with :onebox and :preview Strings. blank_onebox is returned
# when the url does not resolve, when its hostname is found in
# SiteSetting.onebox_domains_blacklist, or when anything raises.
def self.onebox_raw(url)
  Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
    destination = FinalDestination.new(url)
    uri = destination.resolve

    # `return` inside the block exits onebox_raw itself rather than handing a
    # value back to Rails.cache.fetch -- presumably so that blank results are
    # not stored in the cache; confirm before changing
    if uri.blank? || SiteSetting.onebox_domains_blacklist.include?(uri.hostname)
      return blank_onebox
    end

    options = { cache: {}, max_width: 695, sanitize_config: Sanitize::Config::DISCOURSE_ONEBOX }
    options[:cookie] = destination.cookie if destination.cookie

    rendered = Onebox.preview(uri.to_s, options)
    { onebox: rendered.to_s, preview: rendered.try(:placeholder_html).to_s }
  end
rescue => error
  # no point warning here, just cause we have an issue oneboxing a url
  # we can later hunt for failed oneboxes by searching logs if needed
  Rails.logger.info("Failed to onebox #{url} #{error} #{error.backtrace}")
  # return a blank hash, so rest of the code works
  blank_onebox
end
|
2014-03-18 10:12:58 +08:00
|
|
|
|
2013-02-06 03:16:51 +08:00
|
|
|
end
|