2013-02-06 03:16:51 +08:00
|
|
|
require 'open-uri'
|
2013-03-22 01:11:54 +08:00
|
|
|
require 'digest/sha1'
|
2013-02-06 03:16:51 +08:00
|
|
|
|
|
|
|
require_dependency 'oneboxer/base'
|
|
|
|
require_dependency 'oneboxer/whitelist'
|
2013-02-26 00:42:20 +08:00
|
|
|
# Load every onebox implementation found under lib/oneboxer.
# The file list is sorted so that the load order is deterministic rather than
# depending on the order in which the filesystem returns directory entries.
Dir["#{Rails.root}/lib/oneboxer/*_onebox.rb"].sort.each do |path|
  # Keep only the last two path segments, e.g. "oneboxer/foo_onebox.rb".
  require_dependency(path.split('/')[-2..-1].join('/'))
end
|
|
|
|
|
|
|
|
# Builds "onebox" previews for URLs and caches the rendered result in
# Rails.cache.
#
# NOTE(review): `add_onebox`, `matchers` and `parse_open_graph` are not defined
# in this file; presumably they come from Oneboxer::Base -- confirm there.
module Oneboxer
  extend Oneboxer::Base

  # Register the class defined by each lib/oneboxer/*_onebox.rb file.
  # Sorted so that registration order is stable across platforms.
  Dir["#{Rails.root}/lib/oneboxer/*_onebox.rb"].sort.each do |f|
    add_onebox "Oneboxer::#{File.basename(f, '.rb').classify}".constantize
  end

  # How long a rendered onebox stays in the cache.
  def self.default_expiry
    1.day
  end

  # Return a oneboxer for a given URL.
  #
  # Walks the registered matchers in order and instantiates the class of the
  # first one whose regexp matches +url+. Returns nil when nothing matches.
  def self.onebox_for_url(url)
    matchers.each do |matcher|
      regexp = matcher.regexp
      klass = matcher.klass

      # A matcher may supply its pattern lazily as a Proc; resolve it first.
      regexp = regexp.call if regexp.is_a?(Proc)
      return klass.new(url) if url =~ regexp
    end

    nil
  end

  # Retrieve the onebox for a url without caching.
  #
  # Resolution order:
  # 1. a registered oneboxer whose regexp matches the url;
  # 2. for whitelisted urls, the first oembed <link> found in the page
  #    (only when the whitelist entry allows oembed);
  # 3. for whitelisted urls, the page's open graph data.
  #
  # Returns the rendered onebox, or nil when the url is not handled, is
  # malformed, or the request fails with an HTTP error.
  def self.onebox_nocache(url)
    oneboxer = onebox_for_url(url)
    return oneboxer.onebox if oneboxer.present?

    whitelist_entry = Whitelist.entry_for_url(url)
    return nil unless whitelist_entry.present?

    # Fetch through URI#read (provided by open-uri) instead of Kernel#open:
    # Kernel#open treats a string as a local path, or as a command to run when
    # it starts with "|", which is unsafe for caller-supplied input. URI#read
    # also closes the underlying handle once the body has been read.
    uri = URI.parse(url)
    return nil unless uri.respond_to?(:read)

    page_html = uri.read
    return nil unless page_html.present?

    doc = Nokogiri::HTML(page_html)

    if whitelist_entry.allows_oembed?
      # See if the page advertises an oembed endpoint we can use; the
      # application/json+oembed link takes precedence over text/json+oembed.
      oembed = (doc/"link[@type='application/json+oembed']").first ||
               (doc/"link[@type='text/json+oembed']").first
      return OembedOnebox.new(oembed[:href]).onebox if oembed
    end

    # Check for opengraph
    open_graph = Oneboxer.parse_open_graph(doc)
    return OpenGraphOnebox.new(url, open_graph).onebox if open_graph.present?

    nil
  rescue OpenURI::HTTPError, URI::InvalidURIError
    # The page could not be fetched, or the url could not be parsed: there is
    # nothing to onebox.
    nil
  end

  # Parse URLs out of HTML, returning the document when finished.
  #
  # +string_or_doc+ is either an HTML string (parsed with Nokogiri) or an
  # already parsed document. Yields the href and the node of every
  # <a class="onebox"> that has a non-blank href.
  def self.each_onebox_link(string_or_doc)
    doc = string_or_doc
    doc = Nokogiri::HTML(doc) if doc.is_a?(String)

    doc.search("a.onebox").each do |link|
      href = link['href']
      yield href, link if href.present?
    end

    doc
  end

  # Cache key under which the rendered onebox for +url+ is stored.
  # The url is hashed so the key has a fixed length and a fixed character set.
  def self.cache_key_for(url)
    "onebox:#{Digest::SHA1.hexdigest(url)}"
  end

  # Return the cached onebox for +url+ (whatever Rails.cache.read returns for
  # its key).
  def self.render_from_cache(url)
    Rails.cache.read(cache_key_for(url))
  end

  # Cache results from a onebox call.
  #
  # Builds the onebox for +url+ and stores it for +default_expiry+. Blank
  # results are not cached and yield nil. +args+ is accepted for interface
  # compatibility but is not used here.
  def self.fetch_and_cache(url, args = {})
    contents = onebox_nocache(url)
    return nil if contents.blank?

    Rails.cache.write(cache_key_for(url), contents, expires_in: default_expiry)
    contents
  end

  # Remove the cached onebox for +url+.
  def self.invalidate(url)
    Rails.cache.delete(cache_key_for(url))
  end

  # Return the cooked content for a url, caching the result for performance.
  #
  # When args[:invalidate_oneboxes] is truthy the cached entry is dropped and
  # the onebox is rebuilt; otherwise a non-blank cached value is returned
  # directly.
  def self.onebox(url, args = {})
    if args[:invalidate_oneboxes]
      # Remove the onebox from the cache
      Oneboxer.invalidate(url)
    else
      contents = render_from_cache(url)
      return contents if contents.present?
    end

    fetch_and_cache(url, args)
  end
end
|