2021-05-26 17:41:35 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2022-06-13 23:32:34 +08:00
|
|
|
require 'cgi'
|
|
|
|
require 'onebox/normalizer'
|
|
|
|
require 'onebox/open_graph'
|
2021-05-26 17:41:35 +08:00
|
|
|
require 'onebox/oembed'
|
2022-06-13 23:32:34 +08:00
|
|
|
require 'onebox/json_ld'
|
2021-05-26 17:41:35 +08:00
|
|
|
|
|
|
|
module Onebox
  module Engine
    # Mixin for onebox engines that assemble their preview data from the
    # standard embed metadata of a page: OpenGraph tags, Twitter card tags,
    # oEmbed, JSON-LD, the favicon and the plain meta description.
    #
    # The including class must provide `url`, `uri` and `options`; they are
    # called here but defined elsewhere. `always_https?` also defers to `super`,
    # so an ancestor has to implement it.
    module StandardEmbed
      # Registry of explicitly configured oEmbed endpoints.
      #
      # @return [Hash] URL pattern => oEmbed endpoint
      def self.oembed_providers
        @@oembed_providers ||= {}
      end

      # Registers an oEmbed endpoint for every URL matching +regexp+.
      #
      # @param regexp [Regexp] pattern matched against the page URL
      # @param endpoint [String] oEmbed endpoint, without the `url` parameter
      # @return [String] the endpoint that was stored
      def self.add_oembed_provider(regexp, endpoint)
        oembed_providers[regexp] = endpoint
      end

      # Registry of URL patterns registered as OpenGraph providers.
      #
      # @return [Array] the registered patterns, in registration order
      def self.opengraph_providers
        @@opengraph_providers ||= []
      end

      # Registers +regexp+ as an OpenGraph provider pattern.
      #
      # @param regexp [Regexp]
      # @return [Array] the updated registry
      def self.add_opengraph_provider(regexp)
        opengraph_providers << regexp
      end

      # Some oembed providers (like meetup.com) don't provide links to themselves
      add_oembed_provider(/www\.meetup\.com\//, 'http://api.meetup.com/oembed')
      add_oembed_provider(/www\.mixcloud\.com\//, 'https://www.mixcloud.com/oembed/')
      # In order to support Private Videos
      add_oembed_provider(/vimeo\.com\//, 'https://vimeo.com/api/oembed.json')
      # NYT requires login so use oembed only
      add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')

      # Truthy when the host is on the allowlisted HTTPS hosts, otherwise
      # whatever the ancestor implementation decides.
      def always_https?
        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
      end

      # Collects the embed data from every supported source into one hash.
      #
      # Sources are merged with `||=`, so the order below is a priority order:
      # OpenGraph wins over Twitter, which wins over oEmbed, then JSON-LD.
      # The favicon is always overwritten when one is found, and the plain
      # meta description is only used when nothing else supplied one.
      #
      # @return [Hash] memoized; computed once per instance
      def raw
        return @raw if defined?(@raw)

        @raw = {}

        set_opengraph_data_on_raw
        set_twitter_data_on_raw
        set_oembed_data_on_raw
        set_json_ld_data_on_raw
        set_favicon_data_on_raw
        set_description_on_raw

        @raw
      end

      protected

      # Fetches the page at `url`, sending `options[:cookie]` as a Cookie
      # header when present.
      #
      # Memoized with `defined?` so that a nil result is cached as well and the
      # page is not requested again.
      def html_doc
        return @html_doc if defined?(@html_doc)

        cookie = options[:cookie]
        headers = cookie ? { 'Cookie' => cookie } : nil

        @html_doc = Onebox::Helpers.fetch_html_doc(url, headers)
      end

      # @return [Onebox::Oembed] memoized wrapper around the oEmbed response
      def get_oembed
        @oembed ||= Onebox::Oembed.new(get_json_response)
      end

      # @return [Onebox::OpenGraph] memoized wrapper around the fetched page
      def get_opengraph
        @opengraph ||= ::Onebox::OpenGraph.new(html_doc)
      end

      # Extracts `twitter:*` meta tags, looked up by `property` first and
      # `name` second.
      #
      # Keys are the part after `twitter:` with `-` and `:` turned into `_`.
      # The first non-blank value for a key wins; the placeholder value
      # "0 minutes" is ignored.
      #
      # @return [Hash{Symbol => String}] empty when there is no document
      def get_twitter
        return {} unless html_doc

        pattern = /^twitter:(.+)$/i
        twitter = {}

        html_doc.css('meta').each do |meta|
          suffix = (meta["property"] && meta["property"][pattern, 1]) ||
                   (meta["name"] && meta["name"][pattern, 1])
          next unless suffix

          value = (meta["content"] || meta["value"]).to_s
          next if Onebox::Helpers.blank?(value) || value == "0 minutes"

          twitter[suffix.tr('-:', '_').to_sym] ||= value
        end

        twitter
      end

      # Resolves the href of the first icon `<link>` against `url`.
      #
      # @return the result of `Onebox::Helpers.get_absolute_image_url`, or nil
      #   when there is no document
      def get_favicon
        return nil unless html_doc

        link = html_doc.css('link[rel="shortcut icon"], link[rel="icon shortcut"], link[rel="shortcut"], link[rel="icon"]').first
        href = link && link['href']
        href = href.strip if href

        Onebox::Helpers.get_absolute_image_url(href, url)
      end

      # Reads the plain meta description, trying the lowercase tag name first
      # and the capitalised `Description` second.
      #
      # @return [String, nil]
      def get_description
        return nil unless html_doc

        description = html_doc.at("meta[name='description']").to_h['content']
        description ||= html_doc.at("meta[name='Description']").to_h['content']

        description
      end

      # Fetches the raw oEmbed response body.
      #
      # This is deliberately best-effort: with no oEmbed URL, or on any
      # StandardError raised by the fetch, the empty JSON document "{}" is
      # returned so the other metadata sources can still be used.
      #
      # @return [String]
      def get_json_response
        oembed_url = get_oembed_url

        return "{}" if Onebox::Helpers.blank?(oembed_url)

        begin
          Onebox::Helpers.fetch_response(oembed_url)
        rescue StandardError
          "{}"
        end
      rescue Errno::ECONNREFUSED, Net::HTTPError, Net::HTTPFatalError, MultiJson::LoadError
        # Covers failures while discovering the oEmbed URL (page fetch).
        "{}"
      end

      # Determines the oEmbed URL for the page.
      #
      # A registered provider takes precedence; otherwise the page's own
      # `application/json+oembed` link is used, then `text/json+oembed`.
      #
      # @return [String, nil]
      def get_oembed_url
        provider = StandardEmbed.oembed_providers.find { |regexp, _endpoint| url =~ regexp }

        # The page URL is a query parameter value and must be escaped,
        # otherwise its own `?`/`&` are read as parameters of the endpoint.
        oembed_url = "#{provider.last}?url=#{CGI.escape(url)}" if provider

        if html_doc
          ["application/json+oembed", "text/json+oembed"].each do |type|
            break unless Onebox::Helpers.blank?(oembed_url)

            href = html_doc.at("//link[@type='#{type}']/@href")
            # Plain assignment: a blank (but non-nil) earlier candidate must
            # be replaced by the next discovered link.
            oembed_url = href.value if href
          end
        end

        oembed_url
      end

      # @return [Onebox::JsonLd] memoized wrapper around the fetched page
      def get_json_ld
        @json_ld ||= Onebox::JsonLd.new(html_doc)
      end

      # Copies every non-nil normalizer attribute into `@raw` without
      # overwriting values that are already present.
      #
      # Keys originate from remote documents, so they are dispatched with
      # `public_send`: a key can never reach a private method. A key that
      # names one falls through to the normalizer's `method_missing`
      # (presumably defined by the normalizer classes -- confirm).
      #
      # @param normalizer [#data] object exposing `data` plus one reader per key
      def set_from_normalizer_data(normalizer)
        normalizer.data.each do |key, _|
          value = normalizer.public_send(key)
          @raw[key] ||= value unless value.nil?
        end
      end

      # Merges OpenGraph data and drops `:title_attr` from the result.
      #
      # @return [Hash] `@raw`
      def set_opengraph_data_on_raw
        set_from_normalizer_data(get_opengraph)
        @raw.delete(:title_attr)
        @raw
      end

      # Merges Twitter card data for keys that are still unset.
      def set_twitter_data_on_raw
        get_twitter.each do |key, value|
          @raw[key] ||= value unless Onebox::Helpers.blank?(value)
        end
      end

      # Merges oEmbed data for keys that are still unset.
      def set_oembed_data_on_raw
        set_from_normalizer_data(get_oembed)
      end

      # Merges JSON-LD data for keys that are still unset.
      def set_json_ld_data_on_raw
        set_from_normalizer_data(get_json_ld)
      end

      # Stores the favicon, replacing any value set by an earlier source.
      def set_favicon_data_on_raw
        favicon = get_favicon
        @raw[:favicon] = favicon unless Onebox::Helpers.blank?(favicon)
      end

      # Falls back to the plain meta description when no source provided one.
      def set_description_on_raw
        return if @raw[:description]

        description = get_description
        @raw[:description] = description unless Onebox::Helpers.blank?(description)
      end
    end
  end
end
|