# frozen_string_literal: true

require 'cgi'
require 'onebox/normalizer'
require 'onebox/open_graph'
require 'onebox/oembed'
require 'onebox/json_ld'

module Onebox
  module Engine
    # Mixin that assembles a single metadata hash (see #raw) for the including
    # engine's +url+ by combining OpenGraph tags, Twitter card tags, an oEmbed
    # response, JSON-LD data, the page favicon and the meta description.
    #
    # The including object is expected to provide +url+, +uri+ and +options+
    # (they are read here but not defined in this module).
    module StandardEmbed
      # Registry of oEmbed endpoints for sites that do not advertise one in
      # their HTML.
      #
      # @return [Hash{Regexp => String}] URL pattern => oEmbed endpoint
      def self.oembed_providers
        @@oembed_providers ||= {}
      end

      # Registers an oEmbed endpoint for every URL matching +regexp+.
      #
      # @param regexp [Regexp] pattern tested against the page URL
      # @param endpoint [String] oEmbed endpoint, without the +url+ parameter
      def self.add_oembed_provider(regexp, endpoint)
        oembed_providers[regexp] = endpoint
      end

      # @return [Array<Regexp>] URL patterns registered as OpenGraph providers
      def self.opengraph_providers
        @@opengraph_providers ||= []
      end

      # Registers +regexp+ as an OpenGraph provider pattern.
      #
      # @param regexp [Regexp]
      def self.add_opengraph_provider(regexp)
        opengraph_providers << regexp
      end

      # Some oembed providers (like meetup.com) don't provide links to themselves
      add_oembed_provider(/www\.meetup\.com\//, 'http://api.meetup.com/oembed')
      add_oembed_provider(/www\.mixcloud\.com\//, 'https://www.mixcloud.com/oembed/')
      # In order to support Private Videos
      add_oembed_provider(/vimeo\.com\//, 'https://vimeo.com/api/oembed.json')
      # NYT requires login so use oembed only
      add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')

      # @return [Boolean] truthy when +uri+ matches one of
      #   AllowlistedGenericOnebox.https_hosts; otherwise whatever +super+ says
      def always_https?
        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
      end

      # Builds (once) and returns the merged metadata hash.
      #
      # Sources are applied in the order below and, except for the favicon,
      # never overwrite a key that an earlier source already set.
      #
      # @return [Hash]
      def raw
        return @raw if defined?(@raw)

        @raw = {}

        set_opengraph_data_on_raw
        set_twitter_data_on_raw
        set_oembed_data_on_raw
        set_json_ld_data_on_raw
        set_favicon_data_on_raw
        set_description_on_raw

        @raw
      end

      protected

      # Fetches and memoizes the parsed HTML document for +url+, sending
      # +options[:cookie]+ as a Cookie header when present. A nil result is
      # memoized too, so the fetch is attempted only once.
      def html_doc
        return @html_doc if defined?(@html_doc)

        headers = options[:cookie] ? { 'Cookie' => options[:cookie] } : nil
        @html_doc = Onebox::Helpers.fetch_html_doc(url, headers)
      end

      # @return [Onebox::Oembed] memoized normalizer over the oEmbed JSON
      def get_oembed
        @oembed ||= Onebox::Oembed.new(get_json_response)
      end

      # @return [Onebox::OpenGraph] memoized normalizer over the HTML document
      def get_opengraph
        @opengraph ||= ::Onebox::OpenGraph.new(html_doc)
      end

      # Collects <meta> tags whose +property+ or +name+ is "twitter:<key>".
      #
      # Keys have '-' and ':' replaced with '_' and are symbolized. The value
      # is taken from +content+ (falling back to +value+). Blank values and
      # the placeholder "0 minutes" are skipped; the first value seen for a
      # key wins.
      #
      # @return [Hash{Symbol => String}] empty when there is no document
      def get_twitter
        return {} unless html_doc

        pattern = /^twitter:(.+)$/i

        html_doc.css('meta').each_with_object({}) do |meta, twitter|
          key = meta["property"]&.slice(pattern, 1) || meta["name"]&.slice(pattern, 1)
          next unless key

          value = (meta["content"] || meta["value"]).to_s
          next if Onebox::Helpers.blank?(value) || value == "0 minutes"

          twitter[key.tr('-:', '_').to_sym] ||= value
        end
      end

      # Finds the first favicon <link> and resolves its +href+ against +url+.
      #
      # @return [String, nil] nil when there is no document; otherwise the
      #   result of Onebox::Helpers.get_absolute_image_url
      def get_favicon
        return nil unless html_doc

        icon_link = html_doc.css(
          'link[rel="shortcut icon"], link[rel="icon shortcut"], link[rel="shortcut"], link[rel="icon"]'
        ).first
        href = icon_link && icon_link['href']

        Onebox::Helpers.get_absolute_image_url(href&.strip, url)
      end

      # @return [String, nil] content of <meta name="description"> or, failing
      #   that, <meta name="Description">; nil when there is no document
      def get_description
        return nil unless html_doc

        # nil.to_h is {}, so a missing tag simply yields nil here.
        html_doc.at("meta[name='description']").to_h['content'] ||
          html_doc.at("meta[name='Description']").to_h['content']
      end

      # Fetches the oEmbed response body for +url+.
      #
      # This is best-effort: any failure yields an empty JSON object so the
      # other metadata sources can still populate #raw.
      #
      # @return [String] response body, or "{}" when no endpoint is known or
      #   the request fails
      def get_json_response
        oembed_url = get_oembed_url
        return "{}" if Onebox::Helpers.blank?(oembed_url)

        begin
          Onebox::Helpers.fetch_response(oembed_url)
        rescue StandardError
          "{}"
        end
      rescue Errno::ECONNREFUSED, Net::HTTPError, Net::HTTPFatalError, MultiJson::LoadError
        # Reached when resolving the endpoint itself fails (get_oembed_url may
        # fetch the HTML document).
        "{}"
      end

      # Resolves the oEmbed endpoint URL for +url+.
      #
      # A registered provider (see .oembed_providers) takes precedence;
      # otherwise the endpoint advertised by the page through a
      # <link type="application/json+oembed"> or <link type="text/json+oembed">
      # tag is used.
      #
      # @return [String, nil]
      def get_oembed_url
        oembed_url = nil

        _, endpoint = StandardEmbed.oembed_providers.find { |regexp, _| url =~ regexp }
        # The page URL must be escaped: left raw, its own query string or
        # fragment would be parsed as part of the oEmbed request.
        oembed_url = "#{endpoint}?url=#{CGI.escape(url)}" if endpoint

        if html_doc
          if Onebox::Helpers.blank?(oembed_url)
            application_json = html_doc.at("//link[@type='application/json+oembed']/@href")
            oembed_url = application_json.value if application_json
          end

          if Onebox::Helpers.blank?(oembed_url)
            text_json = html_doc.at("//link[@type='text/json+oembed']/@href")
            oembed_url ||= text_json.value if text_json
          end
        end

        oembed_url
      end

      # @return [Onebox::JsonLd] memoized normalizer over the HTML document
      def get_json_ld
        @json_ld ||= Onebox::JsonLd.new(html_doc)
      end

      # Copies each key of +normalizer.data+ into @raw, reading the value
      # through the normalizer's method of the same name. nil values are
      # skipped and keys already set in @raw are left untouched.
      #
      # @param normalizer [#data, #send] e.g. the OpenGraph, Oembed or JsonLd
      #   objects built by this module
      def set_from_normalizer_data(normalizer)
        normalizer.data.each do |key, _|
          value = normalizer.send(key)
          @raw[key] ||= value unless value.nil?
        end
      end

      # Merges OpenGraph data into @raw, then removes the :title_attr key.
      def set_opengraph_data_on_raw
        set_from_normalizer_data(get_opengraph)
        @raw.except!(:title_attr)
      end

      # Merges non-blank Twitter card values into @raw without overwriting.
      def set_twitter_data_on_raw
        get_twitter.each do |key, value|
          @raw[key] ||= value unless Onebox::Helpers.blank?(value)
        end
      end

      # Merges oEmbed data into @raw without overwriting.
      def set_oembed_data_on_raw
        set_from_normalizer_data(get_oembed)
      end

      # Merges JSON-LD data into @raw without overwriting.
      def set_json_ld_data_on_raw
        set_from_normalizer_data(get_json_ld)
      end

      # Sets @raw[:favicon] whenever a favicon was found; unlike the other
      # setters this replaces an existing value.
      def set_favicon_data_on_raw
        favicon = get_favicon
        @raw[:favicon] = favicon unless Onebox::Helpers.blank?(favicon)
      end

      # Falls back to the page's meta description when no earlier source
      # supplied a :description.
      def set_description_on_raw
        return if @raw[:description]

        description = get_description
        @raw[:description] = description unless Onebox::Helpers.blank?(description)
      end
    end
  end
end