# frozen_string_literal: true require 'uri' Dir["#{Rails.root}/lib/onebox/engine/*_onebox.rb"].sort.each { |f| require f } module Oneboxer ONEBOX_CSS_CLASS = "onebox" AUDIO_REGEX = /^\.(mp3|og[ga]|opus|wav|m4[abpr]|aac|flac)$/i VIDEO_REGEX = /^\.(mov|mp4|webm|m4v|3gp|ogv|avi|mpeg|ogv)$/i # keep reloaders happy unless defined? Oneboxer::Result Result = Struct.new(:doc, :changed) do def to_html doc.to_html end def changed? changed end end end def self.ignore_redirects @ignore_redirects ||= ['http://www.dropbox.com', 'http://store.steampowered.com', 'http://vimeo.com', 'https://www.youtube.com', Discourse.base_url] end def self.amazon_domains amazon_suffixes = %w(com com.br ca cn fr de in it co.jp com.mx nl pl sa sg es se com.tr ae co.uk) amazon_suffixes.collect { |suffix| "https://www.amazon.#{suffix}" } end def self.force_get_hosts hosts = ['http://us.battle.net', 'https://news.yahoo.com'] hosts += SiteSetting.cache_onebox_response_body_domains.split('|').collect { |domain| "https://www.#{domain}" } hosts += amazon_domains hosts.uniq end def self.force_custom_user_agent_hosts SiteSetting.force_custom_user_agent_hosts.split('|') end def self.allowed_post_types @allowed_post_types ||= [Post.types[:regular], Post.types[:moderator_action]] end def self.preview(url, options = nil) options ||= {} invalidate(url) if options[:invalidate_oneboxes] onebox_raw(url, options)[:preview] end def self.onebox(url, options = nil) options ||= {} invalidate(url) if options[:invalidate_oneboxes] onebox_raw(url, options)[:onebox] end def self.cached_onebox(url) if c = Discourse.cache.read(onebox_cache_key(url)) c[:onebox] end rescue => e invalidate(url) Rails.logger.warn("invalid cached onebox for #{url} #{e}") "" end def self.cached_preview(url) if c = Discourse.cache.read(onebox_cache_key(url)) c[:preview] end rescue => e invalidate(url) Rails.logger.warn("invalid cached preview for #{url} #{e}") "" end def self.invalidate(url) Discourse.cache.delete(onebox_cache_key(url)) Discourse.cache.delete(onebox_failed_cache_key(url)) end def self.cache_response_body?(uri) uri = URI.parse(uri) if uri.is_a?(String) if SiteSetting.cache_onebox_response_body? SiteSetting.cache_onebox_response_body_domains.split("|").any? { |domain| uri.hostname.ends_with?(domain) } end end def self.cache_response_body(uri, response) key = redis_cached_response_body_key(uri) Discourse.redis.without_namespace.setex(key, 1.minutes.to_i, response) end def self.cached_response_body_exists?(uri) key = redis_cached_response_body_key(uri) Discourse.redis.without_namespace.exists(key).to_i > 0 end def self.fetch_cached_response_body(uri) key = redis_cached_response_body_key(uri) Discourse.redis.without_namespace.get(key) end def self.redis_cached_response_body_key(uri) "CACHED_RESPONSE_#{uri}" end # Parse URLs out of HTML, returning the document when finished. def self.each_onebox_link(string_or_doc, extra_paths: []) doc = string_or_doc doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String) onebox_links = doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths) if onebox_links.present? onebox_links.each do |link| yield(link['href'], link) if link['href'].present? end end doc end HTML5_BLOCK_ELEMENTS ||= %w{address article aside blockquote canvas center dd div dl dt fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr li main nav noscript ol output p pre section table tfoot ul video} def self.apply(string_or_doc, extra_paths: nil) doc = string_or_doc doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String) changed = false each_onebox_link(doc, extra_paths: extra_paths) do |url, element| onebox, _ = yield(url, element) next if onebox.blank? parsed_onebox = Nokogiri::HTML5::fragment(onebox) next if parsed_onebox.children.blank? changed = true parent = element.parent if parent&.node_name&.downcase == "p" && parsed_onebox.children.any? { |child| HTML5_BLOCK_ELEMENTS.include?(child.node_name.downcase) } siblings = parent.children element_idx = siblings.find_index(element) before_idx = first_significant_element_index(siblings, element_idx - 1, -1) after_idx = first_significant_element_index(siblings, element_idx + 1, +1) if before_idx < 0 && after_idx >= siblings.size parent.replace parsed_onebox elsif before_idx < 0 parent.children = siblings[after_idx..siblings.size] parent.add_previous_sibling(parsed_onebox) elsif after_idx >= siblings.size parent.children = siblings[0..before_idx] parent.add_next_sibling(parsed_onebox) else parent_rest = parent.dup parent.children = siblings[0..before_idx] parent_rest.children = siblings[after_idx..siblings.size] parent.add_next_sibling(parent_rest) parent.add_next_sibling(parsed_onebox) end else element.replace parsed_onebox end end Result.new(doc, changed) end def self.first_significant_element_index(elements, index, step) while index >= 0 && index < elements.size && (elements[index].node_name.downcase == "br" || (elements[index].node_name.downcase == "text" && elements[index].to_html.strip.blank?)) index = index + step end index end def self.is_previewing?(user_id) Discourse.redis.get(preview_key(user_id)) == "1" end def self.preview_onebox!(user_id) Discourse.redis.setex(preview_key(user_id), 1.minute, "1") end def self.onebox_previewed!(user_id) Discourse.redis.del(preview_key(user_id)) end def self.engine(url) Onebox::Matcher.new(url, { allowed_iframe_regexes: Onebox::Engine.origins_to_regexes(allowed_iframe_origins) }).oneboxed end def self.recently_failed?(url) Discourse.cache.read(onebox_failed_cache_key(url)).present? end def self.cache_failed!(url) Discourse.cache.write(onebox_failed_cache_key(url), true, expires_in: 1.hour) end private def self.preview_key(user_id) "onebox:preview:#{user_id}" end def self.blank_onebox { preview: "", onebox: "" } end def self.onebox_cache_key(url) "onebox__#{url}" end def self.onebox_failed_cache_key(url) "onebox_failed__#{url}" end def self.onebox_raw(url, opts = {}) url = UrlHelper.escape_uri(url).to_s local_onebox(url, opts) || external_onebox(url) rescue => e # no point warning here, just cause we have an issue oneboxing a url # we can later hunt for failed oneboxes by searching logs if needed Rails.logger.info("Failed to onebox #{url} #{e} #{e.backtrace}") # return a blank hash, so rest of the code works blank_onebox end def self.local_onebox(url, opts = {}) return unless route = Discourse.route_for(url) html = case route[:controller] when "uploads" then local_upload_html(url) when "topics" then local_topic_html(url, route, opts) when "users" then local_user_html(url, route) when "list" then local_category_html(url, route) end html = html.presence || "#{URI(url).to_s}" { onebox: html, preview: html } end def self.local_upload_html(url) additional_controls = \ if SiteSetting.disable_onebox_media_download_controls "controlslist='nodownload'" else "" end case File.extname(URI(url).path || "") when VIDEO_REGEX <<~HTML