FEATURE: Additional control of iframes in oneboxes (#10523)

This commit adds a new site setting "allowed_onebox_iframes". By default, all onebox iframes are allowed. When the list of domains is restricted, Onebox will automatically skip engines which require those domains, and use a fallback engine.
This commit is contained in:
David Taylor 2020-08-27 20:12:13 +01:00 committed by GitHub
parent c172f2068d
commit a3577435f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 84 additions and 48 deletions

View File

@ -242,7 +242,7 @@ GEM
omniauth-twitter (1.4.0) omniauth-twitter (1.4.0)
omniauth-oauth (~> 1.1) omniauth-oauth (~> 1.1)
rack rack
onebox (2.0.2) onebox (2.1.0)
addressable (~> 2.7.0) addressable (~> 2.7.0)
htmlentities (~> 4.3) htmlentities (~> 4.3)
multi_json (~> 1.11) multi_json (~> 1.11)

View File

@ -1553,6 +1553,7 @@ en:
use_admin_ip_allowlist: "Admins can only log in if they are at an IP address defined in the Screened IPs list (Admin > Logs > Screened Ips)." use_admin_ip_allowlist: "Admins can only log in if they are at an IP address defined in the Screened IPs list (Admin > Logs > Screened Ips)."
blocked_ip_blocks: "A list of private IP blocks that should never be crawled by Discourse" blocked_ip_blocks: "A list of private IP blocks that should never be crawled by Discourse"
allowed_internal_hosts: "A list of internal hosts that discourse can safely crawl for oneboxing and other purposes" allowed_internal_hosts: "A list of internal hosts that discourse can safely crawl for oneboxing and other purposes"
allowed_onebox_iframes: "A list of iframe src domains which are allowed via Onebox embeds. `*` will allow all default Onebox engines."
allowed_iframes: "A list of iframe src domain prefixes that discourse can safely allow in posts" allowed_iframes: "A list of iframe src domain prefixes that discourse can safely allow in posts"
allowed_crawler_user_agents: "User agents of web crawlers that should be allowed to access the site. WARNING! SETTING THIS WILL DISALLOW ALL CRAWLERS NOT LISTED HERE!" allowed_crawler_user_agents: "User agents of web crawlers that should be allowed to access the site. WARNING! SETTING THIS WILL DISALLOW ALL CRAWLERS NOT LISTED HERE!"
blocked_crawler_user_agents: "Unique case insensitive word in the user agent string identifying web crawlers that should not be allowed to access the site. Does not apply if allowlist is defined." blocked_crawler_user_agents: "Unique case insensitive word in the user agent string identifying web crawlers that should not be allowed to access the site. Does not apply if allowlist is defined."

View File

@ -1482,6 +1482,11 @@ security:
allowed_internal_hosts: allowed_internal_hosts:
default: "" default: ""
type: list type: list
allowed_onebox_iframes:
default: "*"
type: list
allow_any: false
choices: "['*'] + Onebox::Engine.all_iframe_origins"
allowed_iframes: allowed_iframes:
default: "https://www.google.com/maps/embed?|https://www.openstreetmap.org/export/embed.html?|https://calendar.google.com/calendar/embed?|https://codepen.io/" default: "https://www.google.com/maps/embed?|https://www.openstreetmap.org/export/embed.html?|https://calendar.google.com/calendar/embed?|https://codepen.io/"
type: list type: list

View File

@ -16,24 +16,6 @@ module Onebox
Float::INFINITY Float::INFINITY
end end
private
# overwrite to allowlist iframes
def is_embedded?
return false unless data[:html] && data[:height]
return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
if data[:html]["iframe"]
fragment = Nokogiri::HTML5::fragment(data[:html])
if iframe = fragment.at_css("iframe")
src = iframe["src"]
return src.present? && SiteSetting.allowed_iframes.split("|").any? { |url| src.start_with?(url) }
end
end
false
end
end end
end end
end end

View File

@ -138,7 +138,9 @@ module Oneboxer
end end
def self.engine(url) def self.engine(url)
Onebox::Matcher.new(url).oneboxed Onebox::Matcher.new(url, {
allowed_iframe_regexes: Onebox::Engine.origins_to_regexes(allowed_iframe_origins)
}).oneboxed
end end
def self.recently_failed?(url) def self.recently_failed?(url)
@ -300,6 +302,14 @@ module Oneboxer
@preserve_fragment_url_hosts ||= ['http://github.com'] @preserve_fragment_url_hosts ||= ['http://github.com']
end end
def self.allowed_iframe_origins
allowed = SiteSetting.allowed_onebox_iframes.split("|")
if allowed.include?("*")
allowed = Onebox::Engine.all_iframe_origins
end
allowed += SiteSetting.allowed_iframes.split("|")
end
def self.external_onebox(url) def self.external_onebox(url)
Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
fd = FinalDestination.new(url, fd = FinalDestination.new(url,
@ -314,6 +324,7 @@ module Oneboxer
options = { options = {
max_width: 695, max_width: 695,
sanitize_config: Onebox::DiscourseOneboxSanitizeConfig::Config::DISCOURSE_ONEBOX, sanitize_config: Onebox::DiscourseOneboxSanitizeConfig::Config::DISCOURSE_ONEBOX,
allowed_iframe_origins: allowed_iframe_origins,
hostname: GlobalSetting.hostname, hostname: GlobalSetting.hostname,
} }

View File

@ -18,32 +18,4 @@ describe Onebox::Engine::AllowlistedGenericOnebox do
end end
it "allowlists iframes" do
allowlisted_body = '<html><head><link rel="alternate" type="application/json+oembed" href="https://allowlist.ed/iframes.json" />'
blocklisted_body = '<html><head><link rel="alternate" type="application/json+oembed" href="https://blocklist.ed/iframes.json" />'
allowlisted_oembed = {
type: "rich",
height: "100",
html: "<iframe src='https://ifram.es/foo/bar'></iframe>"
}
blocklisted_oembed = {
type: "rich",
height: "100",
html: "<iframe src='https://malicious/discourse.org/'></iframe>"
}
stub_request(:get, "https://blocklist.ed/iframes").to_return(status: 200, body: blocklisted_body)
stub_request(:get, "https://blocklist.ed/iframes.json").to_return(status: 200, body: blocklisted_oembed.to_json)
stub_request(:get, "https://allowlist.ed/iframes").to_return(status: 200, body: allowlisted_body)
stub_request(:get, "https://allowlist.ed/iframes.json").to_return(status: 200, body: allowlisted_oembed.to_json)
SiteSetting.allowed_iframes = "discourse.org|https://ifram.es"
expect(Onebox.preview("https://blocklist.ed/iframes").to_s).to be_empty
expect(Onebox.preview("https://allowlist.ed/iframes").to_s).to match("iframe src")
end
end end

View File

@ -183,4 +183,69 @@ describe Oneboxer do
expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to be_present expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to be_present
end end
context "with youtube stub" do
let(:html) do
<<~HTML
<html>
<head>
<meta property="og:title" content="Onebox1">
<meta property="og:description" content="this is bodycontent">
</head>
<body>
<p>body</p>
</body>
<html>
HTML
end
before do
stub_request(:any, "https://www.youtube.com/watch?v=dQw4w9WgXcQ").to_return(status: 200, body: html)
end
it "allows restricting engines based on the allowed_onebox_iframes setting" do
output = Oneboxer.onebox("https://www.youtube.com/watch?v=dQw4w9WgXcQ", invalidate_oneboxes: true)
expect(output).to include("<iframe") # Regular youtube onebox
# Disable all onebox iframes:
SiteSetting.allowed_onebox_iframes = ""
output = Oneboxer.onebox("https://www.youtube.com/watch?v=dQw4w9WgXcQ", invalidate_oneboxes: true)
expect(output).not_to include("<iframe") # Generic onebox
expect(output).to include("allowlistedgeneric")
# Just enable youtube:
SiteSetting.allowed_onebox_iframes = "https://www.youtube.com"
output = Oneboxer.onebox("https://www.youtube.com/watch?v=dQw4w9WgXcQ", invalidate_oneboxes: true)
expect(output).to include("<iframe") # Regular youtube onebox
end
end
it "allows iframes from generic sites via the allowed_iframes setting" do
allowlisted_body = '<html><head><link rel="alternate" type="application/json+oembed" href="https://allowlist.ed/iframes.json" />'
blocklisted_body = '<html><head><link rel="alternate" type="application/json+oembed" href="https://blocklist.ed/iframes.json" />'
allowlisted_oembed = {
type: "rich",
height: "100",
html: "<iframe src='https://ifram.es/foo/bar'></iframe>"
}
blocklisted_oembed = {
type: "rich",
height: "100",
html: "<iframe src='https://malicious/discourse.org/'></iframe>"
}
stub_request(:any, "https://blocklist.ed/iframes").to_return(status: 200, body: blocklisted_body)
stub_request(:any, "https://blocklist.ed/iframes.json").to_return(status: 200, body: blocklisted_oembed.to_json)
stub_request(:any, "https://allowlist.ed/iframes").to_return(status: 200, body: allowlisted_body)
stub_request(:any, "https://allowlist.ed/iframes.json").to_return(status: 200, body: allowlisted_oembed.to_json)
SiteSetting.allowed_iframes = "discourse.org|https://ifram.es"
expect(Oneboxer.onebox("https://blocklist.ed/iframes", invalidate_oneboxes: true)).to be_empty
expect(Oneboxer.onebox("https://allowlist.ed/iframes", invalidate_oneboxes: true)).to match("iframe src")
end
end end