FIX: Respect blocked domains list when redirecting (#15656)

Our previous implementation used a simple `blocked_domain_array.include?(hostname)`
so some values were not matching. Additionally, in some configurations like ours, we'd used
"cat.*.dog.com" with the assumption we'd support globbing.

This change implicitly allows globbing by blocking "http://a.b.com" if "b.com" is a blocked 
domain but does not actively do anything for "*".

An upcoming change might include frontend validation for values that can be inserted.
This commit is contained in:
Natalie Tay 2022-01-20 14:12:34 +08:00 committed by GitHub
parent 191bdac4f0
commit f5ea00c73f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 73 additions and 9 deletions

View File

@ -51,7 +51,6 @@ class InlineOneboxer
always_allow = SiteSetting.enable_inline_onebox_on_all_domains
allowed_domains = SiteSetting.allowed_inline_onebox_domains&.split('|') unless always_allow
blocked_domains = SiteSetting.blocked_onebox_domains&.split('|')
if always_allow || allowed_domains
uri = begin
@ -62,7 +61,7 @@ class InlineOneboxer
if uri.present? &&
uri.hostname.present? &&
(always_allow || allowed_domains.include?(uri.hostname)) &&
!blocked_domains.include?(uri.hostname)
!domain_is_blocked?(uri.hostname)
title = RetrieveTitle.crawl(url)
title = nil if title && title.length < MIN_TITLE_LENGTH
return onebox_for(url, title, opts)
@ -74,6 +73,12 @@ class InlineOneboxer
private
def self.domain_is_blocked?(hostname)
SiteSetting.blocked_onebox_domains&.split('|').any? do |blocked|
hostname == blocked || hostname.end_with?(".#{blocked}")
end
end
def self.onebox_for(url, title, opts)
title = title && Emoji.gsub_emoji_to_unicode(title)
if title && opts[:post_number]

View File

@ -60,6 +60,10 @@ module RetrieveTitle
encoding = nil
fd.get do |_response, chunk, uri|
if (uri.present? && InlineOneboxer.domain_is_blocked?(uri.hostname))
throw :done
end
unless Net::HTTPRedirection === _response
if current
current << chunk

View File

@ -153,12 +153,6 @@ describe InlineOneboxer do
expect(onebox).to be_blank
end
it "will not crawl domains that are blocked" do
SiteSetting.blocked_onebox_domains = "eviltrout.com"
onebox = InlineOneboxer.lookup("https://eviltrout.com", skip_cache: true)
expect(onebox).to be_blank
end
it "will crawl anything if allowed to" do
SiteSetting.enable_inline_onebox_on_all_domains = true
@ -204,6 +198,38 @@ describe InlineOneboxer do
expect(onebox[:title]).to eq("Evil Trout's Blog")
end
end
describe "lookups for blocked domains in the hostname" do
shared_examples "blocks the domain" do |setting, domain_to_test|
it "does not retrieve title" do
SiteSetting.blocked_onebox_domains = setting
onebox = InlineOneboxer.lookup(domain_to_test, skip_cache: true)
expect(onebox).to be_blank
end
end
shared_examples "does not fulfil blocked domain" do |setting, domain_to_test|
it "retrieves title" do
SiteSetting.blocked_onebox_domains = setting
onebox = InlineOneboxer.lookup(domain_to_test, skip_cache: true)
expect(onebox).to be_present
end
end
include_examples "blocks the domain", "api.cat.org|kitten.cloud", "https://api.cat.org"
include_examples "blocks the domain", "api.cat.org|kitten.cloud", "http://kitten.cloud"
include_examples "blocks the domain", "kitten.cloud", "http://cat.kitten.cloud"
include_examples "blocks the domain", "api.cat.org", "https://api.cat.org/subdirectory/moar"
include_examples "blocks the domain", "kitten.cloud", "https://cat.kitten.cloud/subd"
include_examples "does not fulfil blocked domain", "kitten.cloud", "https://cat.2kitten.cloud"
include_examples "does not fulfil blocked domain", "kitten.cloud", "https://cat.kitten.cloud9"
include_examples "does not fulfil blocked domain", "api.cat.org", "https://api-cat.org"
end
end
end

View File

@ -100,6 +100,35 @@ describe RetrieveTitle do
IPSocket.stubs(:getaddress).returns('100.2.3.4')
expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq("very amazing")
end
it "returns empty title if redirect uri is in blacklist" do
SiteSetting.blocked_onebox_domains = "wikipedia.com"
stub_request(:get, "http://foobar.com/amazing")
.to_return(status: 301, body: "", headers: { "location" => "https://wikipedia.com/amazing" })
stub_request(:get, "https://wikipedia.com/amazing")
.to_return(status: 200, body: "<html><title>very amazing</title>", headers: {})
IPSocket.stubs(:getaddress).returns('100.2.3.4')
expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq(nil)
end
it "returns title if 'midway redirect' is blocked but final redirect uri is not blocked" do
SiteSetting.blocked_onebox_domains = "wikipedia.com"
stub_request(:get, "http://foobar.com/amazing")
.to_return(status: 301, body: "", headers: { "location" => "https://wikipedia.com/amazing" })
stub_request(:get, "https://wikipedia.com/amazing")
.to_return(status: 301, body: "", headers: { "location" => "https://cat.com/meow" })
stub_request(:get, "https://cat.com/meow")
.to_return(status: 200, body: "<html><title>very amazing</title>", headers: {})
IPSocket.stubs(:getaddress).returns('100.2.3.4')
expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq("very amazing")
end
end
context 'fetch_title' do