mirror of
https://github.com/discourse/discourse.git
synced 2024-11-22 11:23:25 +08:00
FIX: Respect blocked domains list when redirecting (#15656)
Our previous implementation used a simple `blocked_domain_array.include?(hostname)` so some values were not matching. Additionally, in some configurations like ours, we'd used "cat.*.dog.com" with the assumption we'd support globbing. This change implicitly allows globbing by blocking "http://a.b.com" if "b.com" is a blocked domain but does not actively do anything for "*". An upcoming change might include frontend validation for values that can be inserted.
This commit is contained in:
parent
191bdac4f0
commit
f5ea00c73f
|
@ -51,7 +51,6 @@ class InlineOneboxer
|
|||
|
||||
always_allow = SiteSetting.enable_inline_onebox_on_all_domains
|
||||
allowed_domains = SiteSetting.allowed_inline_onebox_domains&.split('|') unless always_allow
|
||||
blocked_domains = SiteSetting.blocked_onebox_domains&.split('|')
|
||||
|
||||
if always_allow || allowed_domains
|
||||
uri = begin
|
||||
|
@ -62,7 +61,7 @@ class InlineOneboxer
|
|||
if uri.present? &&
|
||||
uri.hostname.present? &&
|
||||
(always_allow || allowed_domains.include?(uri.hostname)) &&
|
||||
!blocked_domains.include?(uri.hostname)
|
||||
!domain_is_blocked?(uri.hostname)
|
||||
title = RetrieveTitle.crawl(url)
|
||||
title = nil if title && title.length < MIN_TITLE_LENGTH
|
||||
return onebox_for(url, title, opts)
|
||||
|
@ -74,6 +73,12 @@ class InlineOneboxer
|
|||
|
||||
private
|
||||
|
||||
def self.domain_is_blocked?(hostname)
|
||||
SiteSetting.blocked_onebox_domains&.split('|').any? do |blocked|
|
||||
hostname == blocked || hostname.end_with?(".#{blocked}")
|
||||
end
|
||||
end
|
||||
|
||||
def self.onebox_for(url, title, opts)
|
||||
title = title && Emoji.gsub_emoji_to_unicode(title)
|
||||
if title && opts[:post_number]
|
||||
|
|
|
@ -60,6 +60,10 @@ module RetrieveTitle
|
|||
encoding = nil
|
||||
|
||||
fd.get do |_response, chunk, uri|
|
||||
if (uri.present? && InlineOneboxer.domain_is_blocked?(uri.hostname))
|
||||
throw :done
|
||||
end
|
||||
|
||||
unless Net::HTTPRedirection === _response
|
||||
if current
|
||||
current << chunk
|
||||
|
|
|
@ -153,12 +153,6 @@ describe InlineOneboxer do
|
|||
expect(onebox).to be_blank
|
||||
end
|
||||
|
||||
it "will not crawl domains that are blocked" do
|
||||
SiteSetting.blocked_onebox_domains = "eviltrout.com"
|
||||
onebox = InlineOneboxer.lookup("https://eviltrout.com", skip_cache: true)
|
||||
expect(onebox).to be_blank
|
||||
end
|
||||
|
||||
it "will crawl anything if allowed to" do
|
||||
SiteSetting.enable_inline_onebox_on_all_domains = true
|
||||
|
||||
|
@ -204,6 +198,38 @@ describe InlineOneboxer do
|
|||
expect(onebox[:title]).to eq("Evil Trout's Blog")
|
||||
end
|
||||
|
||||
end
|
||||
describe "lookups for blocked domains in the hostname" do
|
||||
shared_examples "blocks the domain" do |setting, domain_to_test|
|
||||
it "does not retrieve title" do
|
||||
SiteSetting.blocked_onebox_domains = setting
|
||||
|
||||
onebox = InlineOneboxer.lookup(domain_to_test, skip_cache: true)
|
||||
|
||||
expect(onebox).to be_blank
|
||||
end
|
||||
end
|
||||
|
||||
shared_examples "does not fulfil blocked domain" do |setting, domain_to_test|
|
||||
it "retrieves title" do
|
||||
SiteSetting.blocked_onebox_domains = setting
|
||||
|
||||
onebox = InlineOneboxer.lookup(domain_to_test, skip_cache: true)
|
||||
|
||||
expect(onebox).to be_present
|
||||
end
|
||||
end
|
||||
|
||||
include_examples "blocks the domain", "api.cat.org|kitten.cloud", "https://api.cat.org"
|
||||
include_examples "blocks the domain", "api.cat.org|kitten.cloud", "http://kitten.cloud"
|
||||
|
||||
include_examples "blocks the domain", "kitten.cloud", "http://cat.kitten.cloud"
|
||||
|
||||
include_examples "blocks the domain", "api.cat.org", "https://api.cat.org/subdirectory/moar"
|
||||
include_examples "blocks the domain", "kitten.cloud", "https://cat.kitten.cloud/subd"
|
||||
|
||||
include_examples "does not fulfil blocked domain", "kitten.cloud", "https://cat.2kitten.cloud"
|
||||
include_examples "does not fulfil blocked domain", "kitten.cloud", "https://cat.kitten.cloud9"
|
||||
include_examples "does not fulfil blocked domain", "api.cat.org", "https://api-cat.org"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -100,6 +100,35 @@ describe RetrieveTitle do
|
|||
IPSocket.stubs(:getaddress).returns('100.2.3.4')
|
||||
expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq("very amazing")
|
||||
end
|
||||
|
||||
it "returns empty title if redirect uri is in blacklist" do
|
||||
SiteSetting.blocked_onebox_domains = "wikipedia.com"
|
||||
|
||||
stub_request(:get, "http://foobar.com/amazing")
|
||||
.to_return(status: 301, body: "", headers: { "location" => "https://wikipedia.com/amazing" })
|
||||
|
||||
stub_request(:get, "https://wikipedia.com/amazing")
|
||||
.to_return(status: 200, body: "<html><title>very amazing</title>", headers: {})
|
||||
|
||||
IPSocket.stubs(:getaddress).returns('100.2.3.4')
|
||||
expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq(nil)
|
||||
end
|
||||
|
||||
it "returns title if 'midway redirect' is blocked but final redirect uri is not blocked" do
|
||||
SiteSetting.blocked_onebox_domains = "wikipedia.com"
|
||||
|
||||
stub_request(:get, "http://foobar.com/amazing")
|
||||
.to_return(status: 301, body: "", headers: { "location" => "https://wikipedia.com/amazing" })
|
||||
|
||||
stub_request(:get, "https://wikipedia.com/amazing")
|
||||
.to_return(status: 301, body: "", headers: { "location" => "https://cat.com/meow" })
|
||||
|
||||
stub_request(:get, "https://cat.com/meow")
|
||||
.to_return(status: 200, body: "<html><title>very amazing</title>", headers: {})
|
||||
|
||||
IPSocket.stubs(:getaddress).returns('100.2.3.4')
|
||||
expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq("very amazing")
|
||||
end
|
||||
end
|
||||
|
||||
context 'fetch_title' do
|
||||
|
|
Loading…
Reference in New Issue
Block a user