FEATURE: A site setting to prevent crawling on private IP blocks

This commit is contained in:
Robin Ward 2017-05-23 11:51:23 -04:00
parent c9028f517a
commit 93a5fc62bf
4 changed files with 70 additions and 43 deletions

View File

@ -1029,6 +1029,7 @@ en:
allow_moderators_to_create_categories: "Allow moderators to create new categories"
cors_origins: "Allowed origins for cross-origin requests (CORS). Each origin must include http:// or https://. The DISCOURSE_ENABLE_CORS env variable must be set to true to enable CORS."
use_admin_ip_whitelist: "Admins can only log in if they are at an IP address defined in the Screened IPs list (Admin > Logs > Screened Ips)."
blacklist_ip_blocks: "A list of private IP blocks that should never be crawled by Discourse"
top_menu: "Determine which items appear in the homepage navigation, and in what order. Example latest|new|unread|categories|top|read|posted|bookmarks"
post_menu: "Determine which items appear on the post menu, and in what order. Example like|edit|flag|delete|share|bookmark|reply"
post_menu_hidden_items: "The menu items to hide by default in the post menu unless an expansion ellipsis is clicked on."

View File

@ -890,6 +890,10 @@ security:
use_admin_ip_whitelist:
default: false
client: true
blacklist_ip_blocks:
default: ''
type: list
shadowed_by_global: true
onebox:
enable_flash_video_onebox: false

View File

@ -7,7 +7,7 @@ class FinalDestination
attr_reader :status
def initialize(url, opts = nil)
def initialize(url, opts=nil)
@uri = URI(url) rescue nil
@opts = opts || {}
@opts[:max_redirects] ||= 5
@ -85,9 +85,8 @@ class FinalDestination
return false unless address_s
address = IPAddr.new(address_s)
private_match = FinalDestination.private_ranges.any? {|r| r === address }
if private_match
if private_ranges.any? {|r| r === address }
@status = :invalid_address
return false
end
@ -95,7 +94,12 @@ class FinalDestination
true
end
def self.private_ranges
def private_ranges
FinalDestination.standard_private_ranges +
SiteSetting.blacklist_ip_blocks.split('|').map {|r| IPAddr.new(r) rescue nil }.compact
end
def self.standard_private_ranges
@private_ranges ||= [
IPAddr.new('127.0.0.1'),
IPAddr.new('172.16.0.0/12'),

View File

@ -11,6 +11,7 @@ describe FinalDestination do
when 'codinghorror.com' then '91.146.108.148'
when 'discourse.org' then '104.25.152.10'
when 'private-host.com' then '192.168.10.1'
when 'internal-ipv6.com' then '2001:abc:de:01:3:3d0:6a65:c2bf'
else
host
end
@ -27,15 +28,19 @@ describe FinalDestination do
Excon.stub({ method: :head, hostname: from }, { status: 302, headers: { "Location" => dest } })
end
def fd(url)
FinalDestination.new(url, opts)
end
describe '.resolve' do
it "has a ready status code before anything happens" do
expect(FinalDestination.new('https://eviltrout.com').status).to eq(:ready)
expect(fd('https://eviltrout.com').status).to eq(:ready)
end
it "returns nil an invalid url" do
expect(FinalDestination.new(nil, opts).resolve).to be_nil
expect(FinalDestination.new('asdf', opts).resolve).to be_nil
expect(fd(nil).resolve).to be_nil
expect(fd('asdf').resolve).to be_nil
end
context "without redirects" do
@ -44,10 +49,10 @@ describe FinalDestination do
end
it "returns the final url" do
fd = FinalDestination.new('https://eviltrout.com', opts)
expect(fd.resolve.to_s).to eq('https://eviltrout.com')
expect(fd.redirected?).to eq(false)
expect(fd.status).to eq(:resolved)
final = FinalDestination.new('https://eviltrout.com', opts)
expect(final.resolve.to_s).to eq('https://eviltrout.com')
expect(final.redirected?).to eq(false)
expect(final.status).to eq(:resolved)
end
end
@ -59,10 +64,10 @@ describe FinalDestination do
end
it "returns the final url" do
fd = FinalDestination.new('https://eviltrout.com', opts)
expect(fd.resolve.to_s).to eq('https://discourse.org')
expect(fd.redirected?).to eq(true)
expect(fd.status).to eq(:resolved)
final = FinalDestination.new('https://eviltrout.com', opts)
expect(final.resolve.to_s).to eq('https://discourse.org')
expect(final.redirected?).to eq(true)
expect(final.status).to eq(:resolved)
end
end
@ -74,10 +79,10 @@ describe FinalDestination do
end
it "returns the final url" do
fd = FinalDestination.new('https://eviltrout.com', opts.merge(max_redirects: 1))
expect(fd.resolve).to be_nil
expect(fd.redirected?).to eq(true)
expect(fd.status).to eq(:too_many_redirects)
final = FinalDestination.new('https://eviltrout.com', opts.merge(max_redirects: 1))
expect(final.resolve).to be_nil
expect(final.redirected?).to eq(true)
expect(final.status).to eq(:too_many_redirects)
end
end
@ -88,10 +93,10 @@ describe FinalDestination do
end
it "returns the final url" do
fd = FinalDestination.new('https://eviltrout.com', opts)
expect(fd.resolve).to be_nil
expect(fd.redirected?).to eq(true)
expect(fd.status).to eq(:invalid_address)
final = FinalDestination.new('https://eviltrout.com', opts)
expect(final.resolve).to be_nil
expect(final.redirected?).to eq(true)
expect(final.status).to eq(:invalid_address)
end
end
end
@ -99,63 +104,76 @@ describe FinalDestination do
describe '.validate_uri' do
context "host lookups" do
it "works for various hosts" do
expect(FinalDestination.new('https://private-host.com', opts).validate_uri).to eq(false)
expect(FinalDestination.new('https://eviltrout.com:443', opts).validate_uri).to eq(true)
expect(fd('https://private-host.com').validate_uri).to eq(false)
expect(fd('https://eviltrout.com:443').validate_uri).to eq(true)
end
end
end
describe ".validate_url_format" do
it "supports http urls" do
expect(FinalDestination.new('http://eviltrout.com', opts).validate_uri_format).to eq(true)
expect(fd('http://eviltrout.com').validate_uri_format).to eq(true)
end
it "supports https urls" do
expect(FinalDestination.new('https://eviltrout.com', opts).validate_uri_format).to eq(true)
expect(fd('https://eviltrout.com').validate_uri_format).to eq(true)
end
it "doesn't support ftp urls" do
expect(FinalDestination.new('ftp://eviltrout.com', opts).validate_uri_format).to eq(false)
expect(fd('ftp://eviltrout.com').validate_uri_format).to eq(false)
end
it "returns false for schemeless URL" do
expect(FinalDestination.new('eviltrout.com', opts).validate_uri_format).to eq(false)
expect(fd('eviltrout.com').validate_uri_format).to eq(false)
end
it "returns false for nil URL" do
expect(FinalDestination.new(nil, opts).validate_uri_format).to eq(false)
expect(fd(nil).validate_uri_format).to eq(false)
end
it "returns false for invalid ports" do
expect(FinalDestination.new('http://eviltrout.com:21', opts).validate_uri_format).to eq(false)
expect(FinalDestination.new('https://eviltrout.com:8000', opts).validate_uri_format).to eq(false)
expect(fd('http://eviltrout.com:21').validate_uri_format).to eq(false)
expect(fd('https://eviltrout.com:8000').validate_uri_format).to eq(false)
end
it "returns true for valid ports" do
expect(FinalDestination.new('http://eviltrout.com:80', opts).validate_uri_format).to eq(true)
expect(FinalDestination.new('https://eviltrout.com:443',opts).validate_uri_format).to eq(true)
expect(fd('http://eviltrout.com:80').validate_uri_format).to eq(true)
expect(fd('https://eviltrout.com:443').validate_uri_format).to eq(true)
end
end
describe ".is_public" do
it "returns false for a valid ipv4" do
expect(FinalDestination.new("https://52.84.143.67", opts).is_public?).to eq(true)
expect(FinalDestination.new("https://104.25.153.10", opts).is_public?).to eq(true)
expect(fd("https://52.84.143.67").is_public?).to eq(true)
expect(fd("https://104.25.153.10").is_public?).to eq(true)
end
it "returns true for private ipv4" do
expect(FinalDestination.new("https://127.0.0.1", opts).is_public?).to eq(false)
expect(FinalDestination.new("https://192.168.1.3", opts).is_public?).to eq(false)
expect(FinalDestination.new("https://10.0.0.5", opts).is_public?).to eq(false)
expect(FinalDestination.new("https://172.16.0.1", opts).is_public?).to eq(false)
it "returns false for private ipv4" do
expect(fd("https://127.0.0.1").is_public?).to eq(false)
expect(fd("https://192.168.1.3").is_public?).to eq(false)
expect(fd("https://10.0.0.5").is_public?).to eq(false)
expect(fd("https://172.16.0.1").is_public?).to eq(false)
end
it "returns false for IPV6 via site settings" do
SiteSetting.blacklist_ip_blocks = '2001:abc:de::/48|2002:abc:de::/48'
expect(fd('https://[2001:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(false)
expect(fd('https://[2002:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(false)
expect(fd('https://internal-ipv6.com').is_public?).to eq(false)
expect(fd('https://[2003:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(true)
end
it "ignores invalid ranges" do
SiteSetting.blacklist_ip_blocks = '2001:abc:de::/48|eviltrout'
expect(fd('https://[2001:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(false)
end
it "returns true for public ipv6" do
expect(FinalDestination.new("https://[2001:470:1:3a8::251]", opts).is_public?).to eq(true)
expect(fd("https://[2001:470:1:3a8::251]").is_public?).to eq(true)
end
it "returns true for private ipv6" do
expect(FinalDestination.new("https://[fdd7:b450:d4d1:6b44::1]", opts).is_public?).to eq(false)
expect(fd("https://[fdd7:b450:d4d1:6b44::1]").is_public?).to eq(false)
end
end