SECURITY: Never crawl by IP

This commit is contained in:
Robin Ward 2017-05-23 13:07:18 -04:00
parent 93a5fc62bf
commit e5e7a15a85
2 changed files with 9 additions and 7 deletions

View File

@@ -68,14 +68,11 @@ class FinalDestination
# Checks whether @uri is acceptable to crawl, judging by its format alone.
#
# The URI is accepted only when all of the following hold:
#   * @uri is set,
#   * its scheme is http or https,
#   * it uses the scheme's default port (80 for http, 443 for https),
#   * its host is a name rather than a literal IPv4/IPv6 address.
#
# NOTE(review): assumes @uri responds to #scheme, #port and #hostname the way
# Ruby's URI::Generic does -- confirm against where @uri is assigned.
#
# @return [Boolean] true when the URI passes every check, false otherwise
def validate_uri_format
  return false unless @uri
  return false unless ['https', 'http'].include?(@uri.scheme)
  return false if @uri.scheme == 'http' && @uri.port != 80
  return false if @uri.scheme == 'https' && @uri.port != 443

  # Disallow IP based crawling. The port checks above must not return a final
  # answer on their own, otherwise this check would never be reached.
  begin
    IPAddr.new(@uri.hostname)
    # The host parsed as an IP address, so refuse it.
    false
  rescue IPAddr::Error
    # Anything IPAddr cannot parse (including a missing host) is not treated
    # as an IP literal here.
    true
  end
end
def is_public?

View File

@@ -123,6 +123,11 @@ describe FinalDestination do
expect(fd('ftp://eviltrout.com').validate_uri_format).to eq(false)
end
it "doesn't support IP urls" do
  # One IPv4 literal and one bracketed IPv6 literal; both must fail validation.
  ip_urls = [
    'http://104.25.152.10',
    'https://[2001:abc:de:01:0:3f0:6a65:c2bf]'
  ]

  ip_urls.each do |ip_url|
    expect(fd(ip_url).validate_uri_format).to eq(false)
  end
end
it "returns false for schemeless URL" do
  # A bare host name with no scheme must fail format validation.
  schemeless = fd('eviltrout.com')
  expect(schemeless.validate_uri_format).to eq(false)
end