FEATURE: Rate limit how often we'll crawl a destination IP

This commit is contained in:
Robin Ward 2017-05-23 15:03:04 -04:00
parent 36e477750c
commit b81e7be9a1
2 changed files with 20 additions and 18 deletions

View File

@ -1,6 +1,7 @@
require "socket"
require "ipaddr"
require 'excon'
require 'rate_limiter'
# Determine the final endpoint for a Web URI, following redirects
class FinalDestination
@ -76,10 +77,6 @@ class FinalDestination
end
# Reports whether this destination is acceptable.
# The decision is delegated entirely to #is_public?; nothing else is checked here.
#
# @return the value produced by #is_public?
def is_dest_valid?
is_public?
end
def is_public?
return false unless @uri && @uri.host
address_s = @opts[:lookup_ip].call(@uri.hostname)
@ -92,7 +89,12 @@ class FinalDestination
return false
end
# Rate limit how often this IP can be crawled
RateLimiter.new(nil, "crawl-destination-ip:#{address_s}", 100, 1.hour).performed!
true
rescue RateLimiter::LimitExceeded
false
end
def private_ranges

View File

@ -164,38 +164,38 @@ describe FinalDestination do
end
end
describe ".is_public" do
describe ".is_dest_valid" do
# Both addresses are expected to be accepted, so the example is named for
# the `true` outcome it actually asserts (the old name said "false").
it "returns true for a valid ipv4" do
  expect(fd("https://52.84.143.67").is_public?).to eq(true)
  expect(fd("https://104.25.153.10").is_public?).to eq(true)
  expect(fd("https://52.84.143.67").is_dest_valid?).to eq(true)
  expect(fd("https://104.25.153.10").is_dest_valid?).to eq(true)
end
# Every address listed here must be rejected by both predicates.
it "returns false for private ipv4" do
  private_urls = [
    "https://127.0.0.1",
    "https://192.168.1.3",
    "https://10.0.0.5",
    "https://172.16.0.1"
  ]

  # Same call order as before: all is_public? checks, then all is_dest_valid? checks.
  private_urls.each { |url| expect(fd(url).is_public?).to eq(false) }
  private_urls.each { |url| expect(fd(url).is_dest_valid?).to eq(false) }
end
# With two /48 blocks blacklisted through the site setting, destinations in
# those blocks (and internal-ipv6.com) are refused while an address in
# 2003:abc:de:: is still accepted.
it "returns false for IPV6 via site settings" do
  SiteSetting.blacklist_ip_blocks = '2001:abc:de::/48|2002:abc:de::/48'

  # [url, expected result] pairs, kept in the original assertion order.
  cases = [
    ['https://[2001:abc:de:01:0:3f0:6a65:c2bf]', false],
    ['https://[2002:abc:de:01:0:3f0:6a65:c2bf]', false],
    ['https://internal-ipv6.com', false],
    ['https://[2003:abc:de:01:0:3f0:6a65:c2bf]', true]
  ]

  cases.each { |url, expected| expect(fd(url).is_public?).to eq(expected) }
  cases.each { |url, expected| expect(fd(url).is_dest_valid?).to eq(expected) }
end
# An unparseable entry ('eviltrout') in the setting must not stop the valid
# /48 next to it from being applied.
it "ignores invalid ranges" do
  SiteSetting.blacklist_ip_blocks = '2001:abc:de::/48|eviltrout'
  blocked_url = 'https://[2001:abc:de:01:0:3f0:6a65:c2bf]'

  %i[is_public? is_dest_valid?].each do |predicate|
    expect(fd(blocked_url).public_send(predicate)).to eq(false)
  end
end
# The IPv6 literal below is expected to be accepted by both predicates.
it "returns true for public ipv6" do
  public_url = "https://[2001:470:1:3a8::251]"

  %i[is_public? is_dest_valid?].each do |predicate|
    expect(fd(public_url).public_send(predicate)).to eq(true)
  end
end
# The address must be rejected, so the example is named for the `false`
# outcome it actually asserts (the old name said "true").
it "returns false for private ipv6" do
  expect(fd("https://[fdd7:b450:d4d1:6b44::1]").is_public?).to eq(false)
  expect(fd("https://[fdd7:b450:d4d1:6b44::1]").is_dest_valid?).to eq(false)
end
end