From b81e7be9a163959a9f6f7283741193280bdb747c Mon Sep 17 00:00:00 2001 From: Robin Ward Date: Tue, 23 May 2017 15:03:04 -0400 Subject: [PATCH] FEATURE: Rate limit how often we'll crawl a destination IP --- lib/final_destination.rb | 10 ++++---- spec/components/final_destination_spec.rb | 28 +++++++++++------------ 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/lib/final_destination.rb b/lib/final_destination.rb index 4e0f4229d87..c3fa9be503c 100644 --- a/lib/final_destination.rb +++ b/lib/final_destination.rb @@ -1,6 +1,7 @@ require "socket" require "ipaddr" require 'excon' +require 'rate_limiter' # Determine the final endpoint for a Web URI, following redirects class FinalDestination @@ -76,10 +77,6 @@ class FinalDestination end def is_dest_valid? - is_public? - end - - def is_public? return false unless @uri && @uri.host address_s = @opts[:lookup_ip].call(@uri.hostname) @@ -92,7 +89,12 @@ class FinalDestination return false end + # Rate limit how often this IP can be crawled + RateLimiter.new(nil, "crawl-destination-ip:#{address_s}", 100, 1.hour).performed! + true + rescue RateLimiter::LimitExceeded + false end def private_ranges diff --git a/spec/components/final_destination_spec.rb b/spec/components/final_destination_spec.rb index 73611a5c45a..87e29a0e8a7 100644 --- a/spec/components/final_destination_spec.rb +++ b/spec/components/final_destination_spec.rb @@ -164,38 +164,38 @@ describe FinalDestination do end end - describe ".is_public" do + describe ".is_dest_valid" do it "returns false for a valid ipv4" do - expect(fd("https://52.84.143.67").is_public?).to eq(true) - expect(fd("https://104.25.153.10").is_public?).to eq(true) + expect(fd("https://52.84.143.67").is_dest_valid?).to eq(true) + expect(fd("https://104.25.153.10").is_dest_valid?).to eq(true) end it "returns false for private ipv4" do - expect(fd("https://127.0.0.1").is_public?).to eq(false) - expect(fd("https://192.168.1.3").is_public?).to eq(false) - expect(fd("https://10.0.0.5").is_public?).to eq(false) - expect(fd("https://172.16.0.1").is_public?).to eq(false) + expect(fd("https://127.0.0.1").is_dest_valid?).to eq(false) + expect(fd("https://192.168.1.3").is_dest_valid?).to eq(false) + expect(fd("https://10.0.0.5").is_dest_valid?).to eq(false) + expect(fd("https://172.16.0.1").is_dest_valid?).to eq(false) end it "returns false for IPV6 via site settings" do SiteSetting.blacklist_ip_blocks = '2001:abc:de::/48|2002:abc:de::/48' - expect(fd('https://[2001:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(false) - expect(fd('https://[2002:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(false) - expect(fd('https://internal-ipv6.com').is_public?).to eq(false) - expect(fd('https://[2003:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(true) + expect(fd('https://[2001:abc:de:01:0:3f0:6a65:c2bf]').is_dest_valid?).to eq(false) + expect(fd('https://[2002:abc:de:01:0:3f0:6a65:c2bf]').is_dest_valid?).to eq(false) + expect(fd('https://internal-ipv6.com').is_dest_valid?).to eq(false) + expect(fd('https://[2003:abc:de:01:0:3f0:6a65:c2bf]').is_dest_valid?).to eq(true) end it "ignores invalid ranges" do SiteSetting.blacklist_ip_blocks = '2001:abc:de::/48|eviltrout' - expect(fd('https://[2001:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(false) + expect(fd('https://[2001:abc:de:01:0:3f0:6a65:c2bf]').is_dest_valid?).to eq(false) end it "returns true for public ipv6" do - expect(fd("https://[2001:470:1:3a8::251]").is_public?).to eq(true) + expect(fd("https://[2001:470:1:3a8::251]").is_dest_valid?).to eq(true) end it "returns true for private ipv6" do - expect(fd("https://[fdd7:b450:d4d1:6b44::1]").is_public?).to eq(false) + expect(fd("https://[fdd7:b450:d4d1:6b44::1]").is_dest_valid?).to eq(false) end end