From e3702ecb30bf6c0c366e6761d130c6db83ff0bd4 Mon Sep 17 00:00:00 2001
From: Vikhyat Korrapati
Date: Sat, 15 Mar 2014 20:01:46 +0530
Subject: [PATCH] Improved crawler detection: add Twitterbot, Facebook, curl, Bing, Baidu.

---
 lib/crawler_detection.rb                  | 2 +-
 spec/components/crawler_detection_spec.rb | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/crawler_detection.rb b/lib/crawler_detection.rb
index 2287bc7c847..c1bdc9e75df 100644
--- a/lib/crawler_detection.rb
+++ b/lib/crawler_detection.rb
@@ -1,5 +1,5 @@
 module CrawlerDetection
   def self.crawler?(user_agent)
-    !/Googlebot|Mediapartners|AdsBot/.match(user_agent).nil?
+    !/Googlebot|Mediapartners|AdsBot|curl|Twitterbot|facebookexternalhit|bingbot|Baiduspider/.match(user_agent).nil?
   end
 end
diff --git a/spec/components/crawler_detection_spec.rb b/spec/components/crawler_detection_spec.rb
index 2e8f80acef2..97a3f35e1cd 100644
--- a/spec/components/crawler_detection_spec.rb
+++ b/spec/components/crawler_detection_spec.rb
@@ -15,6 +15,10 @@ describe CrawlerDetection do
       described_class.crawler?("(compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)").should == true
       described_class.crawler?("Mediapartners-Google").should == true
       described_class.crawler?("AdsBot-Google (+http://www.google.com/adsbot.html)").should == true
+      described_class.crawler?("Twitterbot").should == true
+      described_class.crawler?("facebookexternalhit/1.1 (+http(s)://www.facebook.com/externalhit_uatext.php)").should == true
+      described_class.crawler?("Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)").should == true
+      described_class.crawler?("Baiduspider+(+http://www.baidu.com/search/spider.htm)").should == true
     end
 
     it "returns false for non-crawler user agents" do