diff --git a/lib/middleware/request_tracker.rb b/lib/middleware/request_tracker.rb
index aa23abd2cee..d22fbfb06dd 100644
--- a/lib/middleware/request_tracker.rb
+++ b/lib/middleware/request_tracker.rb
@@ -277,7 +277,10 @@ class Middleware::RequestTracker
   end
 
   def block_crawler(request)
-    !request.path.ends_with?('robots.txt') &&
+    request.get? &&
+    !request.xhr? &&
+    request.env['HTTP_ACCEPT'] =~ /text\/html/ &&
+    !request.path.ends_with?('robots.txt') &&
     CrawlerDetection.is_blocked_crawler?(request.env['HTTP_USER_AGENT'])
   end
 
diff --git a/spec/components/middleware/request_tracker_spec.rb b/spec/components/middleware/request_tracker_spec.rb
index 1838859ffbf..57f5782121d 100644
--- a/spec/components/middleware/request_tracker_spec.rb
+++ b/spec/components/middleware/request_tracker_spec.rb
@@ -9,6 +9,7 @@ describe Middleware::RequestTracker do
       "HTTP_USER_AGENT" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
       "REQUEST_URI" => "/path?bla=1",
       "REQUEST_METHOD" => "GET",
+      "HTTP_ACCEPT" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
       "rack.input" => ""
     }.merge(opts)
   end
@@ -317,6 +318,14 @@ describe Middleware::RequestTracker do
 
         ApplicationRequest.write_cache!
       }.to_not change { ApplicationRequest.count }
     end
+
+    it "allows json requests" do
+      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
+      expect_success_response(*middleware.call(env(
+        'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
+        'HTTP_ACCEPT' => 'application/json'
+      )))
+    end
   end
 end