From 4d12ff2e8a4c6e6d571e13a115f20cbf5b4e0e7d Mon Sep 17 00:00:00 2001 From: Neil Lalonde <neillalonde@gmail.com> Date: Tue, 27 Mar 2018 13:44:14 -0400 Subject: [PATCH] when writing cache, remove elements from the user agents list. also return a message and content type when blocking a crawler. --- app/models/web_crawler_request.rb | 6 +++--- lib/middleware/request_tracker.rb | 2 +- spec/components/middleware/request_tracker_spec.rb | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/models/web_crawler_request.rb b/app/models/web_crawler_request.rb index 3362259905f..5e418d1b1c1 100644 --- a/app/models/web_crawler_request.rb +++ b/app/models/web_crawler_request.rb @@ -30,9 +30,9 @@ class WebCrawlerRequest < ActiveRecord::Base self.last_flush = Time.now.utc date = date.to_date + ua_list_key = user_agent_list_key(date) - $redis.smembers(user_agent_list_key(date)).each do |user_agent, _| - + while user_agent = $redis.spop(ua_list_key) val = get_and_reset(redis_key(user_agent, date)) next if val == 0 @@ -57,7 +57,7 @@ class WebCrawlerRequest < ActiveRecord::Base $redis.del redis_key(user_agent, date) end - $redis.del list_key + $redis.del(list_key) end protected diff --git a/lib/middleware/request_tracker.rb b/lib/middleware/request_tracker.rb index d22fbfb06dd..e7ffb8869a4 100644 --- a/lib/middleware/request_tracker.rb +++ b/lib/middleware/request_tracker.rb @@ -167,7 +167,7 @@ class Middleware::RequestTracker if block_crawler(request) log_request = false - result = [403, {}, []] + result = [403, { 'Content-Type' => 'text/plain' }, ['Crawler is not allowed']] return result end diff --git a/spec/components/middleware/request_tracker_spec.rb b/spec/components/middleware/request_tracker_spec.rb index 57f5782121d..4faa9c094bc 100644 --- a/spec/components/middleware/request_tracker_spec.rb +++ b/spec/components/middleware/request_tracker_spec.rb @@ -291,7 +291,7 @@ describe Middleware::RequestTracker do def expect_blocked_response(status, _, response) expect(status).to eq(403) - expect(response).to be_blank + expect(response).to eq(['Crawler is not allowed']) end it "applies whitelisted_crawler_user_agents correctly" do