From 4d12ff2e8a4c6e6d571e13a115f20cbf5b4e0e7d Mon Sep 17 00:00:00 2001
From: Neil Lalonde <neillalonde@gmail.com>
Date: Tue, 27 Mar 2018 13:44:14 -0400
Subject: [PATCH] When writing cache, remove elements from the user agents
 list. Also return a message and content type when blocking a crawler.

---
 app/models/web_crawler_request.rb                  | 6 +++---
 lib/middleware/request_tracker.rb                  | 2 +-
 spec/components/middleware/request_tracker_spec.rb | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/models/web_crawler_request.rb b/app/models/web_crawler_request.rb
index 3362259905f..5e418d1b1c1 100644
--- a/app/models/web_crawler_request.rb
+++ b/app/models/web_crawler_request.rb
@@ -30,9 +30,9 @@ class WebCrawlerRequest < ActiveRecord::Base
     self.last_flush = Time.now.utc
 
     date = date.to_date
+    ua_list_key = user_agent_list_key(date)
 
-    $redis.smembers(user_agent_list_key(date)).each do |user_agent, _|
-
+    while user_agent = $redis.spop(ua_list_key)
       val = get_and_reset(redis_key(user_agent, date))
 
       next if val == 0
@@ -57,7 +57,7 @@ class WebCrawlerRequest < ActiveRecord::Base
       $redis.del redis_key(user_agent, date)
     end
 
-    $redis.del list_key
+    $redis.del(list_key)
   end
 
   protected
diff --git a/lib/middleware/request_tracker.rb b/lib/middleware/request_tracker.rb
index d22fbfb06dd..e7ffb8869a4 100644
--- a/lib/middleware/request_tracker.rb
+++ b/lib/middleware/request_tracker.rb
@@ -167,7 +167,7 @@ class Middleware::RequestTracker
 
     if block_crawler(request)
       log_request = false
-      result = [403, {}, []]
+      result = [403, { 'Content-Type' => 'text/plain' }, ['Crawler is not allowed']]
       return result
     end
 
diff --git a/spec/components/middleware/request_tracker_spec.rb b/spec/components/middleware/request_tracker_spec.rb
index 57f5782121d..4faa9c094bc 100644
--- a/spec/components/middleware/request_tracker_spec.rb
+++ b/spec/components/middleware/request_tracker_spec.rb
@@ -291,7 +291,7 @@ describe Middleware::RequestTracker do
 
     def expect_blocked_response(status, _, response)
       expect(status).to eq(403)
-      expect(response).to be_blank
+      expect(response).to eq(['Crawler is not allowed'])
     end
 
     it "applies whitelisted_crawler_user_agents correctly" do