From c546111703a74eb2431154ace792702aec07c7b6 Mon Sep 17 00:00:00 2001
From: Michael Brown
Date: Thu, 5 Dec 2024 16:24:21 -0500
Subject: [PATCH] DEV: add the notion of a 'crawler identifier' in anonymous_cache

We identify and deny blocked crawlers here in anonymous_cache. Separating
the notion of the crawler identifier here lets plugins perform an override
if they perform more advanced detection.
---
 lib/middleware/anonymous_cache.rb | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/middleware/anonymous_cache.rb b/lib/middleware/anonymous_cache.rb
index 1b472aef734..dae43888370 100644
--- a/lib/middleware/anonymous_cache.rb
+++ b/lib/middleware/anonymous_cache.rb
@@ -78,13 +78,17 @@ module Middleware
         @request = request || Rack::Request.new(@env)
       end
 
+      def crawler_identifier
+        @user_agent
+      end
+
       def blocked_crawler?
         @request.get? && !@request.xhr? && !@request.path.ends_with?("robots.txt") &&
           !@request.path.ends_with?("srv/status") &&
           @request[Auth::DefaultCurrentUserProvider::API_KEY].nil? &&
           @env[Auth::DefaultCurrentUserProvider::USER_API_KEY].nil? &&
           @env[Auth::DefaultCurrentUserProvider::HEADER_API_KEY].nil? &&
-          CrawlerDetection.is_blocked_crawler?(@user_agent)
+          CrawlerDetection.is_blocked_crawler?(crawler_identifier)
       end
 
       # rubocop:disable Lint/BooleanSymbol