discourse/lib/crawler_detection.rb
Sam Saffron 30990006a9 DEV: enable frozen string literal on all files
This reduces chances of errors where consumers of strings mutate inputs
and reduces memory usage of the app.

Test suite passes now, but there may be some stuff left, so we will run
a few sites on a branch prior to merging
2019-05-13 09:31:32 +08:00

60 lines
2.0 KiB
Ruby

# frozen_string_literal: true
module CrawlerDetection
  # Builds a case-insensitive Regexp from a pipe-delimited list of
  # user-agent fragments, escaping each fragment so special regex
  # characters are matched literally.
  #
  # When type: :real and we are running in the Rails test environment,
  # "Rails Testing" is appended as an extra alternative.
  def self.to_matcher(string, type: nil)
    fragments = string.split('|').map { |fragment| Regexp.escape(fragment) }
    pattern = fragments.join('|')
    if type == :real && Rails.env == "test"
      # we need this bypass so we properly render views
      pattern << "|Rails Testing"
    end
    Regexp.new(pattern, Regexp::IGNORECASE)
  end

  # Returns true when the user agent should be treated as a crawler.
  # A nil user agent counts as a crawler. Matchers are memoized per
  # setting value to avoid regenerating regexes on every request.
  def self.crawler?(user_agent)
    return true if user_agent.nil?

    @non_crawler_matchers ||= {}
    @matchers ||= {}

    real_browser =
      (@non_crawler_matchers[SiteSetting.non_crawler_user_agents] ||=
        to_matcher(SiteSetting.non_crawler_user_agents, type: :real))

    # Agents that don't look like a real browser at all are crawlers.
    return true unless user_agent.match?(real_browser)

    known_bots =
      (@matchers[SiteSetting.crawler_user_agents] ||=
        to_matcher(SiteSetting.crawler_user_agents))

    # Browser-like agents are only crawlers when they also match a known
    # bot pattern...
    return false unless user_agent.match?(known_bots)

    # ...and are not explicitly exempted by the bypass list.
    bypass =
      (@matchers[SiteSetting.crawler_check_bypass_agents] ||=
        to_matcher(SiteSetting.crawler_check_bypass_agents))

    !user_agent.match?(bypass)
  end

  # Given a user_agent that returns true from crawler?, should its request be allowed?
  def self.allow_crawler?(user_agent)
    if SiteSetting.whitelisted_crawler_user_agents.blank? &&
       SiteSetting.blacklisted_crawler_user_agents.blank?
      return true
    end

    @whitelisted_matchers ||= {}
    @blacklisted_matchers ||= {}

    if SiteSetting.whitelisted_crawler_user_agents.present?
      # Allow-list mode: only agents matching the whitelist proceed.
      allowed =
        @whitelisted_matchers[SiteSetting.whitelisted_crawler_user_agents] ||=
          to_matcher(SiteSetting.whitelisted_crawler_user_agents)
      !user_agent.nil? && user_agent.match?(allowed)
    else
      # Deny-list mode: everything proceeds except blacklisted agents.
      denied =
        @blacklisted_matchers[SiteSetting.blacklisted_crawler_user_agents] ||=
          to_matcher(SiteSetting.blacklisted_crawler_user_agents)
      user_agent.nil? || !user_agent.match?(denied)
    end
  end

  # True when the agent is a crawler that is not allowed to crawl.
  def self.is_blocked_crawler?(user_agent)
    crawler?(user_agent) && !allow_crawler?(user_agent)
  end
end