discourse/app/controllers/robots_txt_controller.rb
Sam baa72d18f8 FIX: simplify so we ban all auth paths
previously plugins that have auth paths were not disallowed and robots
tend to call them
2018-08-16 19:16:47 +10:00

90 lines
2.3 KiB
Ruby

class RobotsTxtController < ApplicationController
layout false
skip_before_action :preload_json, :check_xhr, :redirect_to_login_if_required
# NOTE: order is important!
DISALLOWED_PATHS ||= %w{
/auth/
/assets/browser-update*.js
/users/
/u/
/my/
/badges/
/search
/search/
/tags
/tags/
/email/
/session
/session/
/admin
/admin/
/user-api-key
/user-api-key/
/*?api_key*
/*?*api_key*
/groups
/groups/
/t/*/*.rss
/tags/*.rss
/c/*.rss
}
def index
if SiteSetting.allow_index_in_robots_txt?
@robots_info = fetch_robots_info
render :index, content_type: 'text/plain'
else
render :no_index, content_type: 'text/plain'
end
end
# If you are hosting Discourse in a subfolder, you will need to create your robots.txt
# in the root of your web server with the appropriate paths. This method will return
# JSON that can be used by a script to create a robots.txt that works well with your
# existing site.
def builder
render json: fetch_robots_info
end
protected
def fetch_robots_info
deny_paths = DISALLOWED_PATHS.map { |p| Discourse.base_uri + p }
deny_all = [ "#{Discourse.base_uri}/" ]
result = {
header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
agents: []
}
if SiteSetting.whitelisted_crawler_user_agents.present?
SiteSetting.whitelisted_crawler_user_agents.split('|').each do |agent|
result[:agents] << { name: agent, disallow: deny_paths }
end
result[:agents] << { name: '*', disallow: deny_all }
elsif SiteSetting.blacklisted_crawler_user_agents.present?
result[:agents] << { name: '*', disallow: deny_paths }
SiteSetting.blacklisted_crawler_user_agents.split('|').each do |agent|
result[:agents] << { name: agent, disallow: deny_all }
end
else
result[:agents] << { name: '*', disallow: deny_paths }
end
if SiteSetting.slow_down_crawler_user_agents.present?
SiteSetting.slow_down_crawler_user_agents.split('|').each do |agent|
result[:agents] << {
name: agent,
delay: SiteSetting.slow_down_crawler_rate,
disallow: deny_paths
}
end
end
result
end
end