mirror of
https://github.com/discourse/discourse.git
synced 2024-11-23 06:29:30 +08:00
30990006a9
This reduces chances of errors where consumers of strings mutate inputs and reduces memory usage of the app. Test suite passes now, but there may be some stuff left, so we will run a few sites on a branch prior to merging
92 lines
2.3 KiB
Ruby
92 lines
2.3 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Serves the site's robots.txt and exposes the same crawler rules as JSON.
#
# `index` renders either the :index or the :no_index template as text/plain,
# depending on SiteSetting.allow_index_in_robots_txt?. `builder` renders the
# rule set produced by `fetch_robots_info` as JSON.
class RobotsTxtController < ApplicationController
  layout false
  skip_before_action :preload_json, :check_xhr, :redirect_to_login_if_required

  # Paths (relative to Discourse.base_uri) that crawlers are asked to skip.
  # NOTE: order is important!
  DISALLOWED_PATHS ||= %w[
    /auth/
    /assets/browser-update*.js
    /users/
    /u/
    /my/
    /badges/
    /search
    /search/
    /tags
    /tags/
    /email/
    /session
    /session/
    /admin
    /admin/
    /user-api-key
    /user-api-key/
    /*?api_key*
    /*?*api_key*
    /groups
    /groups/
    /t/*/*.rss
    /tags/*.rss
    /c/*.rss
  ]

  # Renders robots.txt as plain text. The :no_index template is used when
  # indexing is switched off in the site settings; otherwise the crawler rules
  # are exposed to the :index template through @robots_info.
  def index
    unless SiteSetting.allow_index_in_robots_txt?
      return render(:no_index, content_type: 'text/plain')
    end

    @robots_info = fetch_robots_info
    render(:index, content_type: 'text/plain')
  end

  # When Discourse is hosted in a subfolder, robots.txt has to be created in
  # the root of the web server with the appropriate paths. This action returns
  # JSON that a script can use to generate a robots.txt that fits in with the
  # rest of the existing site.
  def builder
    render(json: fetch_robots_info)
  end

  protected

  # Builds the crawler rule set.
  #
  # Returns a Hash with two keys:
  #   :header - a comment line pointing at the robots.txt documentation
  #   :agents - an Array of Hashes, each with :name and :disallow, plus :delay
  #             for agents listed in the slow-down setting
  def fetch_robots_info
    deny_paths = DISALLOWED_PATHS.map { |path| Discourse.base_uri + path }
    deny_all = ["#{Discourse.base_uri}/"]

    agents = []

    if SiteSetting.whitelisted_crawler_user_agents.present?
      # Whitelist mode: listed agents get the standard disallow list, and the
      # catch-all entry that follows them is denied the site root.
      listed = SiteSetting.whitelisted_crawler_user_agents.split('|')
      agents.concat(listed.map { |name| { name: name, disallow: deny_paths } })
      agents << { name: '*', disallow: deny_all }
    elsif SiteSetting.blacklisted_crawler_user_agents.present?
      # Blacklist mode: the catch-all entry comes first with the standard
      # disallow list, then each listed agent is denied the site root.
      agents << { name: '*', disallow: deny_paths }
      banned = SiteSetting.blacklisted_crawler_user_agents.split('|')
      agents.concat(banned.map { |name| { name: name, disallow: deny_all } })
    else
      agents << { name: '*', disallow: deny_paths }
    end

    if SiteSetting.slow_down_crawler_user_agents.present?
      # These entries are appended after the ones above and additionally carry
      # the delay value read from SiteSetting.slow_down_crawler_rate.
      throttled = SiteSetting.slow_down_crawler_user_agents.split('|')
      agents.concat(
        throttled.map do |name|
          {
            name: name,
            delay: SiteSetting.slow_down_crawler_rate,
            disallow: deny_paths
          }
        end
      )
    end

    {
      header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
      agents: agents
    }
  end

end
|