# frozen_string_literal: true

# Serves /robots.txt for the site, and a JSON "builder" endpoint that exposes
# the same crawl rules for subfolder installs (where the real robots.txt must
# live at the web-server root, outside this app).
class RobotsTxtController < ApplicationController
  layout false

  # robots.txt must be fetchable by anonymous crawlers: no JSON preload, no
  # XHR check, no login redirect.
  skip_before_action :preload_json, :check_xhr, :redirect_to_login_if_required

  # Banner prepended (for admins viewing in a browser) when the file has been
  # overridden via the admin UI, so they know where to edit it.
  OVERRIDDEN_HEADER = "# This robots.txt file has been customized at /admin/customize/robots\n"

  # NOTE: order is important!
  # `||=` avoids re-assignment warnings when the class is reloaded in dev.
  DISALLOWED_PATHS ||= %w[
    /admin/
    /auth/
    /assets/browser-update*.js
    /email/
    /session
    /user-api-key
    /*?api_key*
    /*?*api_key*
  ]

  # Paths disallowed for crawlers that honor the X-Robots-Tag header; Googlebot
  # is exempted from these (it gets only DISALLOWED_PATHS) — see
  # fetch_default_robots_info.
  DISALLOWED_WITH_HEADER_PATHS ||= %w[/badges /u/ /my /search /tag/*/l /g /t/*/*.rss /c/*.rss]

  # GET /robots.txt — renders either the admin-customized override, the
  # default template, or a deny-all template when indexing is disabled.
  def index
    if (overridden = SiteSetting.overridden_robots_txt.dup).present?
      # Show the "customized at" banner only to admins browsing directly, so
      # crawlers and API consumers get the exact configured content.
      overridden.prepend(OVERRIDDEN_HEADER) if guardian.is_admin? && !is_api?
      render plain: overridden
      return
    end
    if SiteSetting.allow_index_in_robots_txt?
      @robots_info = self.class.fetch_default_robots_info
      render :index, content_type: "text/plain"
    else
      render :no_index, content_type: "text/plain"
    end
  end

  # If you are hosting Discourse in a subfolder, you will need to create your robots.txt
  # in the root of your web server with the appropriate paths. This method will return
  # JSON that can be used by a script to create a robots.txt that works well with your
  # existing site.
  def builder
    result = self.class.fetch_default_robots_info
    overridden = SiteSetting.overridden_robots_txt
    result[:overridden] = overridden if overridden.present?
    render json: result
  end

  # Builds the default robots.txt data: a header comment plus a list of
  # { name:, disallow: } agent rules, honoring the allowed/blocked crawler
  # site settings. Fires the :robots_info event so plugins can amend the
  # result before it is rendered.
  def self.fetch_default_robots_info
    # Googlebot respects X-Robots-Tag headers, so it only needs the hard
    # disallow list; other crawlers also get DISALLOWED_WITH_HEADER_PATHS.
    deny_paths_googlebot = DISALLOWED_PATHS.map { |p| Discourse.base_path + p }
    deny_paths =
      deny_paths_googlebot + DISALLOWED_WITH_HEADER_PATHS.map { |p| Discourse.base_path + p }
    deny_all = ["#{Discourse.base_path}/"]

    result = {
      header:
        "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
      agents: [],
    }

    if SiteSetting.allowed_crawler_user_agents.present?
      # Allowlist mode: named agents get the normal disallow rules; every
      # other agent is denied the whole site.
      SiteSetting
        .allowed_crawler_user_agents
        .split("|")
        .each do |agent|
          paths = agent == "Googlebot" ? deny_paths_googlebot : deny_paths
          result[:agents] << { name: agent, disallow: paths }
        end

      result[:agents] << { name: "*", disallow: deny_all }
    else
      # Blocklist mode: explicitly blocked agents are denied everything;
      # everyone else gets the standard rules (Googlebot's reduced set last).
      if SiteSetting.blocked_crawler_user_agents.present?
        SiteSetting
          .blocked_crawler_user_agents
          .split("|")
          .each { |agent| result[:agents] << { name: agent, disallow: deny_all } }
      end

      result[:agents] << { name: "*", disallow: deny_paths }
      result[:agents] << { name: "Googlebot", disallow: deny_paths_googlebot }
    end

    # Allow plugins to mutate the robots info before rendering.
    DiscourseEvent.trigger(:robots_info, result)

    result
  end
end