discourse/app/controllers/robots_txt_controller.rb
Alan Guo Xiang Tan 322a3be2db
DEV: Remove logical OR assignment of constants (#29201)
Constants should always be only assigned once. The logical OR assignment
of a constant is a relic of the past before we used zeitwerk for
autoloading and had bugs where a file could be loaded twice resulting in
constant redefinition warnings.
2024-10-16 10:09:07 +08:00

91 lines
2.8 KiB
Ruby

# frozen_string_literal: true
class RobotsTxtController < ApplicationController
layout false
skip_before_action :preload_json,
:check_xhr,
:redirect_to_login_if_required,
:redirect_to_profile_if_required
OVERRIDDEN_HEADER = "# This robots.txt file has been customized at /admin/customize/robots\n"
# NOTE: order is important!
DISALLOWED_PATHS = %w[
/admin/
/auth/
/assets/browser-update*.js
/email/
/session
/user-api-key
/*?api_key*
/*?*api_key*
]
DISALLOWED_WITH_HEADER_PATHS = %w[/badges /u/ /my /search /tag/*/l /g /t/*/*.rss /c/*.rss]
def index
if (overridden = SiteSetting.overridden_robots_txt.dup).present?
overridden.prepend(OVERRIDDEN_HEADER) if guardian.is_admin? && !is_api?
render plain: overridden
return
end
if SiteSetting.allow_index_in_robots_txt?
@robots_info = self.class.fetch_default_robots_info
render :index, content_type: "text/plain"
else
render :no_index, content_type: "text/plain"
end
end
# If you are hosting Discourse in a subfolder, you will need to create your robots.txt
# in the root of your web server with the appropriate paths. This method will return
# JSON that can be used by a script to create a robots.txt that works well with your
# existing site.
def builder
result = self.class.fetch_default_robots_info
overridden = SiteSetting.overridden_robots_txt
result[:overridden] = overridden if overridden.present?
render json: result
end
def self.fetch_default_robots_info
deny_paths_googlebot = DISALLOWED_PATHS.map { |p| Discourse.base_path + p }
deny_paths =
deny_paths_googlebot + DISALLOWED_WITH_HEADER_PATHS.map { |p| Discourse.base_path + p }
deny_all = ["#{Discourse.base_path}/"]
result = {
header:
"# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
agents: [],
}
if SiteSetting.allowed_crawler_user_agents.present?
SiteSetting
.allowed_crawler_user_agents
.split("|")
.each do |agent|
paths = agent == "Googlebot" ? deny_paths_googlebot : deny_paths
result[:agents] << { name: agent, disallow: paths }
end
result[:agents] << { name: "*", disallow: deny_all }
else
if SiteSetting.blocked_crawler_user_agents.present?
SiteSetting
.blocked_crawler_user_agents
.split("|")
.each { |agent| result[:agents] << { name: agent, disallow: deny_all } }
end
result[:agents] << { name: "*", disallow: deny_paths }
result[:agents] << { name: "Googlebot", disallow: deny_paths_googlebot }
end
DiscourseEvent.trigger(:robots_info, result)
result
end
end