mirror of
https://github.com/discourse/discourse.git
synced 2024-12-14 07:43:43 +08:00
09f1ef6b05
This plugin API can be used to add to robots.txt. The event handler receives the complete robots information before it is converted into robots.txt.
92 lines
2.7 KiB
Ruby
92 lines
2.7 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
class RobotsTxtController < ApplicationController
|
|
layout false
|
|
skip_before_action :preload_json, :check_xhr, :redirect_to_login_if_required
|
|
|
|
OVERRIDDEN_HEADER = "# This robots.txt file has been customized at /admin/customize/robots\n"
|
|
|
|
# NOTE: order is important!
|
|
DISALLOWED_PATHS ||= %w{
|
|
/admin/
|
|
/auth/
|
|
/assets/browser-update*.js
|
|
/email/
|
|
/session
|
|
/user-api-key
|
|
/*?api_key*
|
|
/*?*api_key*
|
|
}
|
|
|
|
DISALLOWED_WITH_HEADER_PATHS ||= %w{
|
|
/badges
|
|
/u/
|
|
/my
|
|
/search
|
|
/tag/*/l
|
|
/g
|
|
/t/*/*.rss
|
|
/c/*.rss
|
|
}
|
|
|
|
def index
|
|
if (overridden = SiteSetting.overridden_robots_txt.dup).present?
|
|
overridden.prepend(OVERRIDDEN_HEADER) if guardian.is_admin? && !is_api?
|
|
render plain: overridden
|
|
return
|
|
end
|
|
if SiteSetting.allow_index_in_robots_txt?
|
|
@robots_info = self.class.fetch_default_robots_info
|
|
render :index, content_type: 'text/plain'
|
|
else
|
|
render :no_index, content_type: 'text/plain'
|
|
end
|
|
end
|
|
|
|
# If you are hosting Discourse in a subfolder, you will need to create your robots.txt
|
|
# in the root of your web server with the appropriate paths. This method will return
|
|
# JSON that can be used by a script to create a robots.txt that works well with your
|
|
# existing site.
|
|
def builder
|
|
result = self.class.fetch_default_robots_info
|
|
overridden = SiteSetting.overridden_robots_txt
|
|
result[:overridden] = overridden if overridden.present?
|
|
render json: result
|
|
end
|
|
|
|
def self.fetch_default_robots_info
|
|
deny_paths_googlebot = DISALLOWED_PATHS.map { |p| Discourse.base_path + p }
|
|
deny_paths = deny_paths_googlebot + DISALLOWED_WITH_HEADER_PATHS.map { |p| Discourse.base_path + p }
|
|
deny_all = [ "#{Discourse.base_path}/" ]
|
|
|
|
result = {
|
|
header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
|
|
agents: []
|
|
}
|
|
|
|
if SiteSetting.allowed_crawler_user_agents.present?
|
|
SiteSetting.allowed_crawler_user_agents.split('|').each do |agent|
|
|
paths = agent == "Googlebot" ? deny_paths_googlebot : deny_paths
|
|
result[:agents] << { name: agent, disallow: paths }
|
|
end
|
|
|
|
result[:agents] << { name: '*', disallow: deny_all }
|
|
else
|
|
|
|
if SiteSetting.blocked_crawler_user_agents.present?
|
|
SiteSetting.blocked_crawler_user_agents.split('|').each do |agent|
|
|
result[:agents] << { name: agent, disallow: deny_all }
|
|
end
|
|
end
|
|
|
|
result[:agents] << { name: '*', disallow: deny_paths }
|
|
|
|
result[:agents] << { name: 'Googlebot', disallow: deny_paths_googlebot }
|
|
end
|
|
|
|
DiscourseEvent.trigger(:robots_info, result)
|
|
|
|
result
|
|
end
|
|
end
|