mirror of
https://github.com/discourse/discourse.git
synced 2025-03-22 22:17:00 +08:00
prefix the robots.txt rules with the directory when using subfolder
This commit is contained in:
parent
3c8b43bb01
commit
df7970a6f6
@ -2,14 +2,46 @@ class RobotsTxtController < ApplicationController
|
||||
layout false
|
||||
skip_before_action :preload_json, :check_xhr, :redirect_to_login_if_required
|
||||
|
||||
# NOTE: order is important!
|
||||
DISALLOWED_PATHS ||= %w{
|
||||
/auth/cas
|
||||
/auth/facebook/callback
|
||||
/auth/twitter/callback
|
||||
/auth/google/callback
|
||||
/auth/yahoo/callback
|
||||
/auth/github/callback
|
||||
/auth/cas/callback
|
||||
/assets/browser-update*.js
|
||||
/users/
|
||||
/u/
|
||||
/badges/
|
||||
/search
|
||||
/search/
|
||||
/tags
|
||||
/tags/
|
||||
/email/
|
||||
/session
|
||||
/session/
|
||||
/admin
|
||||
/admin/
|
||||
/user-api-key
|
||||
/user-api-key/
|
||||
/*?api_key*
|
||||
/*?*api_key*
|
||||
/groups
|
||||
/groups/
|
||||
/t/*/*.rss
|
||||
/tags/*.rss
|
||||
/c/*.rss
|
||||
}
|
||||
|
||||
def index
|
||||
if SiteSetting.allow_index_in_robots_txt
|
||||
path = :index
|
||||
@crawler_delayed_agents = []
|
||||
|
||||
SiteSetting.slow_down_crawler_user_agents.split('|').each do |agent|
|
||||
@crawler_delayed_agents << [agent, SiteSetting.slow_down_crawler_rate]
|
||||
end
|
||||
@crawler_delayed_agents = SiteSetting.slow_down_crawler_user_agents.split('|').map { |agent|
|
||||
[agent, SiteSetting.slow_down_crawler_rate]
|
||||
}
|
||||
|
||||
if SiteSetting.whitelisted_crawler_user_agents.present?
|
||||
@allowed_user_agents = SiteSetting.whitelisted_crawler_user_agents.split('|')
|
||||
|
@ -3,40 +3,14 @@
|
||||
<% @allowed_user_agents.each do |user_agent| %>
|
||||
User-agent: <%= user_agent %>
|
||||
<% end %>
|
||||
Disallow: /auth/cas
|
||||
Disallow: /auth/facebook/callback
|
||||
Disallow: /auth/twitter/callback
|
||||
Disallow: /auth/google/callback
|
||||
Disallow: /auth/yahoo/callback
|
||||
Disallow: /auth/github/callback
|
||||
Disallow: /auth/cas/callback
|
||||
Disallow: /assets/browser-update*.js
|
||||
Disallow: /users/
|
||||
Disallow: /u/
|
||||
Disallow: /badges/
|
||||
Disallow: /search
|
||||
Disallow: /search/
|
||||
Disallow: /tags
|
||||
Disallow: /tags/
|
||||
Disallow: /email/
|
||||
Disallow: /session
|
||||
Disallow: /session/
|
||||
Disallow: /admin
|
||||
Disallow: /admin/
|
||||
Disallow: /user-api-key
|
||||
Disallow: /user-api-key/
|
||||
Disallow: /*?api_key*
|
||||
Disallow: /*?*api_key*
|
||||
Disallow: /groups
|
||||
Disallow: /groups/
|
||||
Disallow: /t/*/*.rss
|
||||
Disallow: /tags/*.rss
|
||||
Disallow: /c/*.rss
|
||||
<% RobotsTxtController::DISALLOWED_PATHS.each do |path| %>
|
||||
Disallow: <%= Discourse.base_uri + path %>
|
||||
<% end %>
|
||||
|
||||
<% if @disallowed_user_agents %>
|
||||
<% @disallowed_user_agents.each do |user_agent| %>
|
||||
User-agent: <%= user_agent %>
|
||||
Disallow: /
|
||||
Disallow: <%= Discourse.base_uri + "/" %>
|
||||
|
||||
<% end %>
|
||||
<% end %>
|
||||
|
@ -1,4 +1,4 @@
|
||||
# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
|
||||
#
|
||||
User-agent: *
|
||||
Disallow: /
|
||||
Disallow: <%= Discourse.base_uri + "/" %>
|
||||
|
@ -3,6 +3,14 @@ require 'rails_helper'
|
||||
RSpec.describe RobotsTxtController do
|
||||
describe '#index' do
|
||||
|
||||
context 'subfolder' do
|
||||
it 'prefixes the rules with the directory' do
|
||||
Discourse.stubs(:base_uri).returns('/forum')
|
||||
get '/robots.txt'
|
||||
expect(response.body).to include("\nDisallow: /forum/admin")
|
||||
end
|
||||
end
|
||||
|
||||
context 'crawl delay' do
|
||||
it 'allows you to set crawl delay on particular bots' do
|
||||
SiteSetting.allow_index_in_robots_txt = true
|
||||
|
Loading…
x
Reference in New Issue
Block a user