# mirror of https://github.com/discourse/discourse.git (synced 2024-11-25 04:52:52 +08:00)
# commit 489c22d93c: This stops crawlers from hitting tag and category RSS feeds to discover
# new content; instead, they should focus on latest/posts if they need to consume something regularly.
# (50 lines, 1.1 KiB, Plaintext)
# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
#
# Emit one User-agent line per explicitly allowed crawler; the Disallow rules
# below apply to all of them as a single group.
<% @allowed_user_agents.each do |user_agent| %>
User-agent: <%= user_agent %>
<% end %>
Disallow: /auth/cas
Disallow: /auth/facebook/callback
Disallow: /auth/twitter/callback
Disallow: /auth/google/callback
Disallow: /auth/yahoo/callback
Disallow: /auth/github/callback
Disallow: /auth/cas/callback
Disallow: /assets/browser-update*.js
Disallow: /users/
Disallow: /u/
Disallow: /badges/
Disallow: /search
Disallow: /search/
Disallow: /tags
Disallow: /tags/
Disallow: /email/
Disallow: /session
Disallow: /session/
Disallow: /admin
Disallow: /admin/
Disallow: /user-api-key
Disallow: /user-api-key/
Disallow: /*?api_key*
Disallow: /*?*api_key*
Disallow: /groups
Disallow: /groups/
Disallow: /t/*/*.rss
Disallow: /tags/*.rss
Disallow: /c/*.rss

<% if @disallowed_user_agents %>
<% @disallowed_user_agents.each do |user_agent| %>
User-agent: <%= user_agent %>
Disallow: /

<% end %>
<% end %>

<%= server_plugin_outlet "robots_txt_index" %>

<% @crawler_delayed_agents.each do |agent, delay| %>
User-agent: <%= agent %>
Crawl-delay: <%= delay %>
<% end %>