discourse/spec/requests/application_controller_spec.rb
Sam, 7f98ed69cd, 2018-07-04 10:30:50 +10:00
FIX: move crawler blocking to app controller

We need access to site settings in multisite; we do not yet have access
to them if we attempt to read them in the request tracker middleware.
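
For context: the crawler check used to live in the request tracker middleware, which runs before multisite site settings are resolved; moving it into the application controller makes SiteSetting available. A minimal sketch of what such a controller filter could look like follows. It is an illustration under assumptions, not the actual Discourse implementation; the helper names crawler_agent? and agent_listed? are hypothetical.

# Hypothetical sketch, not the real Discourse code: crawler blocking
# as a controller filter, where multisite SiteSetting is available.
class ApplicationController < ActionController::Base
  before_action :block_crawlers

  private

  def block_crawlers
    agent = request.user_agent.to_s

    blocked =
      if SiteSetting.whitelisted_crawler_user_agents.present?
        # With a whitelist, only listed crawlers pass; ordinary
        # browsers are not treated as crawlers and are never blocked.
        crawler_agent?(agent) &&
          !agent_listed?(SiteSetting.whitelisted_crawler_user_agents, agent)
      else
        agent_listed?(SiteSetting.blacklisted_crawler_user_agents, agent)
      end

    render plain: "Crawlers are blocked", status: 403 if blocked
  end

  # Naive crawler heuristic, purely illustrative.
  def crawler_agent?(agent)
    agent.match?(/bot|crawl|spider|slurp/i)
  end

  # Discourse list settings are pipe-delimited; match each entry as a
  # substring of the user agent.
  def agent_listed?(setting, agent)
    setting.to_s.split('|').reject(&:blank?).any? { |ua| agent.include?(ua) }
  end
end

Doing the check in the controller rather than in middleware means a blocked request travels slightly further into the stack before being rejected, but the SiteSetting lookups resolve against the correct multisite database, which is the point of the fix.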


require 'rails_helper'

RSpec.describe ApplicationController do
  describe '#redirect_to_login_if_required' do
    let(:admin) { Fabricate(:admin) }

    before do
      admin # to skip welcome wizard at home page `/`
      SiteSetting.login_required = true
    end

    it "should carry-forward authComplete param to login page redirect" do
      get "/?authComplete=true"
      expect(response).to redirect_to('/login?authComplete=true')
    end
  end

  describe 'build_not_found_page' do
    describe 'topic not found' do
      it 'should return 404 and show Google search' do
        get "/t/nope-nope/99999999"
        expect(response.status).to eq(404)
        expect(response.body).to include(I18n.t('page_not_found.search_google'))
      end

      it 'should not include Google search if login_required is enabled' do
        SiteSetting.login_required = true
        sign_in(Fabricate(:user))
        get "/t/nope-nope/99999999"
        expect(response.status).to eq(404)
        expect(response.body).to_not include('google.com/search')
      end
    end
  end
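
  # These specs exercise the crawler blocking that the commit above
  # moved into the application controller; /srv/status serves as a
  # cheap endpoint that is still subject to the check.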
context "crawler blocking" do
let :non_crawler do
{
"HTTP_USER_AGENT" =>
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
}
end
it "applies whitelisted_crawler_user_agents correctly" do
SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'
get '/srv/status', headers: {
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
}
expect(response.status).to eq(200)
get '/srv/status', headers: {
'HTTP_USER_AGENT' => 'Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)'
}
expect(response.status).to eq(403)
get '/srv/status', headers: non_crawler
expect(response.status).to eq(200)
end
it "applies blacklisted_crawler_user_agents correctly" do
SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
get '/srv/status', headers: non_crawler
expect(response.status).to eq(200)
get '/srv/status', headers: {
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
}
expect(response.status).to eq(403)
get '/srv/status', headers: {
'HTTP_USER_AGENT' => 'Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)'
}
expect(response.status).to eq(200)
end
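
    # Request tracking is buffered: clear_cache! resets the buffer and
    # write_cache! persists it, so the expectation below confirms the
    # blocked request was never recorded as a page view.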
it "blocked crawlers shouldn't log page views" do
ApplicationRequest.clear_cache!
SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
expect {
get '/srv/status', headers: {
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
}
ApplicationRequest.write_cache!
}.to_not change { ApplicationRequest.count }
end
it "blocks json requests" do
SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
get '/srv/status.json', headers: {
'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
}
expect(response.status).to eq(403)
end
end
end