2019-04-30 08:27:42 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2015-10-11 17:41:23 +08:00
|
|
|
require "rails_helper"
|
2013-10-16 13:39:18 +08:00
|
|
|
|
2020-08-03 14:15:59 +08:00
|
|
|
describe Middleware::AnonymousCache do
|
|
|
|
# Middleware under test, wrapped around a stub app that always answers
# 200 with empty headers and an empty body.
let(:middleware) do
  stub_app = ->(_env) { [200, {}, []] }
  Middleware::AnonymousCache.new(stub_app)
end
|
2013-10-16 13:39:18 +08:00
|
|
|
|
2017-07-28 09:20:09 +08:00
|
|
|
# Builds a Rack env for http://test.com/path?bla=1 and merges +opts+ over it.
#
# opts - Hash of env keys that override or extend the generated env.
#
# Returns a new Hash.
def env(opts = {})
  base_env = Rack::MockRequest.env_for("http://test.com/path?bla=1")
  base_env.merge(opts)
end
|
|
|
|
|
2020-08-03 14:15:59 +08:00
|
|
|
describe Middleware::AnonymousCache::Helper do
|
|
|
|
# Creates a fresh cache helper for a request env built by +env+.
#
# opts - Hash of env overrides forwarded to +env+.
def new_helper(opts = {})
  request_env = env(opts)
  Middleware::AnonymousCache::Helper.new(request_env)
end
|
|
|
|
|
2020-08-03 14:15:59 +08:00
|
|
|
# Group name matches the method under test (`cacheable?`) so that filtering
# examples by the method name finds these specs.
context "cacheable?" do
  it "true by default" do
    helper = new_helper

    expect(helper.cacheable?).to eq(true)
  end

  it "is false for non GET" do
    post_helper = new_helper("ANON_CACHE_DURATION" => 10, "REQUEST_METHOD" => "POST")

    expect(post_helper.cacheable?).to eq(false)
  end

  it "is false if it has an auth cookie" do
    # The `_t` cookie is placed between two unrelated cookies.
    auth_token = "1" * 32
    cookie_helper = new_helper("HTTP_COOKIE" => "jack=1; _t=#{auth_token}; jill=2")

    expect(cookie_helper.cacheable?).to eq(false)
  end
end
|
2013-10-17 07:37:06 +08:00
|
|
|
|
2020-08-03 14:15:59 +08:00
|
|
|
context "per theme cache" do
  it "handles theme keys" do
    theme = Fabricate(:theme, user_selectable: true)

    # A theme_ids cookie that is not a valid id must not change the cache key.
    bad_cookie_key = new_helper("HTTP_COOKIE" => "theme_ids=abc").cache_key
    default_key = new_helper.cache_key
    expect(bad_cookie_key).to eq(default_key)

    # The id of an existing, user-selectable theme yields a distinct cache key.
    themed_key = new_helper("HTTP_COOKIE" => "theme_ids=#{theme.id}").cache_key
    expect(themed_key).not_to eq(default_key)
  end
end
|
2013-10-16 13:39:18 +08:00
|
|
|
|
2020-08-03 14:15:59 +08:00
|
|
|
context "cached" do
|
|
|
|
# Helper for a plain request with ANON_CACHE_DURATION set to 10; built eagerly
# for every example.
let!(:helper) { new_helper("ANON_CACHE_DURATION" => 10) }

# Same cache duration, but the request carries the AdsBot-Google user agent.
let!(:crawler) do
  crawler_env = {
    "ANON_CACHE_DURATION" => 10,
    "HTTP_USER_AGENT" => "AdsBot-Google (+http://www.google.com/adsbot.html)"
  }
  new_helper(crawler_env)
end

# Clear whatever each example cached so entries do not carry over.
after do
  [helper, crawler].each(&:clear_cache)
end

# NOTE(review): presumably a threshold of 1 makes the first request eligible
# for storing; confirm against the middleware implementation.
before { global_setting :anon_cache_store_threshold, 1 }
|
2017-06-15 21:36:27 +08:00
|
|
|
|
2020-08-03 14:15:59 +08:00
|
|
|
it "compresses body on demand" do
  global_setting :compress_anon_cache, true

  body = "x" * 1000
  helper.cache([200, { "HELLO" => "WORLD" }, [body]])

  # Read the entry back through a separate helper built from the same env.
  reader = new_helper("ANON_CACHE_DURATION" => 10)
  expected_response = [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [body]]
  expect(reader.cached).to eq(expected_response)

  # The exact stored length depends on the compression implementation; let's
  # assume it is stable unless we discover otherwise.
  stored_body = Discourse.redis.get(reader.cache_key_body)
  expect(stored_body.length).to eq(16)
end
|
|
|
|
|
|
|
|
it "handles brotli switching" do
  helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])

  # A request with the same env as the one that cached gets the joined body back.
  plain_reader = new_helper("ANON_CACHE_DURATION" => 10)
  expected_response = [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]]
  expect(plain_reader.cached).to eq(expected_response)

  # A request advertising `br` in Accept-Encoding must not see that entry.
  brotli_reader = new_helper("ANON_CACHE_DURATION" => 10, "HTTP_ACCEPT_ENCODING" => "gz, br")
  expect(brotli_reader.cached).to eq(nil)
end
|
|
|
|
|
|
|
|
it "returns cached data for cached requests" do
  # Nothing is cached for the mobile request yet.
  helper.is_mobile = true
  expect(helper.cached).to eq(nil)
  helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])

  # A second mobile helper built from the same env sees the stored response.
  mobile_reader = new_helper("ANON_CACHE_DURATION" => 10)
  mobile_reader.is_mobile = true
  mobile_response = [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]]
  expect(mobile_reader.cached).to eq(mobile_response)

  # The crawler request does not see the mobile entry and caches its own.
  expect(crawler.cached).to eq(nil)
  crawler.cache([200, { "HELLO" => "WORLD" }, ["hello ", "world"]])
  crawler_response = [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello world"]]
  expect(crawler.cached).to eq(crawler_response)
end
|
2017-06-15 21:36:27 +08:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2018-04-18 14:58:40 +08:00
|
|
|
context 'force_anonymous!' do
  # Rate limiting is switched on only for the duration of each example.
  before { RateLimiter.enable }

  after { RateLimiter.disable }

  it 'will revert to anonymous once we reach the limit' do
    RateLimiter.clear_all!

    is_anon = false

    # The downstream app records whether the cookie was missing when it ran.
    downstream_app = lambda do |rack_env|
      is_anon = rack_env["HTTP_COOKIE"].nil?
      [200, {}, ["ok"]]
    end
    app = Middleware::AnonymousCache.new(downstream_app)

    global_setting :force_anonymous_min_per_10_seconds, 2
    global_setting :force_anonymous_min_queue_seconds, 1

    request_env = {
      "HTTP_COOKIE" => "_t=#{SecureRandom.hex}",
      "HOST" => "site.com",
      "REQUEST_METHOD" => "GET",
      "REQUEST_URI" => "/somewhere/rainbow",
      "REQUEST_QUEUE_SECONDS" => 2.1,
      "rack.input" => StringIO.new
    }

    # Resets the flag, then sends a copy of the env through the middleware.
    send_request = lambda do
      is_anon = false
      app.call(request_env.dup)
    end

    # The first two requests still reach the app with their cookie.
    2.times do
      send_request.call
      expect(is_anon).to eq(false)
    end

    # From the third request on, the app sees no cookie.
    send_request.call
    expect(is_anon).to eq(true)

    _status, headers, _body = send_request.call
    expect(is_anon).to eq(true)
    expect(headers['Set-Cookie']).to eq('dosp=1; Path=/')

    # Tricky: once protection is tripped, a 50ms queue time still triggers it.
    request_env["REQUEST_QUEUE_SECONDS"] = 0.05
    send_request.call
    expect(is_anon).to eq(true)

    # A 10ms queue time does not.
    request_env["REQUEST_QUEUE_SECONDS"] = 0.01
    send_request.call
    expect(is_anon).to eq(false)
  end
end
|
|
|
|
|
2020-08-03 14:11:17 +08:00
|
|
|
context 'invalid request payload' do
  it 'returns 413 for GET request with payload' do
    # Write a body into the input stream of the env built by the `env` helper.
    request_env = env
    request_env[Rack::RACK_INPUT].write("test")

    status, _headers, _body = middleware.call(request_env)

    expect(status).to eq(413)
  end
end
|
|
|
|
|
2018-07-04 09:14:43 +08:00
|
|
|
context "crawler blocking" do
|
|
|
|
# Env headers carrying a Chrome-on-Windows browser user agent.
let(:non_crawler) do
  browser_user_agent =
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"

  { "HTTP_USER_AGENT" => browser_user_agent }
end
|
|
|
|
|
|
|
|
# Sends a request for +path+ through the middleware under test and records
# the outcome in instance variables for the example to inspect.
#
# path    - String used for REQUEST_URI, PATH_INFO and REQUEST_PATH.
# options - optional Hash; options[:headers] (a Hash, or nil/absent for none)
#           is merged over the path keys before the env is built.
#
# Sets @env, @status, @response_header and @response.
def get(path, options = {})
  # A missing or nil :headers entry means "no extra env keys" rather than a
  # TypeError from Hash#merge(nil).
  extra_env = options[:headers] || {}

  @env = env({
    "REQUEST_URI" => path,
    "PATH_INFO" => path,
    "REQUEST_PATH" => path
  }.merge(extra_env))

  @status, @response_header, @response = middleware.call(@env)
end
|
|
|
|
|
|
|
|
it "applies whitelisted_crawler_user_agents correctly" do
  SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'

  # The whitelisted crawler gets through.
  googlebot = { 'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)' }
  get '/', headers: googlebot
  expect(@status).to eq(200)

  # A bot that is not on the whitelist is rejected with an Array body.
  other_bot = { 'HTTP_USER_AGENT' => 'Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)' }
  get '/', headers: other_bot
  expect(@status).to eq(403)
  expect(@response).to be_an(Array)

  # A browser user agent is unaffected.
  get '/', headers: non_crawler
  expect(@status).to eq(200)
end
|
|
|
|
|
2018-09-15 03:34:21 +08:00
|
|
|
it "doesn't block api requests" do
  SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'
  api_key = Fabricate(:api_key)

  # The same credentials appear in the path and in QUERY_STRING.
  query_string = "api_key=#{api_key.key}&api_username=system"
  get "/latest?#{query_string}", headers: { "QUERY_STRING" => query_string }

  expect(@status).to eq(200)
end
|
|
|
|
|
2018-07-04 09:14:43 +08:00
|
|
|
it "applies blacklisted_crawler_user_agents correctly" do
  SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

  # A browser user agent is unaffected.
  get '/', headers: non_crawler
  expect(@status).to eq(200)

  # The blacklisted crawler is rejected.
  googlebot = { 'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)' }
  get '/', headers: googlebot
  expect(@status).to eq(403)

  # A crawler that is not on the blacklist still gets through.
  twitterbot = { 'HTTP_USER_AGENT' => 'Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)' }
  get '/', headers: twitterbot
  expect(@status).to eq(200)
end
|
|
|
|
|
|
|
|
it "should never block robots.txt" do
  SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

  # Even a blacklisted crawler may fetch robots.txt.
  googlebot = { 'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)' }
  get '/robots.txt', headers: googlebot

  expect(@status).to eq(200)
end
|
|
|
|
|
2018-07-18 10:33:06 +08:00
|
|
|
it "should never block srv/status" do
  SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

  # Even a blacklisted crawler may fetch /srv/status.
  googlebot = { 'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)' }
  get '/srv/status', headers: googlebot

  expect(@status).to eq(200)
end
|
|
|
|
|
|
|
|
it "blocked crawlers shouldn't log page views" do
  SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

  googlebot = { 'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)' }
  get '/', headers: googlebot

  # The middleware sets the request tracker's skip flag on the env it received.
  skip_flag = @env["discourse.request_tracker.skip"]
  expect(skip_flag).to eq(true)
end
|
|
|
|
|
|
|
|
it "blocks json requests" do
  SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

  # Unlike /srv/status itself, /srv/status.json is blocked for this crawler.
  googlebot = { 'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)' }
  get '/srv/status.json', headers: googlebot

  expect(@status).to eq(403)
end
|
|
|
|
end
|
|
|
|
|
2013-10-16 13:39:18 +08:00
|
|
|
end
|