discourse/spec/lib/middleware/anonymous_cache_spec.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

425 lines
13 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
RSpec.describe Middleware::AnonymousCache do
let(:middleware) { Middleware::AnonymousCache.new(lambda { |_| [200, {}, []] }) }
before { Middleware::AnonymousCache.enable_anon_cache }
def env(opts = {})
create_request_env(path: opts.delete(:path) || "http://test.com/path?bla=1").merge(opts)
end
describe Middleware::AnonymousCache::Helper do
def new_helper(opts = {})
Middleware::AnonymousCache::Helper.new(env(opts))
end
describe "#cacheable?" do
it "true by default" do
expect(new_helper.cacheable?).to eq(true)
end
it "is false for non GET" do
expect(
new_helper("ANON_CACHE_DURATION" => 10, "REQUEST_METHOD" => "POST").cacheable?,
).to eq(false)
end
FEATURE: Apply rate limits per user instead of IP for trusted users (#14706) Currently, Discourse rate limits all incoming requests by the IP address they originate from regardless of the user making the request. This can be frustrating if there are multiple users using Discourse simultaneously while sharing the same IP address (e.g. employees in an office). This commit implements a new feature to make Discourse apply rate limits by user id rather than IP address for users at or higher than the configured trust level (1 is the default). For example, let's say a Discourse instance is configured to allow 200 requests per minute per IP address, and we have 10 users at trust level 4 using Discourse simultaneously from the same IP address. Before this feature, the 10 users could only make a total of 200 requests per minute before they got rate limited. But with the new feature, each user is allowed to make 200 requests per minute because the rate limits are applied on user id rather than the IP address. The minimum trust level for applying user-id-based rate limits can be configured by the `skip_per_ip_rate_limit_trust_level` global setting. The default is 1, but it can be changed by either adding the `DISCOURSE_SKIP_PER_IP_RATE_LIMIT_TRUST_LEVEL` environment variable with the desired value to your `app.yml`, or changing the setting's value in the `discourse.conf` file. Requests made with API keys are still rate limited by IP address and the relevant global settings that control API keys rate limits. Before this commit, Discourse's auth cookie (`_t`) was simply a 32 characters string that Discourse used to lookup the current user from the database and the cookie contained no additional information about the user. However, we had to change the cookie content in this commit so we could identify the user from the cookie without making a database query before the rate limits logic and avoid introducing a bottleneck on busy sites. Besides the 32 characters auth token, the cookie now includes the user id, trust level and the cookie's generation date, and we encrypt/sign the cookie to prevent tampering. Internal ticket number: t54739.
2021-11-18 04:27:30 +08:00
it "is false if it has a valid auth cookie" do
cookie = create_auth_cookie(token: SecureRandom.hex)
expect(new_helper("HTTP_COOKIE" => "jack=1; _t=#{cookie}; jill=2").cacheable?).to eq(false)
end
it "is true if it has an invalid auth cookie" do
cookie = create_auth_cookie(token: SecureRandom.hex, issued_at: 5.minutes.ago)
cookie = swap_2_different_characters(cookie)
cookie.prepend("%a0%a1") # an invalid byte sequence
FEATURE: Apply rate limits per user instead of IP for trusted users (#14706) Currently, Discourse rate limits all incoming requests by the IP address they originate from regardless of the user making the request. This can be frustrating if there are multiple users using Discourse simultaneously while sharing the same IP address (e.g. employees in an office). This commit implements a new feature to make Discourse apply rate limits by user id rather than IP address for users at or higher than the configured trust level (1 is the default). For example, let's say a Discourse instance is configured to allow 200 requests per minute per IP address, and we have 10 users at trust level 4 using Discourse simultaneously from the same IP address. Before this feature, the 10 users could only make a total of 200 requests per minute before they got rate limited. But with the new feature, each user is allowed to make 200 requests per minute because the rate limits are applied on user id rather than the IP address. The minimum trust level for applying user-id-based rate limits can be configured by the `skip_per_ip_rate_limit_trust_level` global setting. The default is 1, but it can be changed by either adding the `DISCOURSE_SKIP_PER_IP_RATE_LIMIT_TRUST_LEVEL` environment variable with the desired value to your `app.yml`, or changing the setting's value in the `discourse.conf` file. Requests made with API keys are still rate limited by IP address and the relevant global settings that control API keys rate limits. Before this commit, Discourse's auth cookie (`_t`) was simply a 32 characters string that Discourse used to lookup the current user from the database and the cookie contained no additional information about the user. However, we had to change the cookie content in this commit so we could identify the user from the cookie without making a database query before the rate limits logic and avoid introducing a bottleneck on busy sites. Besides the 32 characters auth token, the cookie now includes the user id, trust level and the cookie's generation date, and we encrypt/sign the cookie to prevent tampering. Internal ticket number: t54739.
2021-11-18 04:27:30 +08:00
expect(new_helper("HTTP_COOKIE" => "jack=1; _t=#{cookie}; jill=2").cacheable?).to eq(true)
end
it "is false for srv/status routes" do
expect(new_helper("PATH_INFO" => "/srv/status").cacheable?).to eq(false)
end
it "is false for API requests using header" do
expect(new_helper("HTTP_API_KEY" => "abcde").cacheable?).to eq(false)
end
it "is false for API requests using parameter" do
expect(new_helper(path: "/path?api_key=abc").cacheable?).to eq(false)
end
it "is false for User API requests using header" do
expect(new_helper("HTTP_USER_API_KEY" => "abcde").cacheable?).to eq(false)
end
end
describe "per theme cache" do
it "handles theme keys" do
theme = Fabricate(:theme, user_selectable: true)
with_bad_theme_key = new_helper("HTTP_COOKIE" => "theme_ids=abc").cache_key
with_no_theme_key = new_helper().cache_key
expect(with_bad_theme_key).to eq(with_no_theme_key)
with_good_theme_key = new_helper("HTTP_COOKIE" => "theme_ids=#{theme.id}").cache_key
expect(with_good_theme_key).not_to eq(with_no_theme_key)
end
end
context "with header or cookie based custom locale" do
it "handles different languages" do
# Normally does not check the language header
french1 = new_helper("HTTP_ACCEPT_LANGUAGE" => "fr").cache_key
french2 = new_helper("HTTP_ACCEPT_LANGUAGE" => "FR").cache_key
english = new_helper("HTTP_ACCEPT_LANGUAGE" => SiteSetting.default_locale).cache_key
none = new_helper.cache_key
expect(none).to eq(french1)
expect(none).to eq(french2)
expect(none).to eq(english)
SiteSetting.allow_user_locale = true
SiteSetting.set_locale_from_accept_language_header = true
french1 = new_helper("HTTP_ACCEPT_LANGUAGE" => "fr").cache_key
french2 = new_helper("HTTP_ACCEPT_LANGUAGE" => "FR").cache_key
english = new_helper("HTTP_ACCEPT_LANGUAGE" => SiteSetting.default_locale).cache_key
none = new_helper.cache_key
expect(none).to eq(english)
expect(french1).to eq(french2)
expect(french1).not_to eq(none)
SiteSetting.set_locale_from_cookie = true
expect(new_helper("HTTP_COOKIE" => "locale=es;").cache_key).to include("l=es")
end
end
it "handles old browsers" do
SiteSetting.browser_update_user_agents = "my_old_browser"
key1 = new_helper("HTTP_USER_AGENT" => "my_old_browser").cache_key
key2 = new_helper("HTTP_USER_AGENT" => "my_new_browser").cache_key
expect(key1).not_to eq(key2)
end
it "handles modern mobile browsers" do
key1 = new_helper("HTTP_USER_AGENT" => "Safari (iPhone OS 7)").cache_key
key2 = new_helper("HTTP_USER_AGENT" => "Safari (iPhone OS 15)").cache_key
expect(key1).not_to eq(key2)
end
it "handles user agents with invalid bytes" do
agent = (+"Evil Googlebot String \xc3\x28").force_encoding("ASCII")
expect {
key1 = new_helper("HTTP_USER_AGENT" => agent).cache_key
key2 =
new_helper(
"HTTP_USER_AGENT" => agent.encode("utf-8", invalid: :replace, undef: :replace),
).cache_key
expect(key1).to eq(key2)
}.not_to raise_error
end
context "when cached" do
let!(:helper) { new_helper("ANON_CACHE_DURATION" => 10) }
let!(:crawler) do
new_helper(
"ANON_CACHE_DURATION" => 10,
"HTTP_USER_AGENT" => "AdsBot-Google (+http://www.google.com/adsbot.html)",
)
end
after do
helper.clear_cache
crawler.clear_cache
end
before { global_setting :anon_cache_store_threshold, 1 }
it "compresses body on demand" do
global_setting :compress_anon_cache, true
payload = "x" * 1000
helper.cache([200, { "HELLO" => "WORLD" }, [payload]])
helper = new_helper("ANON_CACHE_DURATION" => 10)
expect(helper.cached).to eq(
[200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [payload]],
)
# depends on i7z implementation, but lets assume it is stable unless we discover
# otherwise
expect(Discourse.redis.get(helper.cache_key_body).length).to eq(16)
end
it "handles brotli switching" do
helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])
helper = new_helper("ANON_CACHE_DURATION" => 10)
expect(helper.cached).to eq(
[200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]],
)
helper = new_helper("ANON_CACHE_DURATION" => 10, "HTTP_ACCEPT_ENCODING" => "gz, br")
expect(helper.cached).to eq(nil)
end
it "returns cached data for cached requests" do
helper.is_mobile = true
expect(helper.cached).to eq(nil)
helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])
helper = new_helper("ANON_CACHE_DURATION" => 10)
helper.is_mobile = true
expect(helper.cached).to eq(
[200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]],
)
expect(crawler.cached).to eq(nil)
crawler.cache([200, { "HELLO" => "WORLD" }, ["hello ", "world"]])
expect(crawler.cached).to eq(
[200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello world"]],
)
end
end
end
describe "background request rate limit" do
it "will rate limit background requests" do
app = Middleware::AnonymousCache.new(lambda { |env| [200, {}, ["ok"]] })
global_setting :background_requests_max_queue_length, "0.5"
FEATURE: Apply rate limits per user instead of IP for trusted users (#14706) Currently, Discourse rate limits all incoming requests by the IP address they originate from regardless of the user making the request. This can be frustrating if there are multiple users using Discourse simultaneously while sharing the same IP address (e.g. employees in an office). This commit implements a new feature to make Discourse apply rate limits by user id rather than IP address for users at or higher than the configured trust level (1 is the default). For example, let's say a Discourse instance is configured to allow 200 requests per minute per IP address, and we have 10 users at trust level 4 using Discourse simultaneously from the same IP address. Before this feature, the 10 users could only make a total of 200 requests per minute before they got rate limited. But with the new feature, each user is allowed to make 200 requests per minute because the rate limits are applied on user id rather than the IP address. The minimum trust level for applying user-id-based rate limits can be configured by the `skip_per_ip_rate_limit_trust_level` global setting. The default is 1, but it can be changed by either adding the `DISCOURSE_SKIP_PER_IP_RATE_LIMIT_TRUST_LEVEL` environment variable with the desired value to your `app.yml`, or changing the setting's value in the `discourse.conf` file. Requests made with API keys are still rate limited by IP address and the relevant global settings that control API keys rate limits. Before this commit, Discourse's auth cookie (`_t`) was simply a 32 characters string that Discourse used to lookup the current user from the database and the cookie contained no additional information about the user. However, we had to change the cookie content in this commit so we could identify the user from the cookie without making a database query before the rate limits logic and avoid introducing a bottleneck on busy sites. Besides the 32 characters auth token, the cookie now includes the user id, trust level and the cookie's generation date, and we encrypt/sign the cookie to prevent tampering. Internal ticket number: t54739.
2021-11-18 04:27:30 +08:00
cookie = create_auth_cookie(token: SecureRandom.hex)
env =
create_request_env.merge(
"HTTP_COOKIE" => "_t=#{cookie}",
"HOST" => "site.com",
"REQUEST_METHOD" => "GET",
"REQUEST_URI" => "/somewhere/rainbow",
"REQUEST_QUEUE_SECONDS" => 2.1,
"rack.input" => StringIO.new,
FEATURE: Apply rate limits per user instead of IP for trusted users (#14706) Currently, Discourse rate limits all incoming requests by the IP address they originate from regardless of the user making the request. This can be frustrating if there are multiple users using Discourse simultaneously while sharing the same IP address (e.g. employees in an office). This commit implements a new feature to make Discourse apply rate limits by user id rather than IP address for users at or higher than the configured trust level (1 is the default). For example, let's say a Discourse instance is configured to allow 200 requests per minute per IP address, and we have 10 users at trust level 4 using Discourse simultaneously from the same IP address. Before this feature, the 10 users could only make a total of 200 requests per minute before they got rate limited. But with the new feature, each user is allowed to make 200 requests per minute because the rate limits are applied on user id rather than the IP address. The minimum trust level for applying user-id-based rate limits can be configured by the `skip_per_ip_rate_limit_trust_level` global setting. The default is 1, but it can be changed by either adding the `DISCOURSE_SKIP_PER_IP_RATE_LIMIT_TRUST_LEVEL` environment variable with the desired value to your `app.yml`, or changing the setting's value in the `discourse.conf` file. Requests made with API keys are still rate limited by IP address and the relevant global settings that control API keys rate limits. Before this commit, Discourse's auth cookie (`_t`) was simply a 32 characters string that Discourse used to lookup the current user from the database and the cookie contained no additional information about the user. However, we had to change the cookie content in this commit so we could identify the user from the cookie without making a database query before the rate limits logic and avoid introducing a bottleneck on busy sites. Besides the 32 characters auth token, the cookie now includes the user id, trust level and the cookie's generation date, and we encrypt/sign the cookie to prevent tampering. Internal ticket number: t54739.
2021-11-18 04:27:30 +08:00
)
# non background ... long request
env["REQUEST_QUEUE_SECONDS"] = 2
status, _ = app.call(env.dup)
expect(status).to eq(200)
env["HTTP_DISCOURSE_BACKGROUND"] = "true"
status, headers, body = app.call(env.dup)
expect(status).to eq(429)
expect(headers["content-type"]).to eq("application/json; charset=utf-8")
json = JSON.parse(body.join)
expect(json["extras"]["wait_seconds"]).to be > 4.9
env["REQUEST_QUEUE_SECONDS"] = 0.4
status, _ = app.call(env.dup)
expect(status).to eq(200)
end
end
describe "#force_anonymous!" do
before { RateLimiter.enable }
use_redis_snapshotting
it "will revert to anonymous once we reach the limit" do
is_anon = false
app =
Middleware::AnonymousCache.new(
lambda do |env|
is_anon = env["HTTP_COOKIE"].nil? && env["HTTP_DISCOURSE_LOGGED_IN"].nil?
[200, {}, ["ok"]]
end,
)
global_setting :force_anonymous_min_per_10_seconds, 2
global_setting :force_anonymous_min_queue_seconds, 1
FEATURE: Apply rate limits per user instead of IP for trusted users (#14706) Currently, Discourse rate limits all incoming requests by the IP address they originate from regardless of the user making the request. This can be frustrating if there are multiple users using Discourse simultaneously while sharing the same IP address (e.g. employees in an office). This commit implements a new feature to make Discourse apply rate limits by user id rather than IP address for users at or higher than the configured trust level (1 is the default). For example, let's say a Discourse instance is configured to allow 200 requests per minute per IP address, and we have 10 users at trust level 4 using Discourse simultaneously from the same IP address. Before this feature, the 10 users could only make a total of 200 requests per minute before they got rate limited. But with the new feature, each user is allowed to make 200 requests per minute because the rate limits are applied on user id rather than the IP address. The minimum trust level for applying user-id-based rate limits can be configured by the `skip_per_ip_rate_limit_trust_level` global setting. The default is 1, but it can be changed by either adding the `DISCOURSE_SKIP_PER_IP_RATE_LIMIT_TRUST_LEVEL` environment variable with the desired value to your `app.yml`, or changing the setting's value in the `discourse.conf` file. Requests made with API keys are still rate limited by IP address and the relevant global settings that control API keys rate limits. Before this commit, Discourse's auth cookie (`_t`) was simply a 32 characters string that Discourse used to lookup the current user from the database and the cookie contained no additional information about the user. However, we had to change the cookie content in this commit so we could identify the user from the cookie without making a database query before the rate limits logic and avoid introducing a bottleneck on busy sites. Besides the 32 characters auth token, the cookie now includes the user id, trust level and the cookie's generation date, and we encrypt/sign the cookie to prevent tampering. Internal ticket number: t54739.
2021-11-18 04:27:30 +08:00
cookie = create_auth_cookie(token: SecureRandom.hex)
env =
create_request_env.merge(
"HTTP_COOKIE" => "_t=#{cookie}",
"HTTP_DISCOURSE_LOGGED_IN" => "true",
"HOST" => "site.com",
"REQUEST_METHOD" => "GET",
"REQUEST_URI" => "/somewhere/rainbow",
"REQUEST_QUEUE_SECONDS" => 2.1,
"rack.input" => StringIO.new,
FEATURE: Apply rate limits per user instead of IP for trusted users (#14706) Currently, Discourse rate limits all incoming requests by the IP address they originate from regardless of the user making the request. This can be frustrating if there are multiple users using Discourse simultaneously while sharing the same IP address (e.g. employees in an office). This commit implements a new feature to make Discourse apply rate limits by user id rather than IP address for users at or higher than the configured trust level (1 is the default). For example, let's say a Discourse instance is configured to allow 200 requests per minute per IP address, and we have 10 users at trust level 4 using Discourse simultaneously from the same IP address. Before this feature, the 10 users could only make a total of 200 requests per minute before they got rate limited. But with the new feature, each user is allowed to make 200 requests per minute because the rate limits are applied on user id rather than the IP address. The minimum trust level for applying user-id-based rate limits can be configured by the `skip_per_ip_rate_limit_trust_level` global setting. The default is 1, but it can be changed by either adding the `DISCOURSE_SKIP_PER_IP_RATE_LIMIT_TRUST_LEVEL` environment variable with the desired value to your `app.yml`, or changing the setting's value in the `discourse.conf` file. Requests made with API keys are still rate limited by IP address and the relevant global settings that control API keys rate limits. Before this commit, Discourse's auth cookie (`_t`) was simply a 32 characters string that Discourse used to lookup the current user from the database and the cookie contained no additional information about the user. However, we had to change the cookie content in this commit so we could identify the user from the cookie without making a database query before the rate limits logic and avoid introducing a bottleneck on busy sites. Besides the 32 characters auth token, the cookie now includes the user id, trust level and the cookie's generation date, and we encrypt/sign the cookie to prevent tampering. Internal ticket number: t54739.
2021-11-18 04:27:30 +08:00
)
is_anon = false
app.call(env.dup)
expect(is_anon).to eq(false)
is_anon = false
app.call(env.dup)
expect(is_anon).to eq(false)
is_anon = false
app.call(env.dup)
expect(is_anon).to eq(true)
is_anon = false
_status, headers, _body = app.call(env.dup)
expect(is_anon).to eq(true)
2018-04-24 11:24:26 +08:00
expect(headers["Set-Cookie"]).to eq("dosp=1; Path=/")
# tricky change, a 50ms delay still will trigger protection
# once it is tripped
env["REQUEST_QUEUE_SECONDS"] = 0.05
is_anon = false
app.call(env.dup)
expect(is_anon).to eq(true)
is_anon = false
env["REQUEST_QUEUE_SECONDS"] = 0.01
app.call(env.dup)
expect(is_anon).to eq(false)
end
end
describe "invalid request payload" do
it "returns 413 for GET request with payload" do
status, headers, _ =
middleware.call(env.tap { |environment| environment[Rack::RACK_INPUT].write("test") })
expect(status).to eq(413)
expect(headers["Cache-Control"]).to eq("private, max-age=0, must-revalidate")
end
end
describe "crawler blocking" do
let :non_crawler do
{
"HTTP_USER_AGENT" =>
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
}
end
def get(path, options)
@env =
env(
{ "REQUEST_URI" => path, "PATH_INFO" => path, "REQUEST_PATH" => path }.merge(
options[:headers],
),
)
@status, @response_header, @response = middleware.call(@env)
end
it "applies allowed_crawler_user_agents correctly" do
SiteSetting.allowed_crawler_user_agents = "Googlebot"
get "/", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }
expect(@status).to eq(200)
get "/",
headers: {
"HTTP_USER_AGENT" => "Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)",
}
expect(@status).to eq(403)
expect(@response).to be_an(Array)
get "/", headers: non_crawler
expect(@status).to eq(200)
end
it "doesn't block api requests" do
SiteSetting.allowed_crawler_user_agents = "Googlebot"
api_key = Fabricate(:api_key)
get "/latest?api_key=#{api_key.key}&api_username=system",
headers: {
"QUERY_STRING" => "api_key=#{api_key.key}&api_username=system",
}
expect(@status).to eq(200)
get "/latest", headers: { "HTTP_API_KEY" => api_key.key, "HTTP_API_USERNAME" => "system" }
expect(@status).to eq(200)
end
it "applies blocked_crawler_user_agents correctly" do
SiteSetting.blocked_crawler_user_agents = "Googlebot"
get "/", headers: non_crawler
expect(@status).to eq(200)
get "/", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }
expect(@status).to eq(403)
expect {
get "/",
headers: {
"HTTP_USER_AGENT" => (+"Evil Googlebot String \xc3\x28").force_encoding("ASCII"),
}
expect(@status).to eq(403)
}.not_to raise_error
get "/",
headers: {
"HTTP_USER_AGENT" => "Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)",
}
expect(@status).to eq(200)
end
it "should never block robots.txt" do
SiteSetting.blocked_crawler_user_agents = "Googlebot"
get "/robots.txt",
headers: {
"HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)",
}
expect(@status).to eq(200)
end
it "should never block srv/status" do
SiteSetting.blocked_crawler_user_agents = "Googlebot"
get "/srv/status",
headers: {
"HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)",
}
expect(@status).to eq(200)
end
it "blocked crawlers shouldn't log page views" do
SiteSetting.blocked_crawler_user_agents = "Googlebot"
get "/", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }
expect(@env["discourse.request_tracker.skip"]).to eq(true)
end
it "blocks json requests" do
SiteSetting.blocked_crawler_user_agents = "Googlebot"
get "/srv/status.json",
headers: {
"HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)",
}
expect(@status).to eq(403)
end
end
end