2019-04-30 08:27:42 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2022-07-28 10:27:38 +08:00
|
|
|
RSpec.describe Middleware::AnonymousCache do
|
2020-08-03 14:15:59 +08:00
|
|
|
# Middleware under test, wrapping a stub downstream app that always answers
# 200 with empty headers and an empty body.
let(:middleware) { Middleware::AnonymousCache.new(lambda { |_| [200, {}, []] }) }
|
2013-10-16 13:39:18 +08:00
|
|
|
|
2023-07-28 19:53:44 +08:00
|
|
|
# Calls Middleware::AnonymousCache.enable_anon_cache ahead of every example.
before { Middleware::AnonymousCache.enable_anon_cache }
|
|
|
|
|
2015-02-04 13:14:56 +08:00
|
|
|
# Builds a request env hash for the examples.
#
# @param opts [Hash] extra env entries merged over the generated env. The
#   optional :path entry is not merged; it is handed to create_request_env
#   as the request path (default: "http://test.com/path?bla=1").
# @return [Hash] the result of create_request_env merged with the remaining opts.
def env(opts = {})
  # Separate :path from the overrides without mutating the caller's hash.
  overrides = opts.reject { |key, _| key == :path }
  create_request_env(path: opts[:path] || "http://test.com/path?bla=1").merge(overrides)
end
|
|
|
|
|
2020-08-03 14:15:59 +08:00
|
|
|
describe Middleware::AnonymousCache::Helper do
|
|
|
|
# Convenience constructor for the helper under test.
#
# @param opts [Hash] forwarded to `env` to build the request env.
# @return [Middleware::AnonymousCache::Helper] helper wrapping that env.
def new_helper(opts = {})
  Middleware::AnonymousCache::Helper.new(env(opts))
end
|
|
|
|
|
2022-07-28 00:14:14 +08:00
|
|
|
# Which requests the helper reports as cacheable: plain anonymous GETs are,
# while non-GET, authenticated, status-route and API requests are not.
describe "#cacheable?" do
  it "true by default" do
    expect(new_helper.cacheable?).to eq(true)
  end

  it "is false for non GET" do
    expect(
      new_helper("ANON_CACHE_DURATION" => 10, "REQUEST_METHOD" => "POST").cacheable?,
    ).to eq(false)
  end

  it "is false if it has a valid auth cookie" do
    cookie = create_auth_cookie(token: SecureRandom.hex)
    expect(new_helper("HTTP_COOKIE" => "jack=1; _t=#{cookie}; jill=2").cacheable?).to eq(false)
  end

  it "is true if it has an invalid auth cookie" do
    # Corrupt an otherwise well-formed cookie in two ways: swap characters in
    # it, then prefix bytes that do not form a valid sequence.
    cookie = create_auth_cookie(token: SecureRandom.hex, issued_at: 5.minutes.ago)
    cookie = swap_2_different_characters(cookie)
    cookie.prepend("%a0%a1") # an invalid byte sequence
    expect(new_helper("HTTP_COOKIE" => "jack=1; _t=#{cookie}; jill=2").cacheable?).to eq(true)
  end

  it "is false for srv/status routes" do
    expect(new_helper("PATH_INFO" => "/srv/status").cacheable?).to eq(false)
  end

  it "is false for API requests using header" do
    expect(new_helper("HTTP_API_KEY" => "abcde").cacheable?).to eq(false)
  end

  it "is false for API requests using parameter" do
    expect(new_helper(path: "/path?api_key=abc").cacheable?).to eq(false)
  end

  it "is false for User API requests using header" do
    expect(new_helper("HTTP_USER_API_KEY" => "abcde").cacheable?).to eq(false)
  end
end
|
2013-10-17 07:37:06 +08:00
|
|
|
|
2022-07-28 00:14:14 +08:00
|
|
|
# The cache key must vary with a usable theme id from the theme_ids cookie
# and ignore a value that is not one.
describe "per theme cache" do
  it "handles theme keys" do
    theme = Fabricate(:theme, user_selectable: true)

    with_bad_theme_key = new_helper("HTTP_COOKIE" => "theme_ids=abc").cache_key
    with_no_theme_key = new_helper.cache_key

    expect(with_bad_theme_key).to eq(with_no_theme_key)

    with_good_theme_key = new_helper("HTTP_COOKIE" => "theme_ids=#{theme.id}").cache_key

    expect(with_good_theme_key).not_to eq(with_no_theme_key)
  end
end
|
2013-10-16 13:39:18 +08:00
|
|
|
|
2022-09-27 16:56:06 +08:00
|
|
|
# Locale only becomes part of the cache key once the relevant site settings
# are switched on.
context "with header or cookie based custom locale" do
  it "handles different languages" do
    # Normally does not check the language header
    french1 = new_helper("HTTP_ACCEPT_LANGUAGE" => "fr").cache_key
    french2 = new_helper("HTTP_ACCEPT_LANGUAGE" => "FR").cache_key
    english = new_helper("HTTP_ACCEPT_LANGUAGE" => SiteSetting.default_locale).cache_key
    none = new_helper.cache_key

    expect(none).to eq(french1)
    expect(none).to eq(french2)
    expect(none).to eq(english)

    SiteSetting.allow_user_locale = true
    SiteSetting.set_locale_from_accept_language_header = true

    # With the settings enabled the header is honoured, case-insensitively,
    # and the default locale still matches the header-less key.
    french1 = new_helper("HTTP_ACCEPT_LANGUAGE" => "fr").cache_key
    french2 = new_helper("HTTP_ACCEPT_LANGUAGE" => "FR").cache_key
    english = new_helper("HTTP_ACCEPT_LANGUAGE" => SiteSetting.default_locale).cache_key
    none = new_helper.cache_key

    expect(none).to eq(english)
    expect(french1).to eq(french2)
    expect(french1).not_to eq(none)

    SiteSetting.set_locale_from_cookie = true
    expect(new_helper("HTTP_COOKIE" => "locale=es;").cache_key).to include("l=es")
  end
end
|
|
|
|
|
2022-04-14 21:25:52 +08:00
|
|
|
# A user agent listed in browser_update_user_agents gets its own cache key.
it "handles old browsers" do
  SiteSetting.browser_update_user_agents = "my_old_browser"

  key1 = new_helper("HTTP_USER_AGENT" => "my_old_browser").cache_key
  key2 = new_helper("HTTP_USER_AGENT" => "my_new_browser").cache_key
  expect(key1).not_to eq(key2)
end
|
|
|
|
|
|
|
|
# iPhone OS 7 and iPhone OS 15 user agents must not share a cache key.
it "handles modern mobile browsers" do
  key1 = new_helper("HTTP_USER_AGENT" => "Safari (iPhone OS 7)").cache_key
  key2 = new_helper("HTTP_USER_AGENT" => "Safari (iPhone OS 15)").cache_key
  expect(key1).not_to eq(key2)
end
|
|
|
|
|
2024-05-24 09:49:17 +08:00
|
|
|
# A user agent carrying invalid bytes must not raise while building the cache
# key, and must produce the same key as its scrubbed UTF-8 form.
it "handles user agents with invalid bytes" do
  agent = (+"Evil Googlebot String \xc3\x28").force_encoding("ASCII")

  expect {
    key1 = new_helper("HTTP_USER_AGENT" => agent).cache_key
    key2 =
      new_helper(
        "HTTP_USER_AGENT" => agent.encode("utf-8", invalid: :replace, undef: :replace),
      ).cache_key
    expect(key1).to eq(key2)
  }.not_to raise_error
end
|
|
|
|
|
2022-07-28 00:14:14 +08:00
|
|
|
# Round-trips responses through the cache. Each example stores via the
# memoized `helper`/`crawler` and reads back through a freshly built helper
# for an equivalent request.
context "when cached" do
  let!(:helper) { new_helper("ANON_CACHE_DURATION" => 10) }

  let!(:crawler) do
    new_helper(
      "ANON_CACHE_DURATION" => 10,
      "HTTP_USER_AGENT" => "AdsBot-Google (+http://www.google.com/adsbot.html)",
    )
  end

  after do
    helper.clear_cache
    crawler.clear_cache
  end

  before { global_setting :anon_cache_store_threshold, 1 }

  it "compresses body on demand" do
    global_setting :compress_anon_cache, true

    payload = "x" * 1000
    helper.cache([200, { "HELLO" => "WORLD" }, [payload]])

    # A distinct local name keeps the memoized `helper` visible and unshadowed.
    fresh_helper = new_helper("ANON_CACHE_DURATION" => 10)
    expect(fresh_helper.cached).to eq(
      [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [payload]],
    )

    # depends on i7z implementation, but lets assume it is stable unless we discover
    # otherwise
    expect(Discourse.redis.get(fresh_helper.cache_key_body).length).to eq(16)
  end

  it "handles brotli switching" do
    helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])

    plain_helper = new_helper("ANON_CACHE_DURATION" => 10)
    expect(plain_helper.cached).to eq(
      [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]],
    )

    # A request advertising brotli must not be served the entry stored above.
    brotli_helper = new_helper("ANON_CACHE_DURATION" => 10, "HTTP_ACCEPT_ENCODING" => "gz, br")
    expect(brotli_helper.cached).to eq(nil)
  end

  it "returns cached data for cached requests" do
    helper.is_mobile = true
    expect(helper.cached).to eq(nil)
    helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])

    mobile_helper = new_helper("ANON_CACHE_DURATION" => 10)
    mobile_helper.is_mobile = true
    expect(mobile_helper.cached).to eq(
      [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]],
    )

    # The crawler helper sees nothing until it stores its own response.
    expect(crawler.cached).to eq(nil)
    crawler.cache([200, { "HELLO" => "WORLD" }, ["hello ", "world"]])
    expect(crawler.cached).to eq(
      [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello world"]],
    )
  end
end
|
|
|
|
end
|
|
|
|
|
2022-07-28 00:14:14 +08:00
|
|
|
# Requests flagged with the Discourse-Background header are rejected with 429
# once their queue time exceeds background_requests_max_queue_length.
describe "background request rate limit" do
  it "will rate limit background requests" do
    app = Middleware::AnonymousCache.new(lambda { |_env| [200, {}, ["ok"]] })

    global_setting :background_requests_max_queue_length, "0.5"

    cookie = create_auth_cookie(token: SecureRandom.hex)
    env =
      create_request_env.merge(
        "HTTP_COOKIE" => "_t=#{cookie}",
        "HOST" => "site.com",
        "REQUEST_METHOD" => "GET",
        "REQUEST_URI" => "/somewhere/rainbow",
        "REQUEST_QUEUE_SECONDS" => 2.1,
        "rack.input" => StringIO.new,
      )

    # non background ... long request
    env["REQUEST_QUEUE_SECONDS"] = 2

    status, _ = app.call(env.dup)
    expect(status).to eq(200)

    env["HTTP_DISCOURSE_BACKGROUND"] = "true"

    status, headers, body = app.call(env.dup)
    expect(status).to eq(429)
    expect(headers["content-type"]).to eq("application/json; charset=utf-8")
    json = JSON.parse(body.join)
    expect(json["extras"]["wait_seconds"]).to be > 4.9

    # Under the 0.5s limit a background request passes again.
    env["REQUEST_QUEUE_SECONDS"] = 0.4

    status, _ = app.call(env.dup)
    expect(status).to eq(200)
  end
end
|
|
|
|
|
2022-07-28 00:14:14 +08:00
|
|
|
# When a logged-in client keeps sending slow-queued requests, the middleware
# strips the login markers so the downstream app sees an anonymous request.
describe "#force_anonymous!" do
  before { RateLimiter.enable }

  use_redis_snapshotting

  it "will revert to anonymous once we reach the limit" do
    is_anon = false

    # The downstream app records whether the login markers were removed from
    # the env it received.
    app =
      Middleware::AnonymousCache.new(
        lambda do |request_env|
          is_anon =
            request_env["HTTP_COOKIE"].nil? && request_env["HTTP_DISCOURSE_LOGGED_IN"].nil?
          [200, {}, ["ok"]]
        end,
      )

    global_setting :force_anonymous_min_per_10_seconds, 2
    global_setting :force_anonymous_min_queue_seconds, 1

    cookie = create_auth_cookie(token: SecureRandom.hex)
    env =
      create_request_env.merge(
        "HTTP_COOKIE" => "_t=#{cookie}",
        "HTTP_DISCOURSE_LOGGED_IN" => "true",
        "HOST" => "site.com",
        "REQUEST_METHOD" => "GET",
        "REQUEST_URI" => "/somewhere/rainbow",
        "REQUEST_QUEUE_SECONDS" => 2.1,
        "rack.input" => StringIO.new,
      )

    # The first two slow requests are still treated as logged in ...
    is_anon = false
    app.call(env.dup)
    expect(is_anon).to eq(false)

    is_anon = false
    app.call(env.dup)
    expect(is_anon).to eq(false)

    # ... and from the third one on the request is forced anonymous.
    is_anon = false
    app.call(env.dup)
    expect(is_anon).to eq(true)

    is_anon = false
    _status, headers, _body = app.call(env.dup)
    expect(is_anon).to eq(true)
    expect(headers["Set-Cookie"]).to eq("dosp=1; Path=/")

    # tricky change, a 50ms delay still will trigger protection
    # once it is tripped

    env["REQUEST_QUEUE_SECONDS"] = 0.05
    is_anon = false

    app.call(env.dup)
    expect(is_anon).to eq(true)

    is_anon = false
    env["REQUEST_QUEUE_SECONDS"] = 0.01

    app.call(env.dup)
    expect(is_anon).to eq(false)
  end
end
|
|
|
|
|
2022-07-28 00:14:14 +08:00
|
|
|
# A GET whose rack input carries a body is answered with 413 and a
# non-cacheable Cache-Control header.
describe "invalid request payload" do
  it "returns 413 for GET request with payload" do
    status, headers, _ =
      middleware.call(env.tap { |environment| environment[Rack::RACK_INPUT].write("test") })

    expect(status).to eq(413)
    expect(headers["Cache-Control"]).to eq("private, max-age=0, must-revalidate")
  end
end
|
|
|
|
|
2022-07-28 00:14:14 +08:00
|
|
|
describe "crawler blocking" do
|
2018-07-04 09:14:43 +08:00
|
|
|
# Request headers with a desktop Chrome user agent string, used by the
# examples below as the non-crawler case.
let :non_crawler do
  {
    "HTTP_USER_AGENT" =>
      "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
  }
end
|
|
|
|
|
|
|
|
# Sends a request for +path+ through the middleware and stores the outcome in
# @env, @status, @response_header and @response for the example to inspect.
#
# @param path [String] used for REQUEST_URI, PATH_INFO and REQUEST_PATH.
# @param options [Hash] :headers is an optional Hash of extra env entries
#   merged over the path entries.
def get(path, options = {})
  request_paths = { "REQUEST_URI" => path, "PATH_INFO" => path, "REQUEST_PATH" => path }
  # A missing or nil :headers entry is treated as "no extra headers" rather
  # than raising from Hash#merge(nil).
  @env = env(request_paths.merge(options[:headers] || {}))
  @status, @response_header, @response = middleware.call(@env)
end
|
|
|
|
|
2020-07-27 08:23:54 +08:00
|
|
|
# With an allow-list set, only the listed crawler and non-crawler user agents
# get through; any other crawler receives 403.
it "applies allowed_crawler_user_agents correctly" do
  SiteSetting.allowed_crawler_user_agents = "Googlebot"

  get "/", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }

  expect(@status).to eq(200)

  get "/",
      headers: {
        "HTTP_USER_AGENT" => "Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)",
      }

  expect(@status).to eq(403)
  expect(@response).to be_an(Array)

  get "/", headers: non_crawler
  expect(@status).to eq(200)
end
|
|
|
|
|
2018-09-15 03:34:21 +08:00
|
|
|
# API requests pass the crawler allow-list whether the key arrives in the
# query string or in headers.
it "doesn't block api requests" do
  SiteSetting.allowed_crawler_user_agents = "Googlebot"
  api_key = Fabricate(:api_key)

  get "/latest?api_key=#{api_key.key}&api_username=system",
      headers: {
        "QUERY_STRING" => "api_key=#{api_key.key}&api_username=system",
      }
  expect(@status).to eq(200)

  get "/latest", headers: { "HTTP_API_KEY" => api_key.key, "HTTP_API_USERNAME" => "system" }
  expect(@status).to eq(200)
end
|
|
|
|
|
2020-07-27 08:23:54 +08:00
|
|
|
# With a block-list set, only matching crawlers receive 403, including a
# matching user agent that contains invalid bytes.
it "applies blocked_crawler_user_agents correctly" do
  SiteSetting.blocked_crawler_user_agents = "Googlebot"

  get "/", headers: non_crawler
  expect(@status).to eq(200)

  get "/", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }

  expect(@status).to eq(403)

  expect {
    get "/",
        headers: {
          "HTTP_USER_AGENT" => (+"Evil Googlebot String \xc3\x28").force_encoding("ASCII"),
        }

    expect(@status).to eq(403)
  }.not_to raise_error

  get "/",
      headers: {
        "HTTP_USER_AGENT" => "Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)",
      }

  expect(@status).to eq(200)
end
|
|
|
|
|
|
|
|
# Even a blocked crawler may fetch /robots.txt.
it "should never block robots.txt" do
  SiteSetting.blocked_crawler_user_agents = "Googlebot"

  get "/robots.txt",
      headers: {
        "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)",
      }

  expect(@status).to eq(200)
end
|
|
|
|
|
2018-07-18 10:33:06 +08:00
|
|
|
# Even a blocked crawler may fetch /srv/status.
it "should never block srv/status" do
  SiteSetting.blocked_crawler_user_agents = "Googlebot"

  get "/srv/status",
      headers: {
        "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)",
      }

  expect(@status).to eq(200)
end
|
|
|
|
|
|
|
|
# A blocked crawler request must leave the request-tracker skip flag set in
# the env.
it "blocked crawlers shouldn't log page views" do
  SiteSetting.blocked_crawler_user_agents = "Googlebot"

  get "/", headers: { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }

  expect(@env["discourse.request_tracker.skip"]).to eq(true)
end
|
|
|
|
|
|
|
|
# The /srv/status exemption does not extend to /srv/status.json: a blocked
# crawler still receives 403 there.
it "blocks json requests" do
  SiteSetting.blocked_crawler_user_agents = "Googlebot"

  get "/srv/status.json",
      headers: {
        "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)",
      }

  expect(@status).to eq(403)
end
|
|
|
|
end
|
2013-10-16 13:39:18 +08:00
|
|
|
end
|