FEATURE: improve performance of anonymous cache

This commit introduces 2 features:

1. DISCOURSE_COMPRESS_ANON_CACHE (true|false, default false): this allows
you to optionally compress the anon cache body entries in Redis, which can
be useful for high load sites where Redis lives on a separate server to the
web servers (see the first sketch after this list)

2. DISCOURSE_ANON_CACHE_STORE_THRESHOLD (default 2): only store entries in
Redis if we observe them more than N times (see the second sketch after this
list). This avoids situations where a crawler can walk a big pile of topics
and store them all in Redis, never to be used. Our default anon cache time
for topics is only 60 seconds. The anon cache is in place to avoid the
"slashdot" effect, where a single topic is hit by 100s of people in one minute.
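
For illustration, a minimal sketch of the round trip the compression setting enables, using the same lz4-ruby calls as the diff below; the payload is a made-up stand-in for a cached response body:

require "lz4-ruby"

body = "x" * 1000                     # a highly repetitive cache body
compressed = LZ4::compress(body)      # what gets written to Redis when enabled
compressed.bytesize                   # => tiny compared to 1000 for bodies like this
LZ4::uncompress(compressed) == body   # => true, the round trip is lossless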
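
And a sketch of the threshold gating, assuming a redis-rb client named `redis`, the 60 second topic cache duration mentioned above, and a hypothetical key name; the Lua body matches the one in the diff below:

count = redis.eval(<<~LUA, keys: ["ANON_CACHE_example_count"], argv: [60])
  local current = redis.call("incr", KEYS[1])
  redis.call("expire", KEYS[1], ARGV[1])
  return current
LUA

# with the default threshold of 2, the first hit only counts;
# a second hit within 60 seconds actually stores the entry
store_in_redis = count.to_i >= 2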
Sam Saffron 2019-09-04 17:18:32 +10:00
parent 16681cb648
commit ed00f35306
6 changed files with 78 additions and 3 deletions

@@ -210,6 +210,7 @@ gem 'rubyzip', require: false
 gem 'sshkey', require: false
 gem 'rchardet', require: false
+gem 'lz4-ruby', require: false, platform: :mri
 
 if ENV["IMPORT"] == "1"
   gem 'mysql2'

@@ -174,6 +174,7 @@ GEM
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
     lru_redux (1.1.0)
+    lz4-ruby (0.3.3)
     mail (2.7.1)
       mini_mime (>= 0.1.1)
     maxminddb (0.1.22)

@@ -467,6 +468,7 @@ DEPENDENCIES
   logstash-logger
   logster
   lru_redux
+  lz4-ruby
   mail
   maxminddb
   memory_profiler

@@ -266,3 +266,15 @@ enable_js_error_reporting = true
 # Having a high number here is very low risk. Regular jobs are limited in scope and scale.
 mini_scheduler_workers = 5
+
+# enable compression on anonymous cache redis entries
+# this slightly increases the cost of storing cache entries but can make it much
+# cheaper to retrieve cache entries when redis is on a different machine to the one
+# running the web server
+compress_anon_cache = false
+
+# only store entries in redis for the anonymous cache if they are observed more than N times
+# for a specific key
+#
+# this ensures there are no pathological cases where we keep storing data in the anonymous
+# cache never to use it; set to 1 to store immediately, set to 0 to disable the anon cache
+anon_cache_store_threshold = 2
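
As the commit message notes, these conf keys are also reachable as DISCOURSE_-prefixed environment variables. A hedged sketch of reading them through GlobalSetting, with the defaults above as the return values:

GlobalSetting.compress_anon_cache        # => false unless overridden,
                                         #    e.g. DISCOURSE_COMPRESS_ANON_CACHE=true
GlobalSetting.anon_cache_store_threshold # => 2; 1 stores on the first hit,
                                         #    0 disables the anon cache entirely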

@@ -73,7 +73,7 @@ module Middleware
     end
 
     def cache_key
-      @cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}"
+      @cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}#{GlobalSetting.compress_anon_cache}"
     end
 
     def theme_ids
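
Interpolating GlobalSetting.compress_anon_cache into cache_key means flipping the setting changes every key, so a process expecting plain bodies never reads back an LZ4 one. Hypothetical keys for the same request under each setting:

# compress_anon_cache = false
"ANON_CACHE_text/html_example.com/t/1|m=false|c=false|b=true|t=1,2false"
# compress_anon_cache = true
"ANON_CACHE_text/html_example.com/t/1|m=false|c=false|b=true|t=1,2true"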
@@ -86,6 +86,10 @@ module Middleware
       end
     end
 
+    def cache_key_count
+      @cache_key_count ||= "#{cache_key}_count"
+    end
+
     def cache_key_body
       @cache_key_body ||= "#{cache_key}_body"
     end
@@ -154,8 +158,26 @@ module Middleware
       !!(!has_auth_cookie? && get? && no_cache_bypass)
     end
 
+    def compress(val)
+      if val && GlobalSetting.compress_anon_cache
+        require "lz4-ruby" if !defined?(LZ4)
+        LZ4::compress(val)
+      else
+        val
+      end
+    end
+
+    def decompress(val)
+      if val && GlobalSetting.compress_anon_cache
+        require "lz4-ruby" if !defined?(LZ4)
+        LZ4::uncompress(val)
+      else
+        val
+      end
+    end
+
     def cached(env = {})
-      if body = $redis.get(cache_key_body)
+      if body = decompress($redis.get(cache_key_body))
         if other = $redis.get(cache_key_other)
           other = JSON.parse(other)
           if req_params = other[1].delete(ADP)
@@ -174,9 +196,27 @@ module Middleware
     # that fills it up, this avoids a herd killing you, we can probably do this using a job or redis tricks
     # but coordinating this is tricky
     def cache(result, env = {})
+      return result if GlobalSetting.anon_cache_store_threshold == 0
+
       status, headers, response = result
 
       if status == 200 && cache_duration
+        if GlobalSetting.anon_cache_store_threshold > 1
+          count = $redis.eval(<<~REDIS, [cache_key_count], [cache_duration])
+            local current = redis.call("incr", KEYS[1])
+            redis.call("expire", KEYS[1], ARGV[1])
+            return current
+          REDIS
+
+          # technically lua will cast for us, but might as well be
+          # prudent here, hence the to_i
+          if count.to_i < GlobalSetting.anon_cache_store_threshold
+            headers["X-Discourse-Cached"] = "skip"
+            return [status, headers, response]
+          end
+        end
+
         headers_stripped = headers.dup.delete_if { |k, _| ["Set-Cookie", "X-MiniProfiler-Ids"].include? k }
         headers_stripped["X-Discourse-Cached"] = "true"
         parts = []
@@ -191,7 +231,7 @@ module Middleware
         }
       end
 
-      $redis.setex(cache_key_body, cache_duration, parts.join)
+      $redis.setex(cache_key_body, cache_duration, compress(parts.join))
       $redis.setex(cache_key_other, cache_duration, [status, headers_stripped].to_json)
 
       headers["X-Discourse-Cached"] = "store"

@@ -129,6 +129,24 @@ describe Middleware::AnonymousCache::Helper do
       crawler.clear_cache
     end
 
+    before do
+      global_setting :anon_cache_store_threshold, 1
+    end
+
+    it "compresses body on demand" do
+      global_setting :compress_anon_cache, true
+
+      payload = "x" * 1000
+      helper.cache([200, { "HELLO" => "WORLD" }, [payload]])
+
+      helper = new_helper("ANON_CACHE_DURATION" => 10)
+      expect(helper.cached).to eq([200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [payload]])
+
+      # depends on the lz4 implementation, but let's assume it is stable unless we
+      # discover otherwise
+      expect($redis.get(helper.cache_key_body).length).to eq(16)
+    end
+
     it "handles brotli switching" do
       helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])

@@ -285,7 +285,9 @@ describe Middleware::RequestTracker do
       }
 
       tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params))
+      expect(@data[:cache]).to eq("skip")
+      tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params))
       expect(@data[:cache]).to eq("store")
 
       tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60))