discourse/spec/models/web_crawler_request_spec.rb
David Taylor 94a47d037f
PERF: Reduce number of EXPIRE calls from CachedCounting (#15958)
Previously we were calling `EXPIRE` every time we incremented a given key. Instead, we can call EXPIRE once when the key is first populated. A LUA script is used to make this as efficient as possible.

Consumers of this Concern use daily keys. Since we're now calling EXPIRE only at the beginning of the day, rather than throughout the day, the expire time has been increased from 3 to 4 days.
2022-02-15 16:55:21 +00:00

119 lines
3.1 KiB
Ruby

# frozen_string_literal: true
require 'rails_helper'
describe WebCrawlerRequest do
before do
WebCrawlerRequest.last_flush = Time.now.utc
WebCrawlerRequest.clear_cache!
end
after do
WebCrawlerRequest.clear_cache!
end
def inc(user_agent, opts = nil)
WebCrawlerRequest.increment!(user_agent, opts)
end
def disable_date_flush!
freeze_time(Time.now)
WebCrawlerRequest.last_flush = Time.now.utc
end
def web_crawler_request(user_agent)
WebCrawlerRequest.where(user_agent: user_agent).first
end
it 'works even if redis is in readonly' do
disable_date_flush!
inc('Googlebot')
inc('Googlebot')
Discourse.redis.without_namespace.stubs(:eval).raises(Redis::CommandError.new("READONLY"))
Discourse.redis.without_namespace.stubs(:evalsha).raises(Redis::CommandError.new("READONLY"))
Discourse.redis.without_namespace.stubs(:set).raises(Redis::CommandError.new("READONLY"))
inc('Googlebot', autoflush: 3)
WebCrawlerRequest.write_cache!
Discourse.redis.without_namespace.unstub(:eval)
Discourse.redis.without_namespace.unstub(:evalsha)
Discourse.redis.without_namespace.unstub(:set)
inc('Googlebot', autoflush: 3)
expect(web_crawler_request('Googlebot').count).to eq(3)
end
it 'logs nothing for an unflushed increment' do
WebCrawlerRequest.increment!('Googlebot')
expect(WebCrawlerRequest.count).to eq(0)
end
it 'can automatically flush' do
disable_date_flush!
inc('Googlebot', autoflush: 3)
expect(web_crawler_request('Googlebot')).to_not be_present
expect(WebCrawlerRequest.count).to eq(0)
inc('Googlebot', autoflush: 3)
expect(web_crawler_request('Googlebot')).to_not be_present
inc('Googlebot', autoflush: 3)
expect(web_crawler_request('Googlebot').count).to eq(3)
expect(WebCrawlerRequest.count).to eq(1)
3.times { inc('Googlebot', autoflush: 3) }
expect(web_crawler_request('Googlebot').count).to eq(6)
expect(WebCrawlerRequest.count).to eq(1)
end
it 'can flush based on time' do
t1 = Time.now.utc.at_midnight
freeze_time(t1)
WebCrawlerRequest.write_cache!
inc('Googlebot')
expect(WebCrawlerRequest.count).to eq(0)
freeze_time(t1 + WebCrawlerRequest.autoflush_seconds + 1)
inc('Googlebot')
expect(WebCrawlerRequest.count).to eq(1)
end
it 'flushes yesterdays results' do
t1 = Time.now.utc.at_midnight
freeze_time(t1)
inc('Googlebot')
freeze_time(t1.tomorrow)
inc('Googlebot')
WebCrawlerRequest.write_cache!
expect(WebCrawlerRequest.count).to eq(2)
end
it 'clears cache correctly' do
inc('Googlebot')
inc('Twitterbot')
WebCrawlerRequest.clear_cache!
WebCrawlerRequest.write_cache!
expect(WebCrawlerRequest.count).to eq(0)
end
it 'logs a few counts once flushed' do
time = Time.now.at_midnight
freeze_time(time)
3.times { inc('Googlebot') }
2.times { inc('Twitterbot') }
4.times { inc('Bingbot') }
WebCrawlerRequest.write_cache!
expect(web_crawler_request('Googlebot').count).to eq(3)
expect(web_crawler_request('Twitterbot').count).to eq(2)
expect(web_crawler_request('Bingbot').count).to eq(4)
end
end