discourse/app/models/web_crawler_request.rb
Sam Saffron 30990006a9 DEV: enable frozen string literal on all files
This reduces chances of errors where consumers of strings mutate inputs
and reduces memory usage of the app.

Test suite passes now, but there may be some stuff left, so we will run
a few sites on a branch prior to merging
2019-05-13 09:31:32 +08:00

93 lines
2.1 KiB
Ruby

# frozen_string_literal: true
class WebCrawlerRequest < ActiveRecord::Base
include CachedCounting
# auto flush if older than this
self.autoflush_seconds = 1.hour
cattr_accessor :max_record_age, :max_records_per_day
# only keep the top records based on request count
self.max_records_per_day = 200
# delete records older than this
self.max_record_age = 30.days
def self.increment!(user_agent, opts = nil)
ua_list_key = user_agent_list_key
$redis.sadd(ua_list_key, user_agent)
$redis.expire(ua_list_key, 259200) # 3.days
perform_increment!(redis_key(user_agent), opts)
end
def self.write_cache!(date = nil)
if date.nil?
write_cache!(Time.now.utc)
write_cache!(Time.now.utc.yesterday)
return
end
self.last_flush = Time.now.utc
date = date.to_date
ua_list_key = user_agent_list_key(date)
while user_agent = $redis.spop(ua_list_key)
val = get_and_reset(redis_key(user_agent, date))
next if val == 0
self.where(id: req_id(date, user_agent)).update_all(["count = count + ?", val])
end
rescue Redis::CommandError => e
raise unless e.message =~ /READONLY/
nil
end
def self.clear_cache!(date = nil)
if date.nil?
clear_cache!(Time.now.utc)
clear_cache!(Time.now.utc.yesterday)
return
end
ua_list_key = user_agent_list_key(date)
while user_agent = $redis.spop(ua_list_key)
$redis.del redis_key(user_agent, date)
end
$redis.del(ua_list_key)
end
protected
def self.user_agent_list_key(time = Time.now.utc)
"crawl_ua_list:#{time.strftime('%Y%m%d')}"
end
def self.redis_key(user_agent, time = Time.now.utc)
"crawl_req:#{time.strftime('%Y%m%d')}:#{user_agent}"
end
def self.req_id(date, user_agent)
request_id(date: date, user_agent: user_agent)
end
end
# == Schema Information
#
# Table name: web_crawler_requests
#
# id :bigint not null, primary key
# date :date not null
# user_agent :string not null
# count :integer default(0), not null
#
# Indexes
#
# index_web_crawler_requests_on_date_and_user_agent (date,user_agent) UNIQUE
#