d4d3580761: Previously, cached counting made Redis calls in the main thread and also performed the flush in the main thread, which could lead to pathological states under extremely heavy load. This refactor reduces that load and cleans up the interface.
51 lines | 1.2 KiB | Ruby
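To make the commit message concrete, here is a minimal, hypothetical sketch of the buffered-counting pattern it describes. The names (BufferedCounting, QUEUE, flush) are invented for illustration; the real logic lives in Discourse's CachedCounting concern, which is included (not extended) and differs in detail. The shape is the point: the hot path only enqueues, and a background flush aggregates and writes totals back through the model's write_cache!, as in the file below.

require "date"

# Hypothetical, simplified stand-in for the CachedCounting concern.
module BufferedCounting
  QUEUE = Queue.new

  # Hot path: enqueue only; no Redis or SQL work on the request thread.
  def perform_increment!(key)
    QUEUE << [self, key, Date.today]
  end

  # Run periodically from a single background thread: drain the queue,
  # aggregate in memory, then persist each total with one write_cache! call.
  def self.flush
    counts = Hash.new(0)
    counts[QUEUE.pop] += 1 until QUEUE.empty?
    counts.each { |(klass, key, date), n| klass.write_cache!(key, n, date) }
  end
end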
# frozen_string_literal: true

class WebCrawlerRequest < ActiveRecord::Base
  include CachedCounting

  cattr_accessor :max_record_age, :max_records_per_day

  # only keep the top records based on request count
  self.max_records_per_day = 200

  # delete records older than this
  self.max_record_age = 30.days

  # Hot path: hands the increment to CachedCounting, which buffers it
  # and flushes asynchronously, off the calling thread.
  def self.increment!(user_agent)
    perform_increment!(user_agent)
  end

  # Flush path: called by CachedCounting to apply a buffered total to
  # the row for (date, user_agent) in a single UPDATE.
  def self.write_cache!(user_agent, count, date)
    where(id: request_id(date: date, user_agent: user_agent))
      .update_all(["count = count + ?", count])
  end

  protected

  # Find or create the row for (date, user_agent). A concurrent create
  # can violate the unique index, so retry the lookup exactly once.
  def self.request_id(date:, user_agent:, retries: 0)
    id = where(date: date, user_agent: user_agent).pluck_first(:id)
    id ||= create!({ date: date, user_agent: user_agent }.merge(count: 0)).id
  rescue # primary key violation
    if retries == 0
      request_id(date: date, user_agent: user_agent, retries: 1)
    else
      raise
    end
  end
end

# == Schema Information
#
# Table name: web_crawler_requests
#
#  id         :bigint           not null, primary key
#  date       :date             not null
#  user_agent :string           not null
#  count      :integer          default(0), not null
#
# Indexes
#
#  index_web_crawler_requests_on_date_and_user_agent  (date,user_agent) UNIQUE
#
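For context, a hedged sketch of how this model is driven end to end. The user-agent and count values are illustrative, and the write_cache! call is normally made by the CachedCounting flush machinery rather than by hand:

# Hot path, e.g. from request-tracking middleware: no SQL on this thread.
WebCrawlerRequest.increment!("Googlebot")

# Later, on the flush path, the buffered total is handed back:
WebCrawlerRequest.write_cache!("Googlebot", 42, Date.today)
# Effectively: UPDATE web_crawler_requests SET count = count + 42
#              WHERE id = <row id for (today, "Googlebot")>

The UNIQUE index on (date, user_agent) is what makes request_id's find-or-create safe: if two flushers race to create the same row, the loser hits the index violation and the single retry finds the winner's row.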