From c0293339b87c39bbd2b91f87e2d39179f40c9a9a Mon Sep 17 00:00:00 2001 From: David Taylor Date: Wed, 7 Oct 2020 15:30:38 +0100 Subject: [PATCH] PERF: Do not enqueue digest emails when attempted recently (#10849) Previously, Jobs::EnqueueDigestEmails would enqueue a digest job for every user, even if there are no topics to send. The digest job would exit, no email would send, and last_emailed_at would not change. 30 minutes later, Jobs::EnqueueDigestEmails would run again and re-enqueue jobs for the same users. 120fa8ad introduced a temporary mitigation for this issue, by randomly selecting a subset of those users each time. This commit adds a new `digest_attempted_at` column to the `user_stats` table. This column is updated every time a digest job completes for a user. Using this, we can avoid scheduling digest jobs for the same user every 30 minutes. This also removes the random user selection in 120fa8ad, and instead prioritizes users who had digests attempted the longest time ago. --- app/jobs/regular/user_email.rb | 9 +++++++ app/jobs/scheduled/enqueue_digest_emails.rb | 10 +++---- app/models/user_stat.rb | 1 + ...5_add_digest_attempted_at_to_user_stats.rb | 7 +++++ spec/jobs/enqueue_digest_emails_spec.rb | 27 ++++++++++++++----- spec/jobs/user_email_spec.rb | 4 +++ 6 files changed, 46 insertions(+), 12 deletions(-) create mode 100644 db/migrate/20201007124955_add_digest_attempted_at_to_user_stats.rb diff --git a/app/jobs/regular/user_email.rb b/app/jobs/regular/user_email.rb index f9d9fd6852f..191d16c13cb 100644 --- a/app/jobs/regular/user_email.rb +++ b/app/jobs/regular/user_email.rb @@ -22,6 +22,15 @@ module Jobs # of extra work when emails are disabled. return if quit_email_early? + send_user_email(args) + + if args[:user_id].present? && args[:type] == :digest + # Record every attempt at sending a digest email, even if it was skipped + UserStat.where(user_id: args[:user_id]).update_all(digest_attempted_at: Time.zone.now) + end + end + + def send_user_email(args) post = nil notification = nil type = args[:type] diff --git a/app/jobs/scheduled/enqueue_digest_emails.rb b/app/jobs/scheduled/enqueue_digest_emails.rb index d0fba281b92..4496f2548b9 100644 --- a/app/jobs/scheduled/enqueue_digest_emails.rb +++ b/app/jobs/scheduled/enqueue_digest_emails.rb @@ -6,13 +6,9 @@ module Jobs every 30.minutes def execute(args) - return if SiteSetting.disable_digest_emails? || SiteSetting.private_email? + return if SiteSetting.disable_digest_emails? || SiteSetting.private_email? || SiteSetting.disable_emails == 'yes' users = target_user_ids - if users.length > GlobalSetting.max_digests_enqueued_per_30_mins_per_site - users = users.shuffle[0...GlobalSetting.max_digests_enqueued_per_30_mins_per_site] - end - users.each do |user_id| ::Jobs.enqueue(:user_email, type: :digest, user_id: user_id) end @@ -30,12 +26,16 @@ module Jobs .where("user_stats.bounce_score < #{SiteSetting.bounce_score_threshold}") .where("user_emails.primary") .where("COALESCE(last_emailed_at, '2010-01-01') <= CURRENT_TIMESTAMP - ('1 MINUTE'::INTERVAL * user_options.digest_after_minutes)") + .where("COALESCE(user_stats.digest_attempted_at, '2010-01-01') <= CURRENT_TIMESTAMP - ('1 MINUTE'::INTERVAL * user_options.digest_after_minutes)") .where("COALESCE(last_seen_at, '2010-01-01') <= CURRENT_TIMESTAMP - ('1 MINUTE'::INTERVAL * user_options.digest_after_minutes)") .where("COALESCE(last_seen_at, '2010-01-01') >= CURRENT_TIMESTAMP - ('1 DAY'::INTERVAL * #{SiteSetting.suppress_digest_email_after_days})") + .order("user_stats.digest_attempted_at ASC NULLS FIRST") # If the site requires approval, make sure the user is approved query = query.where("approved OR moderator OR admin") if SiteSetting.must_approve_users? + query = query.limit(GlobalSetting.max_digests_enqueued_per_30_mins_per_site) + query.pluck(:id) end diff --git a/app/models/user_stat.rb b/app/models/user_stat.rb index bc2a9d251ca..364b4a2618e 100644 --- a/app/models/user_stat.rb +++ b/app/models/user_stat.rb @@ -290,4 +290,5 @@ end # flags_ignored :integer default(0), not null # first_unread_at :datetime not null # distinct_badge_count :integer default(0), not null +# digest_attempted_at :datetime # diff --git a/db/migrate/20201007124955_add_digest_attempted_at_to_user_stats.rb b/db/migrate/20201007124955_add_digest_attempted_at_to_user_stats.rb new file mode 100644 index 00000000000..496e69bc649 --- /dev/null +++ b/db/migrate/20201007124955_add_digest_attempted_at_to_user_stats.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class AddDigestAttemptedAtToUserStats < ActiveRecord::Migration[6.0] + def change + add_column :user_stats, :digest_attempted_at, :timestamp + end +end diff --git a/spec/jobs/enqueue_digest_emails_spec.rb b/spec/jobs/enqueue_digest_emails_spec.rb index 97ab4e3854d..b207d8484e5 100644 --- a/spec/jobs/enqueue_digest_emails_spec.rb +++ b/spec/jobs/enqueue_digest_emails_spec.rb @@ -122,16 +122,29 @@ describe Jobs::EnqueueDigestEmails do let(:user) { Fabricate(:user) } it "limits jobs enqueued per max_digests_enqueued_per_30_mins_per_site" do - Fabricate(:user, last_seen_at: 2.months.ago, last_emailed_at: 2.months.ago) - Fabricate(:user, last_seen_at: 2.months.ago, last_emailed_at: 2.months.ago) + user1 = Fabricate(:user, last_seen_at: 2.months.ago, last_emailed_at: 2.months.ago) + user2 = Fabricate(:user, last_seen_at: 2.months.ago, last_emailed_at: 2.months.ago) + + user1.user_stat.update(digest_attempted_at: 2.week.ago) + user2.user_stat.update(digest_attempted_at: 3.weeks.ago) global_setting :max_digests_enqueued_per_30_mins_per_site, 1 - # I don't love fakes, but no point sending this fake email - Sidekiq::Testing.fake! do - expect do - Jobs::EnqueueDigestEmails.new.execute(nil) - end.to change(Jobs::UserEmail.jobs, :size).by (1) + expect_enqueued_with(job: :user_email, args: { type: :digest, user_id: user2.id }) do + expect { Jobs::EnqueueDigestEmails.new.execute(nil) }.to change(Jobs::UserEmail.jobs, :size).by (1) + end + + # The job didn't actually run, so fake the user_stat update + user2.user_stat.update(digest_attempted_at: Time.zone.now) + + expect_enqueued_with(job: :user_email, args: { type: :digest, user_id: user1.id }) do + expect { Jobs::EnqueueDigestEmails.new.execute(nil) }.to change(Jobs::UserEmail.jobs, :size).by (1) + end + + user1.user_stat.update(digest_attempted_at: Time.zone.now) + + expect_not_enqueued_with(job: :user_email) do + Jobs::EnqueueDigestEmails.new.execute(nil) end end diff --git a/spec/jobs/user_email_spec.rb b/spec/jobs/user_email_spec.rb index 4a2b40b24ab..8caff3c5c28 100644 --- a/spec/jobs/user_email_spec.rb +++ b/spec/jobs/user_email_spec.rb @@ -42,17 +42,20 @@ describe Jobs::UserEmail do context 'not emailed recently' do before do + freeze_time user.update!(last_emailed_at: 8.days.ago) end it "calls the mailer when the user exists" do Jobs::UserEmail.new.execute(type: :digest, user_id: user.id) expect(ActionMailer::Base.deliveries).to_not be_empty + expect(user.user_stat.reload.digest_attempted_at).to eq_time(Time.zone.now) end end context 'recently emailed' do before do + freeze_time user.update!(last_emailed_at: 2.hours.ago) user.user_option.update!(digest_after_minutes: 1.day.to_i / 60) end @@ -60,6 +63,7 @@ describe Jobs::UserEmail do it 'skips sending digest email' do Jobs::UserEmail.new.execute(type: :digest, user_id: user.id) expect(ActionMailer::Base.deliveries).to eq([]) + expect(user.user_stat.reload.digest_attempted_at).to eq_time(Time.zone.now) end end end