From 47e58357b6a1f9b8402a5fa7333237692da2e493 Mon Sep 17 00:00:00 2001 From: Ted Johansson Date: Fri, 3 Nov 2023 09:05:29 +0800 Subject: [PATCH] DEV: Parallel scheduled admin checks (#24190) This PR does some preparatory refactoring of scheduled admin checks in order for us to be able to do custom retry strategies for some of them. Instead of running all checks in sequence inside a single, scheduled job, the scheduled job spawns one new job per check. In order to be concurrency-safe, we need to change the existing Redis data structure from a string (of serialized JSON) to a list of strings (of serialized JSON). --- app/jobs/regular/problem_check.rb | 15 +++++ app/jobs/scheduled/problem_checks.rb | 2 + app/models/admin_dashboard_data.rb | 78 +++++++++++++----------- spec/jobs/problem_check_spec.rb | 68 +++++++++++++++++++++ spec/jobs/problem_checks_spec.rb | 66 +------------------- spec/models/admin_dashboard_data_spec.rb | 64 +++++++++++++------ 6 files changed, 173 insertions(+), 120 deletions(-) create mode 100644 app/jobs/regular/problem_check.rb create mode 100644 spec/jobs/problem_check_spec.rb diff --git a/app/jobs/regular/problem_check.rb b/app/jobs/regular/problem_check.rb new file mode 100644 index 00000000000..4ab9b9deb93 --- /dev/null +++ b/app/jobs/regular/problem_check.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Jobs + # This job runs a singular scheduled admin check. It is scheduled by + # the ProblemChecks (plural) scheduled job. + class ProblemCheck < ::Jobs::Base + sidekiq_options retry: false + + def execute(args) + identifier = args[:check_identifier] + + AdminDashboardData.execute_scheduled_check(identifier.to_sym) + end + end +end diff --git a/app/jobs/scheduled/problem_checks.rb b/app/jobs/scheduled/problem_checks.rb index 5135e20f027..c6540b2c542 100644 --- a/app/jobs/scheduled/problem_checks.rb +++ b/app/jobs/scheduled/problem_checks.rb @@ -5,6 +5,8 @@ module Jobs # on a regular basis. To add a problem check for this scheduled job run # call AdminDashboardData.add_scheduled_problem_check class ProblemChecks < ::Jobs::Scheduled + sidekiq_options retry: false + every 10.minutes def execute(_args) diff --git a/app/models/admin_dashboard_data.rb b/app/models/admin_dashboard_data.rb index d01589ae002..fe253be47fe 100644 --- a/app/models/admin_dashboard_data.rb +++ b/app/models/admin_dashboard_data.rb @@ -35,7 +35,7 @@ class AdminDashboardData GLOBAL_REPORTS ||= [] PROBLEM_MESSAGE_PREFIX = "admin-problem:" - SCHEDULED_PROBLEM_STORAGE_KEY = "admin-found-scheduled-problems" + SCHEDULED_PROBLEM_STORAGE_KEY = "admin-found-scheduled-problems-list" def initialize(opts = {}) @opts = opts @@ -89,12 +89,11 @@ class AdminDashboardData if problem.identifier.present? return if problems.find { |p| p.identifier == problem.identifier } end - problems << problem - set_found_scheduled_check_problems(problems) + set_found_scheduled_check_problem(problem) end - def self.set_found_scheduled_check_problems(problems) - Discourse.redis.setex(SCHEDULED_PROBLEM_STORAGE_KEY, 300, JSON.dump(problems.map(&:to_h))) + def self.set_found_scheduled_check_problem(problem) + Discourse.redis.rpush(SCHEDULED_PROBLEM_STORAGE_KEY, JSON.dump(problem.to_h)) end def self.clear_found_scheduled_check_problems @@ -103,21 +102,25 @@ class AdminDashboardData def self.clear_found_problem(identifier) problems = load_found_scheduled_check_problems - problems.reject! { |p| p.identifier == identifier } - set_found_scheduled_check_problems(problems) + problem = problems.find { |p| p.identifier == identifier } + Discourse.redis.lrem(SCHEDULED_PROBLEM_STORAGE_KEY, 1, JSON.dump(problem.to_h)) end def self.load_found_scheduled_check_problems - found_problems_json = Discourse.redis.get(SCHEDULED_PROBLEM_STORAGE_KEY) - return [] if found_problems_json.blank? - begin - JSON.parse(found_problems_json).map { |problem| Problem.from_h(problem) } - rescue JSON::ParserError => err - Discourse.warn_exception( - err, - message: "Error parsing found problem JSON in admin dashboard: #{found_problems_json}", - ) - [] + found_problems = Discourse.redis.lrange(SCHEDULED_PROBLEM_STORAGE_KEY, 0, -1) + + return [] if found_problems.blank? + + found_problems.filter_map do |problem| + begin + Problem.from_h(JSON.parse(problem)) + rescue JSON::ParserError => err + Discourse.warn_exception( + err, + message: "Error parsing found problem JSON in admin dashboard: #{problem}", + ) + nil + end end end @@ -145,28 +148,29 @@ class AdminDashboardData end def self.execute_scheduled_checks - found_problems = [] - problem_scheduled_check_blocks.each do |check_identifier, blk| - problems = nil + problem_scheduled_check_blocks.keys.each do |check_identifier| + Jobs.enqueue(:problem_check, check_identifier: check_identifier.to_s) + end + end - begin - problems = instance_exec(&blk) - rescue StandardError => err - Discourse.warn_exception( - err, - message: "A scheduled admin dashboard problem check (#{check_identifier}) errored.", - ) - # we don't want to hold up other checks because this one errored - next + def self.execute_scheduled_check(identifier) + check = problem_scheduled_check_blocks[identifier] + + problems = instance_exec(&check) + + Array + .wrap(problems) + .compact + .each do |problem| + next if !problem.is_a?(Problem) + + add_found_scheduled_check_problem(problem) end - - found_problems += Array.wrap(problems) - end - - found_problems.compact.each do |problem| - next if !problem.is_a?(Problem) - add_found_scheduled_check_problem(problem) - end + rescue StandardError => err + Discourse.warn_exception( + err, + message: "A scheduled admin dashboard problem check (#{identifier}) errored.", + ) end ## diff --git a/spec/jobs/problem_check_spec.rb b/spec/jobs/problem_check_spec.rb new file mode 100644 index 00000000000..1867b52f44f --- /dev/null +++ b/spec/jobs/problem_check_spec.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +RSpec.describe Jobs::ProblemCheck do + after do + Discourse.redis.flushdb + AdminDashboardData.reset_problem_checks + end + + it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do + AdminDashboardData.add_scheduled_problem_check(:test_identifier) do + AdminDashboardData::Problem.new("big problem") + end + + described_class.new.execute(check_identifier: :test_identifier) + problems = AdminDashboardData.load_found_scheduled_check_problems + expect(problems.count).to eq(1) + expect(problems.first).to be_a(AdminDashboardData::Problem) + expect(problems.first.to_s).to eq("big problem") + end + + it "can handle the problem check returning multiple problems" do + AdminDashboardData.add_scheduled_problem_check(:test_identifier) do + [ + AdminDashboardData::Problem.new("big problem"), + AdminDashboardData::Problem.new( + "yuge problem", + priority: "high", + identifier: "config_is_a_mess", + ), + ] + end + + described_class.new.execute(check_identifier: :test_identifier) + problems = AdminDashboardData.load_found_scheduled_check_problems + expect(problems.map(&:to_s)).to match_array(["big problem", "yuge problem"]) + end + + it "does not add the same problem twice if the identifier already exists" do + AdminDashboardData.add_scheduled_problem_check(:test_identifier) do + [ + AdminDashboardData::Problem.new( + "yuge problem", + priority: "high", + identifier: "config_is_a_mess", + ), + AdminDashboardData::Problem.new( + "nasty problem", + priority: "high", + identifier: "config_is_a_mess", + ), + ] + end + + described_class.new.execute(check_identifier: :test_identifier) + problems = AdminDashboardData.load_found_scheduled_check_problems + expect(problems.map(&:to_s)).to match_array(["yuge problem"]) + end + + it "handles errors from a troublesome check" do + AdminDashboardData.add_scheduled_problem_check(:test_identifier) do + raise StandardError.new("something went wrong") + AdminDashboardData::Problem.new("polling issue") + end + + described_class.new.execute(check_identifier: :test_identifier) + expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(0) + end +end diff --git a/spec/jobs/problem_checks_spec.rb b/spec/jobs/problem_checks_spec.rb index 39144f64fcf..7c78e16d194 100644 --- a/spec/jobs/problem_checks_spec.rb +++ b/spec/jobs/problem_checks_spec.rb @@ -1,63 +1,16 @@ # frozen_string_literal: true RSpec.describe Jobs::ProblemChecks do + before { Jobs.run_immediately! } + after do Discourse.redis.flushdb AdminDashboardData.reset_problem_checks end - it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do - AdminDashboardData.add_scheduled_problem_check(:test_identifier) do - AdminDashboardData::Problem.new("big problem") - end - - described_class.new.execute(nil) - problems = AdminDashboardData.load_found_scheduled_check_problems - expect(problems.count).to eq(1) - expect(problems.first).to be_a(AdminDashboardData::Problem) - expect(problems.first.to_s).to eq("big problem") - end - - it "can handle the problem check returning multiple problems" do - AdminDashboardData.add_scheduled_problem_check(:test_identifier) do - [ - AdminDashboardData::Problem.new("big problem"), - AdminDashboardData::Problem.new( - "yuge problem", - priority: "high", - identifier: "config_is_a_mess", - ), - ] - end - - described_class.new.execute(nil) - problems = AdminDashboardData.load_found_scheduled_check_problems - expect(problems.map(&:to_s)).to match_array(["big problem", "yuge problem"]) - end - - it "does not add the same problem twice if the identifier already exists" do - AdminDashboardData.add_scheduled_problem_check(:test_identifier) do - [ - AdminDashboardData::Problem.new( - "yuge problem", - priority: "high", - identifier: "config_is_a_mess", - ), - AdminDashboardData::Problem.new( - "nasty problem", - priority: "high", - identifier: "config_is_a_mess", - ), - ] - end - - described_class.new.execute(nil) - problems = AdminDashboardData.load_found_scheduled_check_problems - expect(problems.map(&:to_s)).to match_array(["yuge problem"]) - end - it "starts with a blank slate every time the checks are run to avoid duplicate problems and to clear no longer firing problems" do problem_should_fire = true + AdminDashboardData.reset_problem_checks AdminDashboardData.add_scheduled_problem_check(:test_identifier) do if problem_should_fire problem_should_fire = false @@ -70,17 +23,4 @@ RSpec.describe Jobs::ProblemChecks do described_class.new.execute(nil) expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(0) end - - it "handles errors from a troublesome check and proceeds with the rest" do - AdminDashboardData.add_scheduled_problem_check(:test_identifier) do - raise StandardError.new("something went wrong") - AdminDashboardData::Problem.new("polling issue") - end - AdminDashboardData.add_scheduled_problem_check(:test_identifier_2) do - AdminDashboardData::Problem.new("yuge problem", priority: "high") - end - - described_class.new.execute(nil) - expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(1) - end end diff --git a/spec/models/admin_dashboard_data_spec.rb b/spec/models/admin_dashboard_data_spec.rb index ff156595a33..e8901a29d7c 100644 --- a/spec/models/admin_dashboard_data_spec.rb +++ b/spec/models/admin_dashboard_data_spec.rb @@ -48,25 +48,6 @@ RSpec.describe AdminDashboardData do end describe "adding scheduled checks" do - it "adds the passed block to the scheduled checks" do - called = false - AdminDashboardData.add_scheduled_problem_check(:test_identifier) { called = true } - - AdminDashboardData.execute_scheduled_checks - expect(called).to eq(true) - end - - it "adds a found problem from a scheduled check" do - AdminDashboardData.add_scheduled_problem_check(:test_identifier) do - AdminDashboardData::Problem.new("test problem") - end - - AdminDashboardData.execute_scheduled_checks - problems = AdminDashboardData.load_found_scheduled_check_problems - expect(problems.first).to be_a(AdminDashboardData::Problem) - expect(problems.first.message).to eq("test problem") - end - it "does not add duplicate problems with the same identifier" do prob1 = AdminDashboardData::Problem.new("test problem", identifier: "test") prob2 = AdminDashboardData::Problem.new("test problem 2", identifier: "test") @@ -78,7 +59,7 @@ RSpec.describe AdminDashboardData do end it "does not error when loading malformed problems saved in redis" do - Discourse.redis.set(AdminDashboardData::SCHEDULED_PROBLEM_STORAGE_KEY, "{ 'badjson") + Discourse.redis.rpush(AdminDashboardData::SCHEDULED_PROBLEM_STORAGE_KEY, "{ 'badjson") expect(AdminDashboardData.load_found_scheduled_check_problems).to eq([]) end @@ -101,6 +82,49 @@ RSpec.describe AdminDashboardData do include_examples "stats cacheable" end + describe ".execute_scheduled_checks" do + let(:blk) { -> {} } + + before { AdminDashboardData.add_scheduled_problem_check(:foo, &blk) } + after { AdminDashboardData.reset_problem_checks } + + it do + expect_enqueued_with(job: :problem_check, args: { check_identifier: :foo }) do + AdminDashboardData.execute_scheduled_checks + end + end + end + + describe ".execute_scheduled_check" do + context "when problems are found" do + let(:blk) { -> { self::Problem.new("Problem") } } + + before do + AdminDashboardData.add_scheduled_problem_check(:foo, &blk) + AdminDashboardData.expects(:add_found_scheduled_check_problem).once + end + + after { AdminDashboardData.reset_problem_checks } + + it do + expect(described_class.execute_scheduled_check(:foo)).to all(be_a(described_class::Problem)) + end + end + + context "when check errors out" do + let(:blk) { -> { raise StandardError } } + + before do + AdminDashboardData.add_scheduled_problem_check(:foo, &blk) + Discourse.expects(:warn_exception).once + end + + after { AdminDashboardData.reset_problem_checks } + + it { expect(described_class.execute_scheduled_check(:foo)).to eq(nil) } + end + end + describe "#problem_message_check" do let(:key) { AdminDashboardData.problem_messages.first }