PERF: optimise backfilling of topic_id (#13545)

Relying on large offsets can give uneven performance on huge tables. The new
implementation recovers more cleanly and avoids double updates.
This commit is contained in:
Sam 2021-06-28 16:16:22 +10:00 committed by GitHub
parent a6b9289477
commit 14a0247301
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -5,27 +5,22 @@ class BackfillEmailLogTopicId < ActiveRecord::Migration[6.1]
BATCH_SIZE = 30_000
# Backfills email_logs.topic_id from the topic_id of the post each log row
# references, issuing one UPDATE per loop iteration limited to BATCH_SIZE rows.
#
# NOTE(review): this listing looks like a flattened diff (hunk header
# "-5,27 +5,22"): removed and added lines are interleaved without +/- markers,
# e.g. the two DB.exec calls, the two SET clauses and the two break conditions.
# The OFFSET-based lines appear to be the old version and the
# "topic_id IS NULL" / "count == 0" lines the new one; confirm against the
# real migration file before relying on this text as runnable code.
def up
# Running offset and up-front row count; both are only used by the
# offset-based exit check further down.
offset = 0
email_log_count = DB.query_single("SELECT COUNT(*) FROM email_logs").first
loop do
DB.exec(<<~SQL, offset: offset, batch_size: BATCH_SIZE)
count = DB.exec(<<~SQL, batch_size: BATCH_SIZE)
WITH cte AS (
SELECT post_id
FROM email_logs
ORDER BY id
SELECT l1.id, p1.topic_id
FROM email_logs l1
INNER JOIN posts p1 ON p1.id = l1.post_id
WHERE l1.topic_id IS NULL AND p1.topic_id IS NOT NULL
LIMIT :batch_size
OFFSET :offset
)
UPDATE email_logs
SET topic_id = posts.topic_id
SET topic_id = cte.topic_id
FROM cte
INNER JOIN posts ON posts.id = cte.post_id
WHERE email_logs.post_id = cte.post_id
WHERE email_logs.id = cte.id
SQL
# Offset-based exit: advance one batch and stop once the offset has passed
# the initial row count plus two extra batches.
offset += BATCH_SIZE
break if offset > (email_log_count + BATCH_SIZE * 2)
# Count-based exit: stop when DB.exec returns 0 (presumably the number of
# rows updated; verify against the DB helper's documentation).
break if count == 0
end
end