mirror of
https://github.com/discourse/discourse.git
synced 2025-03-21 04:35:47 +08:00
PERF: migrate normalized_emails in a migration (#15166)
The old OnceOff job could perform quite slowly on sites with millions of emails. The new implementation operates in batches inside a migration, minimizing locking.
This commit is contained in:
parent
da9a226bcb
commit
3a73028a70
@ -1,11 +0,0 @@
|
|||||||
# frozen_string_literal: true
|
|
||||||
|
|
||||||
module Jobs
  # One-off job that fills in normalized_email on every UserEmail record,
  # one record at a time.
  class MigrateNormalizedEmails < ::Jobs::Onceoff
    # Walks UserEmail with find_each and stores each record's
    # normalize_email result in its normalized_email attribute.
    # `args` is accepted but not used.
    def execute_onceoff(args)
      ::UserEmail.find_each { |record| record.update(normalized_email: record.normalize_email) }
    end
  end
end
|
|
@ -0,0 +1,50 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Backfills user_emails.normalized_email directly in SQL, working through the
# table in id-ordered batches.
class MigrateEmailToNormalizedEmail < ActiveRecord::Migration[6.1]
  # Opt out of the wrapping migration transaction to minimize locking on the
  # user_emails table.
  disable_ddl_transaction!

  # Computes batch edges and issues one UPDATE per batch.
  def up
    first_id, last_id = DB.query_single "SELECT MIN(id), MAX(id) FROM user_emails"

    # MIN(id) comes back as nil when the table is empty: nothing to do.
    return unless first_id

    # Batch edges are taken from row positions (every 100000th row ordered by
    # id) rather than from fixed id ranges. This keeps batches evenly sized
    # even when ids have "holes" left by deleted records, or in pathological
    # cases where, say, both id 0 and id 100_000_000 exist.
    cut_points = DB.query_single <<~SQL
      SELECT t.id
      FROM (
      SELECT *, row_number() OVER(ORDER BY id ASC) AS row
      FROM user_emails
      ) t
      WHERE t.row % 100000 = 0
    SQL

    # Each batch includes its lower edge and excludes its upper edge
    # (id >= from AND id < upto), so the final edge must be MAX(id) + 1
    # for the last row to be covered.
    edges = [first_id, *cut_points, last_id + 1]

    edges.each_cons(2) do |from_id, upto_id|
      # `execute` is used instead of MiniSQL because MiniSQL is not logging at
      # the moment. The to_i calls are redundant but make it explicit that
      # only integers are interpolated (no SQL injection).
      #
      # Normalized form: the text before the first "+" or "@" with every "."
      # removed, followed by the email with its leading run of non-"@"
      # characters removed. Rows already holding that value are skipped.
      execute <<~SQL
        UPDATE user_emails
        SET normalized_email = REPLACE(REGEXP_REPLACE(email,'([+@].*)',''),'.','') || REGEXP_REPLACE(email, '[^@]*', '')
        WHERE (normalized_email IS NULL OR normalized_email <> (REPLACE(REGEXP_REPLACE(email,'([+@].*)',''),'.','') || REGEXP_REPLACE(email, '[^@]*', '')))
        AND (id >= #{from_id.to_i} AND id < #{upto_id.to_i})
      SQL
    end
  end

  # Reverts the backfill by clearing normalized_email on every row.
  def down
    execute "UPDATE user_emails SET normalized_email = null"
  end
end
|
Loading…
x
Reference in New Issue
Block a user