PERF: Speed up S3 inventory updates (#19110)

The UPDATE statement could lock the `uploads` table for a very long time
when the `verification_status` of many uploads changed. Splitting it into
two simpler UPDATE statements (one that marks uploads as verified and one
that marks them as having an invalid etag) solves that problem.

Also, this change ensures that only the needed data from the inventory
gets inserted into the `TEMP TABLE`. For example, there's no need to
have records for optimized images in that table when the `uploads` table
gets updated.
This commit is contained in:
Gerhard Schlager 2022-11-20 21:52:30 +01:00 committed by GitHub
parent 723f8239df
commit a597ef7131
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -48,7 +48,10 @@ class S3Inventory
connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
for_each_inventory_row do |row|
key = row[CSV_KEY_INDEX]
next if Rails.configuration.multisite && key.exclude?(multisite_prefix)
next if key.exclude?("/#{type}/")
url = File.join(Discourse.store.absolute_base_url, key)
connection.put_copy_data("#{url},#{row[CSV_ETAG_INDEX]}\n")
end
@ -76,29 +79,38 @@ class S3Inventory
if model == Upload
sql_params = {
inventory_date: inventory_date,
unchecked: Upload.verification_statuses[:unchecked],
invalid_etag: Upload.verification_statuses[:invalid_etag],
verified: Upload.verification_statuses[:verified]
verified: Upload.verification_statuses[:verified],
seeded_id_threshold: model::SEEDED_ID_THRESHOLD
}
DB.exec(<<~SQL, sql_params)
UPDATE #{model.table_name}
SET verification_status = CASE WHEN table_name_alias.etag IS NULL
THEN :invalid_etag
ELSE :verified
END
FROM #{model.table_name} AS model_table
LEFT JOIN #{table_name} AS table_name_alias ON
model_table.etag = table_name_alias.etag
WHERE model_table.id = #{model.table_name}.id
AND model_table.updated_at < :inventory_date
AND (
model_table.verification_status = :unchecked OR
model_table.verification_status <> CASE WHEN table_name_alias.etag IS NULL
THEN :invalid_etag
ELSE :verified
END
SET verification_status = :verified
WHERE etag IS NOT NULL
AND verification_status <> :verified
AND updated_at < :inventory_date
AND id > :seeded_id_threshold
AND EXISTS
(
SELECT 1
FROM #{table_name}
WHERE #{table_name}.etag = #{model.table_name}.etag
)
SQL
DB.exec(<<~SQL, sql_params)
UPDATE #{model.table_name}
SET verification_status = :invalid_etag
WHERE verification_status <> :invalid_etag
AND updated_at < :inventory_date
AND id > :seeded_id_threshold
AND NOT EXISTS
(
SELECT 1
FROM #{table_name}
WHERE #{table_name}.etag = #{model.table_name}.etag
)
AND model_table.id > #{model::SEEDED_ID_THRESHOLD}
SQL
end