mirror of
https://github.com/discourse/discourse.git
synced 2024-11-25 18:03:43 +08:00
PERF: Speed up S3 inventory updates (#19110)
The UPDATE statement could lock the `uploads` table for a very long time when the `verification_status` of many uploads changed. Splitting it into two simpler UPDATE statements (one that marks uploads as verified and one that marks them as having an invalid etag) solves that problem. This change also ensures that only the needed data from the inventory gets inserted into the `TEMP TABLE`. For example, there is no need to have records for optimized images in that table when the `uploads` table gets updated.
This commit is contained in:
parent
723f8239df
commit
a597ef7131
|
@ -48,7 +48,10 @@ class S3Inventory
|
||||||
connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
|
connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
|
||||||
for_each_inventory_row do |row|
|
for_each_inventory_row do |row|
|
||||||
key = row[CSV_KEY_INDEX]
|
key = row[CSV_KEY_INDEX]
|
||||||
|
|
||||||
next if Rails.configuration.multisite && key.exclude?(multisite_prefix)
|
next if Rails.configuration.multisite && key.exclude?(multisite_prefix)
|
||||||
|
next if key.exclude?("/#{type}/")
|
||||||
|
|
||||||
url = File.join(Discourse.store.absolute_base_url, key)
|
url = File.join(Discourse.store.absolute_base_url, key)
|
||||||
connection.put_copy_data("#{url},#{row[CSV_ETAG_INDEX]}\n")
|
connection.put_copy_data("#{url},#{row[CSV_ETAG_INDEX]}\n")
|
||||||
end
|
end
|
||||||
|
@ -76,29 +79,38 @@ class S3Inventory
|
||||||
if model == Upload
|
if model == Upload
|
||||||
sql_params = {
|
sql_params = {
|
||||||
inventory_date: inventory_date,
|
inventory_date: inventory_date,
|
||||||
unchecked: Upload.verification_statuses[:unchecked],
|
|
||||||
invalid_etag: Upload.verification_statuses[:invalid_etag],
|
invalid_etag: Upload.verification_statuses[:invalid_etag],
|
||||||
verified: Upload.verification_statuses[:verified]
|
verified: Upload.verification_statuses[:verified],
|
||||||
|
seeded_id_threshold: model::SEEDED_ID_THRESHOLD
|
||||||
}
|
}
|
||||||
|
|
||||||
DB.exec(<<~SQL, sql_params)
|
DB.exec(<<~SQL, sql_params)
|
||||||
UPDATE #{model.table_name}
|
UPDATE #{model.table_name}
|
||||||
SET verification_status = CASE WHEN table_name_alias.etag IS NULL
|
SET verification_status = :verified
|
||||||
THEN :invalid_etag
|
WHERE etag IS NOT NULL
|
||||||
ELSE :verified
|
AND verification_status <> :verified
|
||||||
END
|
AND updated_at < :inventory_date
|
||||||
FROM #{model.table_name} AS model_table
|
AND id > :seeded_id_threshold
|
||||||
LEFT JOIN #{table_name} AS table_name_alias ON
|
AND EXISTS
|
||||||
model_table.etag = table_name_alias.etag
|
(
|
||||||
WHERE model_table.id = #{model.table_name}.id
|
SELECT 1
|
||||||
AND model_table.updated_at < :inventory_date
|
FROM #{table_name}
|
||||||
AND (
|
WHERE #{table_name}.etag = #{model.table_name}.etag
|
||||||
model_table.verification_status = :unchecked OR
|
)
|
||||||
model_table.verification_status <> CASE WHEN table_name_alias.etag IS NULL
|
SQL
|
||||||
THEN :invalid_etag
|
|
||||||
ELSE :verified
|
DB.exec(<<~SQL, sql_params)
|
||||||
END
|
UPDATE #{model.table_name}
|
||||||
)
|
SET verification_status = :invalid_etag
|
||||||
AND model_table.id > #{model::SEEDED_ID_THRESHOLD}
|
WHERE verification_status <> :invalid_etag
|
||||||
|
AND updated_at < :inventory_date
|
||||||
|
AND id > :seeded_id_threshold
|
||||||
|
AND NOT EXISTS
|
||||||
|
(
|
||||||
|
SELECT 1
|
||||||
|
FROM #{table_name}
|
||||||
|
WHERE #{table_name}.etag = #{model.table_name}.etag
|
||||||
|
)
|
||||||
SQL
|
SQL
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user