mirror of
https://github.com/discourse/discourse.git
synced 2025-01-18 17:12:45 +08:00
FEATURE: Mark bad uploads with :invalid_url (#29640)
A "bad upload" in this context is a upload with a mismatched URL. This can happen when changing the S3 bucket used for uploads and the upload records in the database have not been remapped correctly.
This commit is contained in:
parent
81e171070d
commit
5a00a041f1
|
@ -66,13 +66,25 @@ class Upload < ActiveRecord::Base
|
|||
scope :with_invalid_etag_verification_status,
|
||||
-> { where(verification_status: Upload.verification_statuses[:invalid_etag]) }
|
||||
|
||||
scope :with_invalid_url_verification_status,
|
||||
-> { where(verification_status: Upload.verification_statuses[:invalid_url]) }
|
||||
|
||||
def self.verification_statuses
|
||||
@verification_statuses ||=
|
||||
Enum.new(
|
||||
unchecked: 1,
|
||||
verified: 2,
|
||||
invalid_etag: 3, # Used by S3Inventory to mark S3 Upload records that have an invalid ETag value compared to the ETag value of the inventory file
|
||||
s3_file_missing_confirmed: 4, # Used by S3Inventory to skip S3 Upload records that are confirmed to not be backed by a file in the S3 file store
|
||||
# Used by S3Inventory to mark S3 Upload records that have an invalid ETag value compared to
|
||||
# the ETag value of the inventory file. A upload with invalid ETag is equivalent to "missing
|
||||
# upload file"
|
||||
invalid_etag: 3,
|
||||
# Used by S3Inventory to skip S3 Upload records that are confirmed to not be backed by a
|
||||
# file in the S3 file store
|
||||
s3_file_missing_confirmed: 4,
|
||||
# Used by S3Inventory to mark S3 Upload records that have an invalid url value compared to
|
||||
# the url value of the inventory file. A upload with invalid URL is equivalent to "file
|
||||
# exists (same ETag), but with a different URL"
|
||||
invalid_url: 5,
|
||||
)
|
||||
end
|
||||
|
||||
|
|
|
@ -85,7 +85,9 @@ class S3Inventory
|
|||
missing_uploads =
|
||||
uploads.joins(
|
||||
"LEFT JOIN #{tmp_table_name} ON #{tmp_table_name}.etag = #{table_name}.etag",
|
||||
).where("#{tmp_table_name}.etag IS NULL")
|
||||
).where(
|
||||
"#{tmp_table_name}.etag IS NULL OR #{tmp_table_name}.url != #{table_name}.url",
|
||||
)
|
||||
|
||||
exists_with_different_etag =
|
||||
missing_uploads
|
||||
|
@ -95,11 +97,20 @@ class S3Inventory
|
|||
.where("inventory2.etag IS NOT NULL")
|
||||
.pluck(:id)
|
||||
|
||||
exists_with_different_url =
|
||||
missing_uploads
|
||||
.joins(
|
||||
"LEFT JOIN #{tmp_table_name} inventory3 ON inventory3.etag = #{table_name}.etag",
|
||||
)
|
||||
.where("inventory3.url != #{table_name}.url")
|
||||
.pluck(:id)
|
||||
|
||||
# marking as verified/not verified
|
||||
if @model == Upload
|
||||
sql_params = {
|
||||
inventory_date: inventory_date,
|
||||
invalid_etag: Upload.verification_statuses[:invalid_etag],
|
||||
invalid_url: Upload.verification_statuses[:invalid_url],
|
||||
s3_file_missing_confirmed: Upload.verification_statuses[:s3_file_missing_confirmed],
|
||||
verified: Upload.verification_statuses[:verified],
|
||||
seeded_id_threshold: @model::SEEDED_ID_THRESHOLD,
|
||||
|
@ -135,6 +146,22 @@ class S3Inventory
|
|||
WHERE #{tmp_table_name}.etag = #{table_name}.etag
|
||||
)
|
||||
SQL
|
||||
|
||||
DB.exec(<<~SQL, sql_params)
|
||||
UPDATE #{table_name}
|
||||
SET verification_status = :invalid_url
|
||||
WHERE verification_status <> :invalid_url
|
||||
AND verification_status <> :invalid_etag
|
||||
AND verification_status <> :s3_file_missing_confirmed
|
||||
AND updated_at < :inventory_date
|
||||
AND id > :seeded_id_threshold
|
||||
AND NOT EXISTS
|
||||
(
|
||||
SELECT 1
|
||||
FROM #{tmp_table_name}
|
||||
WHERE #{tmp_table_name}.url = #{table_name}.url
|
||||
)
|
||||
SQL
|
||||
end
|
||||
|
||||
if (missing_count = missing_uploads.count) > 0
|
||||
|
@ -143,6 +170,8 @@ class S3Inventory
|
|||
.find_each do |upload|
|
||||
if exists_with_different_etag.include?(upload.id)
|
||||
log "#{upload.url} has different etag"
|
||||
elsif exists_with_different_url.include?(upload.id)
|
||||
log "#{upload.url} has different url"
|
||||
else
|
||||
log upload.url
|
||||
end
|
||||
|
@ -153,6 +182,10 @@ class S3Inventory
|
|||
log "#{exists_with_different_etag.count} of these are caused by differing etags"
|
||||
log "Null the etag column and re-run for automatic backfill"
|
||||
end
|
||||
if exists_with_different_url.present?
|
||||
log "#{exists_with_different_url.count} of these are caused by differing urls"
|
||||
log "Empty the url column and re-run for automatic backfill"
|
||||
end
|
||||
end
|
||||
|
||||
set_missing_s3_discourse_stats(missing_count)
|
||||
|
|
|
@ -74,20 +74,29 @@ RSpec.describe S3Inventory do
|
|||
differing_etag = Upload.find_by(etag: "defcaac0b4aca535c284e95f30d608d0")
|
||||
differing_etag.update_columns(etag: "somethingelse")
|
||||
|
||||
differing_url = Upload.find_by(etag: "0cdc623af39cde0adb382670a6dc702a")
|
||||
differing_url.update_columns(url: differing_url.url.gsub("default", "notdefault"))
|
||||
|
||||
output = capture_stdout { inventory.backfill_etags_and_list_missing }
|
||||
|
||||
expect(output).to eq(<<~TEXT)
|
||||
#{differing_etag.url} has different etag
|
||||
#{differing_url.url} has different url
|
||||
#{@upload_1.url}
|
||||
#{@no_etag.url}
|
||||
3 of 5 uploads are missing
|
||||
4 of 5 uploads are missing
|
||||
1 of these are caused by differing etags
|
||||
Null the etag column and re-run for automatic backfill
|
||||
1 of these are caused by differing urls
|
||||
Empty the url column and re-run for automatic backfill
|
||||
TEXT
|
||||
expect(Discourse.stats.get("missing_s3_uploads")).to eq(3)
|
||||
expect(Discourse.stats.get("missing_s3_uploads")).to eq(4)
|
||||
end
|
||||
|
||||
it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
|
||||
differing_url = Upload.find_by(etag: "0cdc623af39cde0adb382670a6dc702a")
|
||||
differing_url.update_columns(url: differing_url.url.gsub("default", "notdefault"))
|
||||
|
||||
expect(
|
||||
Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count,
|
||||
).to eq(12)
|
||||
|
@ -96,9 +105,10 @@ RSpec.describe S3Inventory do
|
|||
verification_status = Upload.pluck(:verification_status)
|
||||
expect(
|
||||
Upload.where(verification_status: Upload.verification_statuses[:verified]).count,
|
||||
).to eq(3)
|
||||
).to eq(2)
|
||||
|
||||
expect(Upload.with_invalid_etag_verification_status.count).to eq(2)
|
||||
expect(Upload.with_invalid_url_verification_status.count).to eq(1)
|
||||
|
||||
expect(
|
||||
Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count,
|
||||
|
@ -198,7 +208,12 @@ RSpec.describe S3Inventory do
|
|||
|
||||
CSV.foreach(csv_filename, headers: false) do |row|
|
||||
next if row[S3Inventory::CSV_KEY_INDEX].exclude?("default")
|
||||
Fabricate(:upload, etag: row[S3Inventory::CSV_ETAG_INDEX], updated_at: 2.days.ago)
|
||||
Fabricate(
|
||||
:upload,
|
||||
url: File.join(Discourse.store.absolute_base_url, row[S3Inventory::CSV_KEY_INDEX]),
|
||||
etag: row[S3Inventory::CSV_ETAG_INDEX],
|
||||
updated_at: 2.days.ago,
|
||||
)
|
||||
end
|
||||
|
||||
upload = Fabricate(:upload, etag: "ETag", updated_at: 1.days.ago)
|
||||
|
|
Loading…
Reference in New Issue
Block a user