DEV: Detect when s3 inventory failure is caused by etag difference (#10427)

This commit is contained in:
David Taylor 2020-08-13 00:30:28 +01:00 committed by GitHub
parent 451b9b245f
commit bd0a7553c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 2 deletions

View File

@ -67,6 +67,10 @@ class S3Inventory
.joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag")
.where("#{table_name}.etag IS NULL")
exists_with_different_etag = missing_uploads
.joins("LEFT JOIN #{table_name} inventory2 ON inventory2.url = #{model.table_name}.url")
.where("inventory2.etag IS NOT NULL").pluck(:id)
# marking as verified/not verified
id_threshold_clause = model == Upload ? " AND model_table.id > #{model::SEEDED_ID_THRESHOLD}" : ""
DB.exec(<<~SQL, inventory_date
@ -86,10 +90,18 @@ class S3Inventory
if (missing_count = missing_uploads.count) > 0
missing_uploads.select(:id, :url).find_each do |upload|
if exists_with_different_etag.include?(upload.id)
log "#{upload.url} has different etag"
else
log upload.url
end
end
log "#{missing_count} of #{uploads.count} #{model.name.underscore.pluralize} are missing"
if exists_with_different_etag.present?
log "#{exists_with_different_etag.count} of these are caused by differing etags"
log "Null the etag column and re-run for automatic backfill"
end
end
Discourse.stats.set("missing_s3_#{model.table_name}", missing_count)

View File

@ -64,7 +64,10 @@ describe "S3Inventory" do
CSV.foreach(csv_filename, headers: false) do |row|
next unless row[S3Inventory::CSV_KEY_INDEX].include?("default")
Fabricate(:upload, etag: row[S3Inventory::CSV_ETAG_INDEX], updated_at: 2.days.ago)
Fabricate(:upload,
etag: row[S3Inventory::CSV_ETAG_INDEX],
url: File.join(Discourse.store.absolute_base_url, row[S3Inventory::CSV_KEY_INDEX]),
updated_at: 2.days.ago)
end
@upload1 = Fabricate(:upload, etag: "ETag", updated_at: 1.days.ago)
@ -84,6 +87,25 @@ describe "S3Inventory" do
expect(Discourse.stats.get("missing_s3_uploads")).to eq(2)
end
it "should detect when a url match exists with a different etag" do
differing_etag = Upload.find_by(etag: "defcaac0b4aca535c284e95f30d608d0")
differing_etag.update_columns(etag: "somethingelse")
output = capture_stdout do
inventory.backfill_etags_and_list_missing
end
expect(output).to eq(<<~TEXT)
#{differing_etag.url} has different etag
#{@upload1.url}
#{@no_etag.url}
3 of 5 uploads are missing
1 of these are caused by differing etags
Null the etag column and re-run for automatic backfill
TEXT
expect(Discourse.stats.get("missing_s3_uploads")).to eq(3)
end
it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
expect(Upload.where(verified: nil).count).to eq(12)
output = capture_stdout do