PERF: Remove post_upload recovery in daily EnsureS3UploadsExistence job (#10173)

This is a very expensive process, and it should only be required in exceptional circumstances. It is possible to run a similar recovery using `rake uploads:recover` (5284d41a8e/lib/upload_recovery.rb (L135-L184))
This commit is contained in:
David Taylor 2020-07-06 16:26:40 +01:00 committed by GitHub
parent 194c962124
commit 7f2b5a446a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 1 additions and 65 deletions

View File

@@ -61,8 +61,6 @@ class S3Inventory
WHERE #{model.table_name}.etag IS NULL AND
#{model.table_name}.url = #{table_name}.url")
list_missing_post_uploads if type == "original"
uploads = (model == Upload) ? model.by_users.where("updated_at < ?", inventory_date) : model
missing_uploads = uploads
.joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag")
@@ -87,45 +85,6 @@ class S3Inventory
end
end
# Scans all posts for referenced uploads whose Upload record is missing and
# attempts to recover each one from the S3 inventory table. For every missing
# sha1 it looks up the inventory row by URL, fetches the object's metadata
# from S3, recreates the Upload record (validations skipped), and re-links the
# post's uploads. The final missing count is recorded in Discourse.stats under
# "missing_post_uploads".
# NOTE(review): expensive — one SQL LIKE query per missing upload plus an S3
# metadata request per match; this is why the caller gates it behind
# type == "original".
def list_missing_post_uploads
log "Listing missing post uploads..."
# The block's return value (upload_id or nil) signals to find_missing_uploads
# whether this upload was successfully recovered.
missing = Post.find_missing_uploads(include_local_upload: false) do |post, _, _, sha1|
next if sha1.blank?
upload_id = nil
# Find the inventory row whose key contains this sha1 under an original/ path.
# NOTE(review): sha1 is interpolated unescaped into the SQL string — assumed
# safe because upstream yields hex digests; confirm at the call site.
result = connection.exec("SELECT * FROM #{table_name} WHERE url LIKE '%original/%/#{sha1}%'")
if result.count >= 1
begin
url = result[0]["url"]
# Derive the S3 object key by stripping the store's base-URL prefix.
# NOTE(review): absolute_base_url is not Regexp.escape'd, so "." matches
# any character — benign for well-formed URLs, but worth confirming.
key = url.sub(/^#{Discourse.store.absolute_base_url}\//, "")
# Fetch object metadata (size, content-disposition) from S3.
data = @s3_helper.object(key).data
# Prefer the original filename from Content-Disposition when present;
# fall back to the key's basename below.
filename = (data.content_disposition&.match(/filename=\"(.*)\"/) || [])[1]
upload = Upload.new(
user_id: Discourse.system_user.id,
original_filename: filename || File.basename(key),
filesize: data.content_length,
url: url,
sha1: sha1,
etag: result[0]["etag"]
)
# Skip validations: we are reconstructing a record for an object that
# already exists in S3.
upload.save!(validate: false)
upload_id = upload.id
post.link_post_uploads
rescue Aws::S3::Errors::NotFound
# Object disappeared from S3 since the inventory snapshot — leave it
# counted as missing and move on.
next
end
end
upload_id
end
Discourse.stats.set("missing_post_uploads", missing[:count])
log "#{missing[:count]} post uploads are missing."
end
def download_inventory_file_to_tmp_directory(file)
return if File.exists?(file[:filename])

View File

@@ -76,7 +76,7 @@ describe "S3Inventory" do
inventory.backfill_etags_and_list_missing
end
expect(output).to eq("Listing missing post uploads...\n0 post uploads are missing.\n#{upload.url}\n1 of 5 uploads are missing\n")
expect(output).to eq("#{upload.url}\n1 of 5 uploads are missing\n")
expect(Discourse.stats.get("missing_s3_uploads")).to eq(1)
end
@@ -95,27 +95,4 @@ describe "S3Inventory" do
expect(Upload.by_users.order(:url).pluck(:url, :etag)).to eq(files)
end
# Verifies that backfill_etags_and_list_missing recreates an Upload record
# that a post references but which was deleted locally, restoring the etag
# recorded in the inventory file.
it "should recover missing uploads correctly" do
freeze_time
# Seed Upload rows from the inventory CSV fixture so urls/etags match it.
CSV.foreach(csv_filename, headers: false) do |row|
Fabricate(:upload, url: File.join(Discourse.store.absolute_base_url, row[S3Inventory::CSV_KEY_INDEX]), etag: row[S3Inventory::CSV_ETAG_INDEX], updated_at: 2.days.ago)
end
upload = Upload.last
etag = upload.etag
post = Fabricate(:post, raw: "![](#{upload.url})")
post.link_post_uploads
# Simulate a lost Upload record. delete (not destroy) bypasses callbacks so
# the post's reference to the upload's sha1 remains in place.
upload.delete
# NOTE(review): files is expected exactly 3 times here — presumably once per
# internal pass of backfill_etags_and_list_missing; confirm against the
# implementation if this expectation breaks.
inventory.expects(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]).times(3)
output = capture_stdout do
inventory.backfill_etags_and_list_missing
end
# Recovery succeeded, so zero post uploads should be reported missing.
expect(output).to eq("Listing missing post uploads...\n0 post uploads are missing.\n")
# The recovered Upload must carry the etag from the inventory file.
expect(post.uploads.first.etag).to eq(etag)
end
end