PERF: Remove post_upload recovery in daily EnsureS3UploadsExistence job (#10173)
This is a very expensive process, and it should only be required in exceptional circumstances. It is possible to run a similar recovery using `rake uploads:recover` (5284d41a8e/lib/upload_recovery.rb (L135-L184)).
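
For reference, a minimal sketch of that manual recovery path, run from a Rails console. The `UploadRecovery` class comes from the linked lib/upload_recovery.rb; the `dry_run:` keyword is an assumption based on the `uploads:recover` rake task, and the exact API may differ by Discourse version:

    # Sketch only — API assumed from lib/upload_recovery.rb as linked above.
    # A dry run reports what would be recovered without writing anything.
    UploadRecovery.new(dry_run: true).recover

    # Then run the real pass (roughly what `rake uploads:recover` does).
    UploadRecovery.new.recover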
commit 7f2b5a446a
parent 194c962124
--- a/lib/s3_inventory.rb
+++ b/lib/s3_inventory.rb
@@ -61,8 +61,6 @@ class S3Inventory
         WHERE #{model.table_name}.etag IS NULL AND
           #{model.table_name}.url = #{table_name}.url")
 
-    list_missing_post_uploads if type == "original"
-
     uploads = (model == Upload) ? model.by_users.where("updated_at < ?", inventory_date) : model
     missing_uploads = uploads
       .joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag")
@@ -87,45 +85,6 @@ class S3Inventory
     end
   end
 
-  def list_missing_post_uploads
-    log "Listing missing post uploads..."
-
-    missing = Post.find_missing_uploads(include_local_upload: false) do |post, _, _, sha1|
-      next if sha1.blank?
-
-      upload_id = nil
-      result = connection.exec("SELECT * FROM #{table_name} WHERE url LIKE '%original/%/#{sha1}%'")
-
-      if result.count >= 1
-        begin
-          url = result[0]["url"]
-          key = url.sub(/^#{Discourse.store.absolute_base_url}\//, "")
-          data = @s3_helper.object(key).data
-          filename = (data.content_disposition&.match(/filename=\"(.*)\"/) || [])[1]
-
-          upload = Upload.new(
-            user_id: Discourse.system_user.id,
-            original_filename: filename || File.basename(key),
-            filesize: data.content_length,
-            url: url,
-            sha1: sha1,
-            etag: result[0]["etag"]
-          )
-          upload.save!(validate: false)
-          upload_id = upload.id
-          post.link_post_uploads
-        rescue Aws::S3::Errors::NotFound
-          next
-        end
-      end
-
-      upload_id
-    end
-
-    Discourse.stats.set("missing_post_uploads", missing[:count])
-    log "#{missing[:count]} post uploads are missing."
-  end
-
   def download_inventory_file_to_tmp_directory(file)
     return if File.exists?(file[:filename])
 
--- a/spec/components/s3_inventory_spec.rb
+++ b/spec/components/s3_inventory_spec.rb
@@ -76,7 +76,7 @@ describe "S3Inventory" do
       inventory.backfill_etags_and_list_missing
     end
 
-    expect(output).to eq("Listing missing post uploads...\n0 post uploads are missing.\n#{upload.url}\n1 of 5 uploads are missing\n")
+    expect(output).to eq("#{upload.url}\n1 of 5 uploads are missing\n")
     expect(Discourse.stats.get("missing_s3_uploads")).to eq(1)
   end
 
@@ -95,27 +95,4 @@ describe "S3Inventory" do
 
     expect(Upload.by_users.order(:url).pluck(:url, :etag)).to eq(files)
   end
-
-  it "should recover missing uploads correctly" do
-    freeze_time
-
-    CSV.foreach(csv_filename, headers: false) do |row|
-      Fabricate(:upload, url: File.join(Discourse.store.absolute_base_url, row[S3Inventory::CSV_KEY_INDEX]), etag: row[S3Inventory::CSV_ETAG_INDEX], updated_at: 2.days.ago)
-    end
-
-    upload = Upload.last
-    etag = upload.etag
-    post = Fabricate(:post, raw: "![](#{upload.url})")
-    post.link_post_uploads
-    upload.delete
-
-    inventory.expects(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]).times(3)
-
-    output = capture_stdout do
-      inventory.backfill_etags_and_list_missing
-    end
-
-    expect(output).to eq("Listing missing post uploads...\n0 post uploads are missing.\n")
-    expect(post.uploads.first.etag).to eq(etag)
-  end
 end
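
Operational note: per the spec above, the inventory run still records the `missing_s3_uploads` counter, so missing files are still reported; only the automatic post-upload recovery is gone. A hedged console sketch for acting on it — the counter name and `Discourse.stats.get` call are taken from the spec, everything else is illustrative:

    # Read the counter the S3Inventory run records (see spec above).
    missing = Discourse.stats.get("missing_s3_uploads").to_i

    # Recovery is now a manual step, e.g. the rake task from the commit message.
    puts "consider running `rake uploads:recover`" if missing > 0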