PERF: Update s3:expire_missing_assets to delete in batches (#18908)

Some sites may have thousands of stale assets - deleting them one-by-one is very slow.

Followup to e8570b5cc9
This commit is contained in:
David Taylor 2022-11-07 12:53:14 +00:00 committed by GitHub
parent 3673d3359c
commit f30f9ec5d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 30 additions and 2 deletions

View File

@ -105,6 +105,15 @@ class S3Helper
rescue Aws::S3::Errors::NoSuchKey, Aws::S3::Errors::NotFound
end
def delete_objects(keys)
s3_bucket.delete_objects({
delete: {
objects: keys.map { |k| { key: k } },
quiet: true,
},
})
end
def copy(source, destination, options: {})
if options[:apply_metadata_to_destination]
options = options.except(:apply_metadata_to_destination).merge(metadata_directive: "REPLACE")

View File

@ -204,6 +204,8 @@ end
task 's3:expire_missing_assets' => :environment do
ensure_s3_configured!
puts "Checking for stale S3 assets..."
assets_to_delete = existing_assets.dup
# Check that all current assets are uploaded, and remove them from the to_delete list
@ -217,13 +219,20 @@ task 's3:expire_missing_assets' => :environment do
if assets_to_delete.size > 0
puts "Found #{assets_to_delete.size} assets to delete..."
assets_to_delete.each do |to_delete|
if !to_delete.start_with?(prefix_s3_path("assets/"))
# Sanity check, this should never happen
raise "Attempted to delete a non-/asset S3 path (#{to_delete}). Aborting"
end
puts "Deleting #{to_delete}..."
helper.delete_object(to_delete)
end
assets_to_delete.each_slice(500) do |slice|
message = "Deleting #{slice.size} assets...\n"
message += slice.join("\n").indent(2)
puts message
helper.delete_objects(slice)
puts "... done"
end
else
puts "No stale assets found"

View File

@ -212,4 +212,14 @@ RSpec.describe "S3Helper" do
expect(s3_helper.ensure_cors!([S3CorsRulesets::BACKUP_DIRECT_UPLOAD])).to eq(false)
end
end
describe "#delete_objects" do
let(:s3_helper) { S3Helper.new("test-bucket", "", client: client) }
it "works" do
# The S3::Client with `stub_responses: true` includes validation of requests.
# If the request were invalid, this spec would raise an error
s3_helper.delete_objects(["object/one.txt", "object/two.txt"])
end
end
end