PERF: Update s3:expire_missing_assets to delete in batches (#18908)

Some sites may have thousands of stale assets - deleting them one-by-one is very slow.

Followup to e8570b5cc9
This commit is contained in:
David Taylor 2022-11-07 12:53:14 +00:00
parent 006a857272
commit b25f469018
3 changed files with 30 additions and 2 deletions

View File

@ -104,6 +104,15 @@ class S3Helper
rescue Aws::S3::Errors::NoSuchKey rescue Aws::S3::Errors::NoSuchKey
end end
def delete_objects(keys)
s3_bucket.delete_objects({
delete: {
objects: keys.map { |k| { key: k } },
quiet: true,
},
})
end
def copy(source, destination, options: {}) def copy(source, destination, options: {})
if options[:apply_metadata_to_destination] if options[:apply_metadata_to_destination]
options = options.except(:apply_metadata_to_destination).merge(metadata_directive: "REPLACE") options = options.except(:apply_metadata_to_destination).merge(metadata_directive: "REPLACE")

View File

@ -204,6 +204,8 @@ end
task 's3:expire_missing_assets' => :environment do task 's3:expire_missing_assets' => :environment do
ensure_s3_configured! ensure_s3_configured!
puts "Checking for stale S3 assets..."
assets_to_delete = existing_assets.dup assets_to_delete = existing_assets.dup
# Check that all current assets are uploaded, and remove them from the to_delete list # Check that all current assets are uploaded, and remove them from the to_delete list
@ -217,13 +219,20 @@ task 's3:expire_missing_assets' => :environment do
if assets_to_delete.size > 0 if assets_to_delete.size > 0
puts "Found #{assets_to_delete.size} assets to delete..." puts "Found #{assets_to_delete.size} assets to delete..."
assets_to_delete.each do |to_delete| assets_to_delete.each do |to_delete|
if !to_delete.start_with?(prefix_s3_path("assets/")) if !to_delete.start_with?(prefix_s3_path("assets/"))
# Sanity check, this should never happen # Sanity check, this should never happen
raise "Attempted to delete a non-/asset S3 path (#{to_delete}). Aborting" raise "Attempted to delete a non-/asset S3 path (#{to_delete}). Aborting"
end end
puts "Deleting #{to_delete}..." end
helper.delete_object(to_delete)
assets_to_delete.each_slice(500) do |slice|
message = "Deleting #{slice.size} assets...\n"
message += slice.join("\n").indent(2)
puts message
helper.delete_objects(slice)
puts "... done"
end end
else else
puts "No stale assets found" puts "No stale assets found"

View File

@ -184,4 +184,14 @@ describe "S3Helper" do
expect(s3_helper.ensure_cors!([S3CorsRulesets::BACKUP_DIRECT_UPLOAD])).to eq(false) expect(s3_helper.ensure_cors!([S3CorsRulesets::BACKUP_DIRECT_UPLOAD])).to eq(false)
end end
end end
describe "#delete_objects" do
let(:s3_helper) { S3Helper.new("test-bucket", "", client: client) }
it "works" do
# The S3::Client with `stub_responses: true` includes validation of requests.
# If the request were invalid, this spec would raise an error
s3_helper.delete_objects(["object/one.txt", "object/two.txt"])
end
end
end end