discourse/lib/tasks/s3.rake
Martin Brennan 9a72a0945f
FIX: Ensure CORS rules exist for S3 using rake task (#14802)
This commit introduces a new s3:ensure_cors_rules rake task
that is run as a prerequisite to s3:upload_assets. This rake
task calls out to the S3CorsRulesets class to ensure that
the 3 relevant sets of CORS rules are applied, depending on
site settings:

* assets
* direct S3 backups
* direct S3 uploads

This works for both Global S3 settings and Database S3 settings
(the latter set directly via SiteSetting).

As it is, only one rule can be applied, which is generally
the assets rule as it is called first. This commit changes
the ensure_cors! method to be able to apply new rules as
well as the existing ones.

This commit also slightly changes the existing rules to cover
direct S3 uploads via uppy, especially multipart, which requires
some more headers.
2021-11-08 09:16:38 +10:00

229 lines
5.7 KiB
Ruby

# frozen_string_literal: true
# Builds the S3 key for the brotli-compressed variant of an asset by inserting
# "br" in front of the file extension, e.g. "assets/app.js" becomes
# "assets/app.br.js".
#
# @param path [String] key (or path) of the uncompressed asset
# @return [String] key for the brotli variant
def brotli_s3_path(path)
  ext = File.extname(path)

  # Without an extension `-ext.length` is 0 and `path[0..0]` would keep only
  # the first character, so append the marker to the whole path instead.
  return "#{path}.br" if ext.empty?

  # The inclusive range stops on the extension's leading dot, so the prefix
  # already ends with the "." that separates it from the marker.
  "#{path[0..-ext.length]}br#{ext}"
end
# Builds the S3 key for the gzip-compressed variant of an asset by inserting
# "gz" in front of the file extension, e.g. "assets/app.js" becomes
# "assets/app.gz.js".
#
# @param path [String] key (or path) of the uncompressed asset
# @return [String] key for the gzip variant
def gzip_s3_path(path)
  ext = File.extname(path)

  # Without an extension `-ext.length` is 0 and `path[0..0]` would keep only
  # the first character, so append the marker to the whole path instead.
  return "#{path}.gz" if ext.empty?

  # The inclusive range stops on the extension's leading dot, so the prefix
  # already ends with the "." that separates it from the marker.
  "#{path[0..-ext.length]}gz#{ext}"
end
# Tells whether an asset can be skipped because a remote object with the same
# key was already listed under "assets/". The listing is fetched once and
# memoized. Setting the FORCE_S3_UPLOADS environment variable disables
# skipping entirely.
#
# @param path [String] remote key to look up
# @return [Boolean]
def should_skip?(path)
  forced = ENV['FORCE_S3_UPLOADS']
  return false if forced

  @existing_assets ||= begin
    remote_keys = helper.list("assets/").map { |remote_object| remote_object.key }
    Set.new(remote_keys)
  end

  @existing_assets.include?(path)
end
# Uploads one local asset file through the S3 helper unless `should_skip?`
# says the remote key can be skipped, then removes the local file when
# DELETE_ASSETS_AFTER_S3_UPLOAD is set.
#
# @param path [String] local file to upload
# @param remote_path [String] destination key
# @param content_type [String] content type stored with the object
# @param content_encoding [String, nil] content encoding stored with the
#   object; omitted from the options when nil
def upload(path, remote_path, content_type, content_encoding = nil)
  options = {
    cache_control: 'max-age=31556952, public, immutable',
    content_type: content_type,
    acl: 'public-read'
  }
  options[:content_encoding] = content_encoding if content_encoding

  if should_skip?(remote_path)
    puts "Skipping: #{remote_path}"
  else
    puts "Uploading: #{remote_path}"
    File.open(path) { |file| helper.upload(file, remote_path, options) }
  end

  # File.exist? instead of the File.exists? alias, which Ruby 3.2 removed.
  File.delete(path) if File.exist?(path) && ENV["DELETE_ASSETS_AFTER_S3_UPLOAD"]
end
# Reads the USE_DB_S3_CONFIG environment variable.
#
# @return [String, nil] the variable's value, or nil when it is unset
def use_db_s3_config
  ENV.fetch("USE_DB_S3_CONFIG", nil)
end
# Returns the S3 helper used by these tasks, building it on first use from
# S3Helper.build_from_config and reusing the same instance afterwards.
def helper
  return @helper if @helper

  @helper = S3Helper.build_from_config(use_db_s3_config: use_db_s3_config)
end
# Collects the upload entries for every asset in the Sprockets manifest.
#
# Each entry is an array of [local_file, remote_key, content_type] with an
# optional fourth element holding the content encoding. Alongside the asset
# itself, entries are added for ".br", ".gz" and ".map" sidecar files that
# exist on disk.
#
# @return [Array<Array<String>>]
def assets
  manifest = Sprockets::Manifest.new(
    Rails.application.assets&.cached,
    Rails.root + 'public/assets',
    Rails.application.config.assets.manifest
  )

  manifest.assets.each_with_object([]) do |(_logical_path, digest_path), entries|
    local_file = (Rails.root + "public/assets/#{digest_path}").to_s

    # Ignore files we can't find the mime type of, like yarn.lock
    mime = MiniMime.lookup_by_filename(local_file)
    next unless mime

    content_type = mime.content_type
    remote_key = "assets/#{digest_path}"
    entries << [local_file, remote_key, content_type]

    brotli_file = "#{local_file}.br"
    if File.exist?(brotli_file)
      entries << [brotli_file, brotli_s3_path(remote_key), content_type, 'br']
    end

    gzip_file = "#{local_file}.gz"
    if File.exist?(gzip_file)
      entries << [gzip_file, gzip_s3_path(remote_key), content_type, 'gzip']
    end

    map_file = "#{local_file}.map"
    if File.exist?(map_file)
      entries << [map_file, "#{remote_key}.map", 'application/json']
    end
  end
end
# Returns the set of remote keys (second element of every `assets` entry).
#
# @return [Set<String>]
def asset_paths
  remote_keys = Set.new
  assets.each { |_local_file, remote_key| remote_keys << remote_key }
  remote_keys
end
# Aborts the process with exit status 1 and an error on STDERR unless S3 is
# enabled through GlobalSetting or USE_DB_S3_CONFIG is set.
def ensure_s3_configured!
  return if GlobalSetting.use_s3? || use_db_s3_config

  STDERR.puts "ERROR: Ensure S3 is configured in config/discourse.conf or environment vars"
  exit 1
end
# Re-applies the "public-read" ACL to the S3 object behind every Upload and
# OptimizedImage record whose URL starts with the store's absolute base URL.
# Progress is printed every 100 records.
task 's3:correct_acl' => :environment do
  ensure_s3_configured!

  puts "ensuring public-read is set on every upload and optimized image"

  base_url = Discourse.store.absolute_base_url

  objects = Upload.pluck(:id, :url).map { |row| row + [:upload] }
  objects.concat(OptimizedImage.pluck(:id, :url).map { |row| row + [:optimized_image] })
  puts "#{objects.length} objects found"

  objects.each.with_index(1) do |(id, url, type), processed|
    if url.start_with?(base_url)
      begin
        # Drop the base URL plus the single character that follows it.
        key = url[(base_url.length + 1)..-1]
        Discourse.store.s3_helper.object(key).acl.put(acl: "public-read")
      rescue => e
        # Best effort: report the failure and carry on with the next record.
        puts "Skipping #{type} #{id} url is #{url} #{e}"
      end
    else
      puts "Skipping #{type} #{id} since it is not stored on s3, url is #{url}"
    end

    puts "#{processed} done" if (processed % 100).zero?
  end
end
# Rewrites the metadata of the S3 object behind every Upload and
# OptimizedImage record whose URL starts with the store's absolute base URL.
# Each object is copied onto itself with the cache-control header below, a
# "public-read" ACL and its existing content type and disposition.
# Progress is printed every 100 records.
task 's3:correct_cachecontrol' => :environment do
  ensure_s3_configured!

  puts "ensuring cache-control is set on every upload and optimized image"

  base_url = Discourse.store.absolute_base_url
  cache_control = 'max-age=31556952, public, immutable'

  objects = Upload.pluck(:id, :url).map { |row| row + [:upload] }
  objects.concat(OptimizedImage.pluck(:id, :url).map { |row| row + [:optimized_image] })
  puts "#{objects.length} objects found"

  objects.each.with_index(1) do |(id, url, type), processed|
    if url.start_with?(base_url)
      begin
        # Drop the base URL plus the single character that follows it.
        key = url[(base_url.length + 1)..-1]
        object = Discourse.store.s3_helper.object(key)

        # Copy the object onto itself; 'REPLACE' makes the values passed here
        # the object's metadata instead of carrying over the source's.
        object.copy_from(
          copy_source: "#{object.bucket_name}/#{object.key}",
          acl: "public-read",
          cache_control: cache_control,
          content_type: object.content_type,
          content_disposition: object.content_disposition,
          metadata_directive: 'REPLACE'
        )
      rescue => e
        # Best effort: report the failure and carry on with the next record.
        puts "Skipping #{type} #{id} url is #{url} #{e}"
      end
    else
      puts "Skipping #{type} #{id} since it is not stored on s3, url is #{url}"
    end

    puts "#{processed} done" if (processed % 100).zero?
  end
end
# Syncs the CORS rulesets through S3CorsRulesets.sync and reports, for the
# assets, backup and direct upload rules, whether each was applied or
# already existed.
task 's3:ensure_cors_rules' => :environment do
  ensure_s3_configured!

  puts "Installing CORS rules..."
  result = S3CorsRulesets.sync(use_db_s3_config: use_db_s3_config)

  # [result key, message when applied, message when already present]
  reports = [
    [
      :assets_rules_applied,
      "Assets rules did not exist and were applied.",
      "Assets rules already existed."
    ],
    [
      :backup_rules_applied,
      "Backup rules did not exist and were applied.",
      "Backup rules already existed."
    ],
    [
      :direct_upload_rules_applied,
      "Direct upload rules did not exist and were applied.",
      "Direct upload rules already existed."
    ]
  ]

  reports.each do |result_key, applied_message, existing_message|
    puts(result[result_key] ? applied_message : existing_message)
  end
end
# Uploads every entry returned by `assets`, after the :environment and
# s3:ensure_cors_rules prerequisites have run.
task 's3:upload_assets' => [:environment, 's3:ensure_cors_rules'] do
  assets.each { |entry| upload(*entry) }
end
# Tags every remote object under "assets/" that is absent from the current
# manifest with old=true, clears the tags of objects still in the manifest,
# and then updates the "delete_old_assets" lifecycle rule for that tag.
task 's3:expire_missing_assets' => :environment do
  ensure_s3_configured!

  flagged = 0
  retained = 0
  manifest_keys = asset_paths

  puts "Ensuring AWS assets are tagged correctly for removal"
  helper.list('assets/').each do |remote_object|
    key = remote_object.key

    if manifest_keys.include?(key)
      # ensure we do not delete this by mistake
      helper.tag_file(key, {})
      retained += 1
    else
      helper.tag_file(key, old: true)
      flagged += 1
    end
  end

  puts "#{flagged} assets were flagged for removal in 10 days (#{retained} assets will be retained)"

  puts "Ensuring AWS rule exists for purging old assets"
  helper.update_lifecycle("delete_old_assets", 10, tag: { key: 'old', value: 'true' })
end