discourse/lib/upload_recovery.rb
Matt Palmer a98d2a8086
FEATURE: allow S3 ACLs to be disabled
AWS recommends running buckets without ACLs, and to use resource policies to manage access control instead.
This is not a bad idea, because S3 ACLs are whack, and while resource policies are also whack, they're a more constrained form of whack.
Further, some compliance regimes get antsy if you don't go with the vendor's recommended settings, and arguing that you need to enable ACLs on a bucket just to store images in there is more hassle than it's worth.
The new site setting (s3_use_acls) cannot be disabled when secure
uploads is enabled -- the latter relies on private ACLs for security
at this point in time. We may want to reexamine this in future.
2023-06-06 15:47:40 +10:00

193 lines
5.1 KiB
Ruby

# frozen_string_literal: true
# Re-creates Upload records for files that posts still reference.
#
# For every post, the cooked HTML is scanned for <img> and <a> elements. When
# an element points at an upload that has no Upload record (or whose record is
# flagged with an invalid etag), the file is looked up by SHA1 in the local
# upload/tombstone directories or in the S3 bucket, a new upload is created
# from it, and the post is rebaked.
class UploadRecovery
  # @param dry_run [Boolean] when true, only print "<post url> <upload url>"
  #   for each candidate instead of recovering anything
  # @param stop_on_error [Boolean] when true, re-raise an error hit while
  #   processing a post instead of printing it and moving on
  def initialize(dry_run: false, stop_on_error: false)
    @dry_run = dry_run
    @stop_on_error = stop_on_error
  end

  # Runs recovery over every post in +posts+ that has uploads.
  #
  # @param posts the Post class or a scope responding to +have_uploads+
  def recover(posts = Post)
    posts.have_uploads.find_each { |post| recover_post(post) }
  end

  # Inspects every <img> and <a> element of a single post and recovers the
  # uploads they reference. Errors are printed (not raised) unless the
  # instance was built with +stop_on_error: true+.
  #
  # @param post the post to inspect
  def recover_post(post)
    analyzer = PostAnalyzer.new(post.raw, post.topic_id)
    analyzer.cooked_stripped.css("img", "a").each { |media| recover_media(post, media) }
  rescue => e
    raise if @stop_on_error
    puts "#{post.full_url} #{e.class}: #{e.message}"
  end

  private

  # Handles one <img>/<a> node of a post: decides whether it references an
  # upload that needs recovering and, if so, reports it (dry run) or recovers it.
  def recover_media(post, media)
    if media.name == "img" && (orig_src = media["data-orig-src"])
      dom_class = media["class"]
      # Images carrying one of the allowed image classes are skipped entirely.
      return if dom_class && (Post.allowed_image_classes & dom_class.split).any?

      if @dry_run
        puts "#{post.full_url} #{orig_src}"
      else
        recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
      end
    elsif (url = media["href"] || media["src"])
      data = Upload.extract_url(url)
      return unless data

      upload = Upload.get_from_url(url)
      needs_recovery =
        !upload || upload.verification_status == Upload.verification_statuses[:invalid_etag]
      return unless needs_recovery

      if @dry_run
        puts "#{post.full_url} #{url}"
      else
        # The third element of the extracted URL data is used as the SHA1.
        recover_post_upload(post, data[2])
      end
    end
  end

  # Recovers the upload identified by +sha1+ for +post+ from whichever store
  # is active. Does nothing when +sha1+ fails the presence/length check.
  def recover_post_upload(post, sha1)
    return unless valid_sha1?(sha1)

    if Discourse.store.external?
      recover_post_upload_from_s3(post: post, sha1: sha1)
    else
      recover_post_upload_from_local(post: post, sha1: sha1)
    end
  end

  # Finalizes a recovered upload: when the recreated upload's SHA1 differs
  # from the one the post referenced, the mapping is recorded in the post's
  # "UPLOAD_SHA1_MAP" custom field (a JSON object); the post is then rebaked.
  # Does nothing when the upload was not persisted.
  def ensure_upload!(post:, sha1:, upload:)
    return if !upload.persisted?

    if upload.sha1 != sha1
      STDERR.puts "Warning #{post.url} had an incorrect #{sha1} should be #{upload.sha1} storing in custom field 'rake uploads:fix_relative_upload_links' can fix this"

      sha_map = JSON.parse(post.custom_fields["UPLOAD_SHA1_MAP"] || "{}")
      sha_map[sha1] = upload.sha1

      post.custom_fields["UPLOAD_SHA1_MAP"] = sha_map.to_json
      post.save_custom_fields
    end

    post.rebake!
  end

  def recover_post_upload_from_local(post:, sha1:)
    recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  def recover_post_upload_from_s3(post:, sha1:)
    recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  # Creates an upload from every local file (tombstoned or live) whose path
  # contains +sha1+, yielding each created upload to the optional block.
  #
  # The list of candidate paths is globbed once and memoized on the instance.
  def recover_from_local(sha1:, user_id:)
    @paths ||=
      Dir.glob(File.join(Discourse.store.tombstone_dir, "original", "**", "*.*")).concat(
        Dir.glob(File.join(Discourse.store.upload_path, "original", "**", "*.*")),
      )

    @paths.each do |path|
      # Literal substring match: the SHA1 is data, not a regex pattern.
      next unless path.include?(sha1)

      begin
        tmp = Tempfile.new
        # Uploads are arbitrary binary files; copy them byte-for-byte and
        # stream them rather than loading the whole file into memory.
        tmp.binmode
        IO.copy_stream(path, tmp)
        tmp.rewind

        upload = create_upload(tmp, File.basename(path), user_id)
        yield upload if block_given?
      ensure
        tmp&.close
      end
    end
  end

  # Creates an upload from every S3 object (tombstoned or live) whose key
  # contains +sha1+, yielding each created upload to the optional block.
  #
  # Tombstoned objects are first copied back to their non-tombstone key. When
  # an Upload record with this SHA1 already exists, nothing is downloaded or
  # created (the tombstone copy still happens).
  def recover_from_s3(sha1:, user_id:)
    @object_keys ||= s3_object_keys

    upload_exists = Upload.exists?(sha1: sha1)
    tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

    @object_keys.each do |key|
      # Literal substring match: the SHA1 is data, not a regex pattern.
      next unless key.include?(sha1)

      if key.include?(tombstone_prefix)
        old_key = key
        key = key.sub(tombstone_prefix, "")

        Discourse.store.s3_helper.copy(
          old_key,
          key,
          options: {
            acl: SiteSetting.s3_use_acls ? "public-read" : nil,
          },
        )
      end

      next if upload_exists

      url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"

      begin
        tmp =
          FileHelper.download(
            url,
            max_file_size: SiteSetting.max_image_size_kb.kilobytes,
            tmp_file_name: "recover_from_s3",
          )

        if tmp
          upload = create_upload(tmp, File.basename(key), user_id)
          yield upload if block_given?
        end
      ensure
        tmp&.close
      end
    end
  end

  # Lists the keys of all "original" objects in the bucket, live ones first
  # and tombstoned ones after. On multisite the listing is scoped to the
  # current database.
  def s3_object_keys
    s3_helper = Discourse.store.s3_helper
    tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

    live_path, tombstone_path =
      if Rails.configuration.multisite
        current_db = RailsMultisite::ConnectionManagement.current_db
        ["uploads/#{current_db}/original", "uploads/#{tombstone_prefix}#{current_db}/original"]
      else
        ["original", "#{tombstone_prefix}original"]
      end

    s3_helper.list(live_path).map(&:key).concat(s3_helper.list(tombstone_path).map(&:key))
  end

  def create_upload(file, filename, user_id)
    UploadCreator.new(file, filename).create_for(user_id)
  end

  # A SHA1 is accepted when it is present and has the expected length; its
  # characters are not otherwise validated.
  def valid_sha1?(sha1)
    sha1.present? && sha1.length == Upload::SHA1_LENGTH
  end
end