mirror of
https://github.com/discourse/discourse.git
synced 2025-01-04 13:14:07 +08:00
a98d2a8086
AWS recommends running buckets without ACLs, and to use resource policies to manage access control instead. This is not a bad idea, because S3 ACLs are whack, and while resource policies are also whack, they're a more constrained form of whack. Further, some compliance regimes get antsy if you don't go with the vendor's recommended settings, and arguing that you need to enable ACLs on a bucket just to store images in there is more hassle than it's worth. The new site setting (s3_use_acls) cannot be disabled when secure uploads is enabled -- the latter relies on private ACLs for security at this point in time. We may want to reexamine this in future.
193 lines
5.1 KiB
Ruby
193 lines
5.1 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Re-creates Upload records for media referenced from post markup, using files
# found either in the local upload/tombstone directories or in the S3 object
# listing, depending on whether +Discourse.store+ reports itself as external.
class UploadRecovery
  # @param dry_run [Boolean] when true, only print "<post url> <media url>"
  #   for each candidate instead of attempting a recovery
  # @param stop_on_error [Boolean] when true, re-raise any error hit while
  #   processing a post; otherwise the error is printed and processing goes on
  def initialize(dry_run: false, stop_on_error: false)
    @dry_run = dry_run
    @stop_on_error = stop_on_error
  end

  # Runs #recover_post for each post yielded by +posts.have_uploads.find_each+.
  #
  # @param posts [#have_uploads] scope to walk; defaults to +Post+
  def recover(posts = Post)
    posts.have_uploads.find_each { |post| recover_post(post) }
  end

  # Scans the <img> and <a> elements of the post's cooked markup and attempts
  # to recover the upload each one points to.
  #
  # * <img> elements carrying +data-orig-src+ are always candidates, unless
  #   one of their CSS classes is listed in +Post.allowed_image_classes+.
  # * Other elements are candidates when their +href+/+src+ is recognised by
  #   +Upload.extract_url+ and the matching Upload is missing or has the
  #   +invalid_etag+ verification status.
  #
  # Errors are re-raised when +stop_on_error+ was requested, and printed to
  # stdout otherwise.
  #
  # @param post [Post]
  def recover_post(post)
    analyzer = PostAnalyzer.new(post.raw, post.topic_id)

    analyzer
      .cooked_stripped
      .css("img", "a")
      .each do |media|
        if media.name == "img" && (orig_src = media["data-orig-src"])
          dom_class = media["class"]
          next if dom_class && (Post.allowed_image_classes & dom_class.split).any?

          if @dry_run
            puts "#{post.full_url} #{orig_src}"
          else
            recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
          end
        elsif (url = media["href"] || media["src"])
          data = Upload.extract_url(url)
          next unless data

          upload = Upload.get_from_url(url)
          invalid_etag = Upload.verification_statuses[:invalid_etag]
          # Nothing to do when a record exists and is not flagged invalid_etag.
          next if upload && upload.verification_status != invalid_etag

          if @dry_run
            puts "#{post.full_url} #{url}"
          else
            # NOTE(review): index 2 of the extract_url result is used as the
            # SHA1; confirm against Upload.extract_url.
            recover_post_upload(post, data[2])
          end
        end
      end
  rescue => e
    raise if @stop_on_error
    puts "#{post.full_url} #{e.class}: #{e.message}"
  end

  private

  # Dispatches to the S3 or local recovery path. Does nothing when +sha1+
  # fails #valid_sha1?.
  def recover_post_upload(post, sha1)
    return unless valid_sha1?(sha1)

    if Discourse.store.external?
      recover_post_upload_from_s3(post: post, sha1: sha1)
    else
      recover_post_upload_from_local(post: post, sha1: sha1)
    end
  end

  # Rebakes +post+ once +upload+ has been persisted. When the persisted
  # upload's SHA1 differs from the one the post referenced, a warning is
  # written to STDERR and the old => new mapping is stored as JSON in the
  # post's "UPLOAD_SHA1_MAP" custom field before rebaking.
  def ensure_upload!(post:, sha1:, upload:)
    return unless upload.persisted?

    if upload.sha1 != sha1
      STDERR.puts "Warning #{post.url} had an incorrect #{sha1} should be #{upload.sha1} storing in custom field 'rake uploads:fix_relative_upload_links' can fix this"

      sha_map = JSON.parse(post.custom_fields["UPLOAD_SHA1_MAP"] || "{}")
      sha_map[sha1] = upload.sha1

      post.custom_fields["UPLOAD_SHA1_MAP"] = sha_map.to_json
      post.save_custom_fields
    end

    post.rebake!
  end

  # Local-store recovery for a post: every upload created by
  # #recover_from_local is passed to #ensure_upload!.
  def recover_post_upload_from_local(post:, sha1:)
    recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  # S3 recovery for a post: every upload created by #recover_from_s3 is
  # passed to #ensure_upload!.
  def recover_post_upload_from_s3(post:, sha1:)
    recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  # Creates an upload from every file under the store's tombstone and upload
  # "original" directories whose path contains +sha1+, yielding each created
  # upload to the optional block.
  #
  # The directory listing is globbed once per instance and memoised in @paths.
  #
  # @param sha1 [String] matched literally against each path
  # @param user_id [Integer] owner passed on to #create_upload
  def recover_from_local(sha1:, user_id:)
    @paths ||=
      Dir.glob(File.join(Discourse.store.tombstone_dir, "original", "**", "*.*")).concat(
        Dir.glob(File.join(Discourse.store.upload_path, "original", "**", "*.*")),
      )

    @paths.each do |path|
      # Literal substring test: interpolating sha1 into a regex would let
      # metacharacters match unrelated paths and recompiles a regex per path.
      next unless path.include?(sha1)

      begin
        tmp = Tempfile.new
        # Stream the bytes across instead of loading the whole file in memory.
        IO.copy_stream(path, tmp)
        tmp.rewind

        upload = create_upload(tmp, File.basename(path), user_id)
        yield upload if block_given?
      ensure
        # close! also unlinks, so temp files do not pile up until GC.
        tmp&.close!
      end
    end
  end

  # For every listed S3 object key containing +sha1+:
  #
  # 1. if the key contains the tombstone prefix, copies the object to the same
  #    key with the first occurrence of that prefix removed;
  # 2. unless an Upload with this SHA1 already exists, downloads the object,
  #    creates an upload from it and yields it to the optional block.
  #
  # The key listing is fetched once per instance and memoised in @object_keys.
  #
  # @param sha1 [String] matched literally against each object key
  # @param user_id [Integer] owner passed on to #create_upload
  def recover_from_s3(sha1:, user_id:)
    @object_keys ||= s3_object_keys

    upload_exists = Upload.exists?(sha1: sha1)
    tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

    @object_keys.each do |object_key|
      next unless object_key.include?(sha1)

      key = object_key

      if object_key.include?(tombstone_prefix)
        key = object_key.sub(tombstone_prefix, "")

        Discourse.store.s3_helper.copy(
          object_key,
          key,
          options: {
            acl: SiteSetting.s3_use_acls ? "public-read" : nil,
          },
        )
      end

      next if upload_exists

      url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"

      begin
        tmp =
          FileHelper.download(
            url,
            max_file_size: SiteSetting.max_image_size_kb.kilobytes,
            tmp_file_name: "recover_from_s3",
          )

        if tmp
          upload = create_upload(tmp, File.basename(key), user_id)
          yield upload if block_given?
        end
      ensure
        # NOTE(review): the object returned by FileHelper.download is not
        # visible from here, so only #close is assumed to be available.
        tmp&.close
      end
    end
  end

  # Lists the keys under the "original" prefix followed by those under the
  # tombstone "original" prefix. On multisite both prefixes are scoped to the
  # current database name.
  #
  # @return [Array<String>]
  def s3_object_keys
    s3_helper = Discourse.store.s3_helper
    tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

    live_prefix, tombstone_path =
      if Rails.configuration.multisite
        current_db = RailsMultisite::ConnectionManagement.current_db
        ["uploads/#{current_db}/original", "uploads/#{tombstone_prefix}#{current_db}/original"]
      else
        ["original", "#{tombstone_prefix}original"]
      end

    s3_helper.list(live_prefix).map(&:key).concat(s3_helper.list(tombstone_path).map(&:key))
  end

  # Builds an upload for +user_id+ from +file+ through UploadCreator.
  def create_upload(file, filename, user_id)
    UploadCreator.new(file, filename).create_for(user_id)
  end

  # True when +sha1+ is present and exactly Upload::SHA1_LENGTH characters.
  def valid_sha1?(sha1)
    sha1.present? && sha1.length == Upload::SHA1_LENGTH
  end
end
|