mirror of
https://github.com/discourse/discourse.git
synced 2024-11-30 19:15:32 +08:00
891987a284
UploadRecovery only worked on missing Upload records. Now it also works with existing ones that have an invalid_etag status.
The specs (first that test the S3 path) are a bit of stub-a-palooza, but that's how much this class interacts with the the outside world. 🤷♂️
192 lines
4.9 KiB
Ruby
192 lines
4.9 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
class UploadRecovery
|
|
def initialize(dry_run: false, stop_on_error: false)
|
|
@dry_run = dry_run
|
|
@stop_on_error = stop_on_error
|
|
end
|
|
|
|
def recover(posts = Post)
|
|
posts.have_uploads.find_each { |post| recover_post post }
|
|
end
|
|
|
|
def recover_post(post)
|
|
begin
|
|
analyzer = PostAnalyzer.new(post.raw, post.topic_id)
|
|
|
|
analyzer.cooked_stripped.css("img", "a").each do |media|
|
|
if media.name == "img" && orig_src = media["data-orig-src"]
|
|
if dom_class = media["class"]
|
|
if (Post.allowed_image_classes & dom_class.split).count > 0
|
|
next
|
|
end
|
|
end
|
|
|
|
if @dry_run
|
|
puts "#{post.full_url} #{orig_src}"
|
|
else
|
|
recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
|
|
end
|
|
elsif url = (media["href"] || media["src"])
|
|
data = Upload.extract_url(url)
|
|
next unless data
|
|
|
|
upload = Upload.get_from_url(url)
|
|
|
|
if !upload || upload.verification_status == Upload.verification_statuses[:invalid_etag]
|
|
if @dry_run
|
|
puts "#{post.full_url} #{url}"
|
|
else
|
|
sha1 = data[2]
|
|
recover_post_upload(post, sha1)
|
|
end
|
|
end
|
|
end
|
|
end
|
|
rescue => e
|
|
raise e if @stop_on_error
|
|
puts "#{post.full_url} #{e.class}: #{e.message}"
|
|
end
|
|
end
|
|
|
|
private
|
|
|
|
def recover_post_upload(post, sha1)
|
|
return unless valid_sha1?(sha1)
|
|
|
|
attributes = {
|
|
post: post,
|
|
sha1: sha1
|
|
}
|
|
|
|
if Discourse.store.external?
|
|
recover_post_upload_from_s3(attributes)
|
|
else
|
|
recover_post_upload_from_local(attributes)
|
|
end
|
|
end
|
|
|
|
def ensure_upload!(post:, sha1:, upload:)
|
|
return if !upload.persisted?
|
|
|
|
if upload.sha1 != sha1
|
|
STDERR.puts "Warning #{post.url} had an incorrect #{sha1} should be #{upload.sha1} storing in custom field 'rake uploads:fix_relative_upload_links' can fix this"
|
|
|
|
sha_map = post.custom_fields["UPLOAD_SHA1_MAP"] || "{}"
|
|
sha_map = JSON.parse(sha_map)
|
|
sha_map[sha1] = upload.sha1
|
|
|
|
post.custom_fields["UPLOAD_SHA1_MAP"] = sha_map.to_json
|
|
post.save_custom_fields
|
|
end
|
|
|
|
post.rebake!
|
|
end
|
|
|
|
def recover_post_upload_from_local(post:, sha1:)
|
|
recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
|
|
ensure_upload!(post: post, sha1: sha1, upload: upload)
|
|
end
|
|
end
|
|
|
|
def recover_post_upload_from_s3(post:, sha1:)
|
|
recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
|
|
ensure_upload!(post: post, sha1: sha1, upload: upload)
|
|
end
|
|
end
|
|
|
|
def recover_from_local(sha1:, user_id:)
|
|
@paths ||= begin
|
|
Dir.glob(File.join(
|
|
Discourse.store.tombstone_dir,
|
|
'original',
|
|
'**',
|
|
'*.*'
|
|
)).concat(Dir.glob(File.join(
|
|
Discourse.store.upload_path,
|
|
'original',
|
|
'**',
|
|
'*.*'
|
|
)))
|
|
end
|
|
|
|
@paths.each do |path|
|
|
if path =~ /#{sha1}/
|
|
begin
|
|
tmp = Tempfile.new
|
|
tmp.write(File.read(path))
|
|
tmp.rewind
|
|
|
|
upload = create_upload(tmp, File.basename(path), user_id)
|
|
yield upload if block_given?
|
|
ensure
|
|
tmp&.close
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def recover_from_s3(sha1:, user_id:)
|
|
@object_keys ||= begin
|
|
s3_helper = Discourse.store.s3_helper
|
|
|
|
if Rails.configuration.multisite
|
|
current_db = RailsMultisite::ConnectionManagement.current_db
|
|
s3_helper.list("uploads/#{current_db}/original").map(&:key).concat(
|
|
s3_helper.list("uploads/#{FileStore::S3Store::TOMBSTONE_PREFIX}#{current_db}/original").map(&:key)
|
|
)
|
|
else
|
|
s3_helper.list("original").map(&:key).concat(
|
|
s3_helper.list("#{FileStore::S3Store::TOMBSTONE_PREFIX}original").map(&:key)
|
|
)
|
|
end
|
|
end
|
|
|
|
upload_exists = Upload.exists?(sha1: sha1)
|
|
|
|
@object_keys.each do |key|
|
|
if key =~ /#{sha1}/
|
|
tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX
|
|
|
|
if key.include?(tombstone_prefix)
|
|
old_key = key
|
|
key = key.sub(tombstone_prefix, "")
|
|
|
|
Discourse.store.s3_helper.copy(
|
|
old_key,
|
|
key,
|
|
options: { acl: "public-read" }
|
|
)
|
|
end
|
|
|
|
next if upload_exists
|
|
|
|
url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"
|
|
|
|
begin
|
|
tmp = FileHelper.download(
|
|
url,
|
|
max_file_size: SiteSetting.max_image_size_kb.kilobytes,
|
|
tmp_file_name: "recover_from_s3"
|
|
)
|
|
|
|
if tmp
|
|
upload = create_upload(tmp, File.basename(key), user_id)
|
|
yield upload if block_given?
|
|
end
|
|
ensure
|
|
tmp&.close
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def create_upload(file, filename, user_id)
|
|
UploadCreator.new(file, filename).create_for(user_id)
|
|
end
|
|
|
|
def valid_sha1?(sha1)
|
|
sha1.present? && sha1.length == Upload::SHA1_LENGTH
|
|
end
|
|
end
|