mirror of
https://github.com/discourse/discourse.git
synced 2024-12-01 07:43:39 +08:00
30990006a9
This reduces chances of errors where consumers of strings mutate inputs and reduces memory usage of the app. Test suite passes now, but there may be some stuff left, so we will run a few sites on a branch prior to merging.
168 lines
4.0 KiB
Ruby
168 lines
4.0 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Walks posts that have uploads, looks at the <img>/<a> nodes of each post's
# cooked HTML and, for every referenced upload that cannot be resolved, tries
# to re-create it from a file whose path/key contains the upload's SHA1
# (searching both the "original" area and the tombstone area of the store).
class UploadRecovery
  # @param dry_run [Boolean] when true nothing is recovered; each candidate is
  #   printed as "<post url> <source url>" and per-post errors are printed
  #   instead of raised.
  def initialize(dry_run: false)
    @dry_run = dry_run
  end

  # Scans every post returned by +posts.have_uploads+ and recovers (or, in
  # dry-run mode, reports) the uploads referenced by its media nodes.
  #
  # @param posts [#have_uploads] model or relation to scan; defaults to Post
  # @raise [StandardError] any error raised while processing a post is
  #   re-raised unless running in dry-run mode
  def recover(posts = Post)
    posts.have_uploads.find_each do |post|
      begin
        analyzer = PostAnalyzer.new(post.raw, post.topic_id)
        analyzer.cooked_stripped.css("img", "a").each { |media| recover_media(post, media) }
      rescue => e
        # A real run must not silently skip failures; a dry run only reports.
        raise unless @dry_run
        puts "#{post.full_url} #{e.class}: #{e.message}"
      end
    end
  end

  private

  # Handles a single <img>/<a> node of a post.
  #
  # An <img> carrying "data-orig-src" is resolved through its short URL;
  # any other node is resolved through its "href" (or "src") attribute.
  def recover_media(post, media)
    orig_src = media["data-orig-src"] if media.name == "img"

    if orig_src
      # Images carrying one of Post.white_listed_image_classes are skipped.
      dom_class = media["class"]
      return if dom_class && (Post.white_listed_image_classes & dom_class.split).any?

      if @dry_run
        puts "#{post.full_url} #{orig_src}"
      else
        recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
      end
    elsif (url = media["href"] || media["src"])
      data = Upload.extract_url(url)
      return unless data

      # Nothing to do when the URL still resolves to an upload.
      return if Upload.get_from_url(url)

      if @dry_run
        puts "#{post.full_url} #{url}"
      else
        # The third element of the extract_url result is used as the SHA1.
        recover_post_upload(post, data[2])
      end
    end
  end

  # Recovers the upload identified by +sha1+ for +post+ from whichever store
  # is active. Does nothing when +sha1+ fails #valid_sha1?.
  def recover_post_upload(post, sha1)
    return unless valid_sha1?(sha1)

    # Real keyword arguments are passed here: handing a Hash positionally to a
    # keyword-only method raises ArgumentError on Ruby 3.
    if Discourse.store.external?
      recover_post_upload_from_s3(post: post, sha1: sha1)
    else
      recover_post_upload_from_local(post: post, sha1: sha1)
    end
  end

  # Recovers from the local filesystem and rebakes the post for every
  # recovered upload that was persisted.
  def recover_post_upload_from_local(post:, sha1:)
    recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
      post.rebake! if upload.persisted?
    end
  end

  # Recovers from S3 and rebakes the post for every recovered upload that was
  # persisted.
  def recover_post_upload_from_s3(post:, sha1:)
    recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
      post.rebake! if upload.persisted?
    end
  end

  # Re-creates an upload from every local file whose path contains +sha1+.
  #
  # The candidate path list (tombstone + live "original" directories of the
  # current multisite database) is globbed once and memoized in @paths.
  #
  # @yieldparam upload the object returned by #create_upload
  def recover_from_local(sha1:, user_id:)
    @paths ||= begin
      public_path = Rails.root.join("public")
      current_db = RailsMultisite::ConnectionManagement.current_db

      tombstoned = Dir.glob(
        File.join(public_path, 'uploads', 'tombstone', current_db, 'original', '**', '*.*')
      )
      live = Dir.glob(
        File.join(public_path, 'uploads', current_db, 'original', '**', '*.*')
      )

      tombstoned + live
    end

    @paths.each do |path|
      next unless path.include?(sha1)

      begin
        # Work on a copy so the file found on disk is never handed out itself.
        tmp = Tempfile.new
        tmp.write(File.read(path))
        tmp.rewind

        upload = create_upload(tmp, File.basename(path), user_id)
        yield upload if block_given?
      ensure
        # close! also unlinks the temp file instead of leaving it for the GC.
        tmp&.close!
      end
    end
  end

  # Re-creates an upload from every S3 object whose key contains +sha1+.
  #
  # The key list ("original" + tombstoned "original") is fetched once and
  # memoized in @object_keys.
  #
  # @yieldparam upload the object returned by #create_upload
  def recover_from_s3(sha1:, user_id:)
    tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

    @object_keys ||= begin
      s3_helper = Discourse.store.s3_helper

      s3_helper.list("original").map(&:key).concat(
        s3_helper.list("#{tombstone_prefix}original").map(&:key)
      )
    end

    @object_keys.each do |object_key|
      next unless object_key.include?(sha1)

      key = object_key

      if object_key.include?(tombstone_prefix)
        # Copy the tombstoned object back to its non-tombstone key.
        key = object_key.sub(tombstone_prefix, "")

        Discourse.store.s3_helper.copy(
          object_key,
          key,
          options: { acl: "public-read" }
        )
      end

      url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"

      begin
        tmp = FileHelper.download(
          url,
          max_file_size: SiteSetting.max_image_size_kb.kilobytes,
          tmp_file_name: "recover_from_s3"
        )

        # The download result is checked because it may be nil/false.
        if tmp
          upload = create_upload(tmp, File.basename(key), user_id)
          yield upload if block_given?
        end
      ensure
        tmp&.close
      end
    end
  end

  # Creates an upload for +user_id+ from +file+ via UploadCreator.
  def create_upload(file, filename, user_id)
    UploadCreator.new(file, filename).create_for(user_id)
  end

  # @return [Boolean] true when +sha1+ is present and exactly
  #   Upload::SHA1_LENGTH characters long
  def valid_sha1?(sha1)
    sha1.present? && sha1.length == Upload::SHA1_LENGTH
  end
end
|