From c053f8ccf6bbb97fef59f9ac3ab764c1c3530149 Mon Sep 17 00:00:00 2001
From: Guo Xiang Tan
Date: Wed, 12 Sep 2018 01:51:53 -0700
Subject: [PATCH] New rake task `uploads:recover`.

---
Review notes (this patch differs from commit c053f8c as originally posted):
 * create_upload now passes its `file` argument to UploadCreator; it
   previously referenced an undefined local `tmp` (NameError).
 * recover_post_upload skips short URLs that yield no SHA1 and splats the
   keyword arguments explicitly.
 * lib/upload_recovery.rb gained comments, so its line count and the
   diffstat below were updated; the blob id on its `index` line still
   refers to the original version.

 lib/tasks/uploads.rake |  92 ++-------------------------------
 lib/upload_recovery.rb | 117 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 87 deletions(-)
 create mode 100644 lib/upload_recovery.rb

diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake
index 242aa8e88be..ea6e2357067 100644
--- a/lib/tasks/uploads.rake
+++ b/lib/tasks/uploads.rake
@@ -716,96 +716,14 @@ task "uploads:fix_incorrect_extensions" => :environment do
   UploadFixer.fix_all_extensions
 end
 
-task "uploads:list_posts_with_broken_images" => :environment do
+task "uploads:recover" => :environment do
+  require_dependency "upload_recovery"
+
   if ENV["RAILS_DB"]
-    list_broken_posts(recover_from_s3: !!ENV["RECOVER_FROM_S3"])
+    UploadRecovery.new.recover
   else
     RailsMultisite::ConnectionManagement.each_connection do |db|
-      list_broken_posts(recover_from_s3: !!ENV["RECOVER_FROM_S3"])
-    end
-  end
-end
-
-def list_broken_posts(recover_from_s3: false)
-  object_keys = nil
-
-  Post.where("raw LIKE '%upload:\/\/%'").find_each do |post|
-    begin
-      begin
-        analyzer = PostAnalyzer.new(post.raw, post.topic_id)
-        cooked_stripped = analyzer.send(:cooked_stripped)
-      end
-
-      cooked_stripped.css("img").each do |img|
-        if dom_class = img["class"]
-          if (Post.white_listed_image_classes & dom_class.split).count > 0
-            next
-          end
-        end
-
-        if img["data-orig-src"]
-          puts "#{post.full_url} #{img["data-orig-src"]}"
-
-          if recover_from_s3 && Discourse.store.external?
-            object_keys ||= begin
-              s3_helper = Discourse.store.s3_helper
-
-              s3_helper.list("original").map(&:key).concat(
-                s3_helper.list("#{FileStore::S3Store::TOMBSTONE_PREFIX}original").map(&:key)
-              )
-            end
-
-            recover_from_s3_by_sha1(
-              post: post,
-              sha1: Upload.sha1_from_short_url(img["data-orig-src"]),
-              object_keys: object_keys
-            )
-          end
-        end
-      end
-    rescue => e
-      puts "#{post.full_url} Error: #{e.class}: #{e.message}"
-    end
-  end
-end
-
-def recover_from_s3_by_sha1(post:, sha1:, object_keys: [])
-  object_keys.each do |key|
-    if key =~ /#{sha1}/
-      tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX
-
-      if key.starts_with?(tombstone_prefix)
-        Discourse.store.s3_helper.copy(
-          key,
-          key.sub(tombstone_prefix, ""),
-          options: { acl: "public-read" }
-        )
-      end
-
-      url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"
-
-      begin
-        tmp = FileHelper.download(
-          url,
-          max_file_size: SiteSetting.max_image_size_kb.kilobytes,
-          tmp_file_name: "recover_from_s3"
-        )
-
-        if tmp
-          upload = UploadCreator.new(
-            tmp,
-            File.basename(key)
-          ).create_for(post.user_id)
-
-          if upload.persisted?
-            post.rebake!
-          else
-            puts "#{post.full_url}: #{upload.errors.full_messages.join(", ")}"
-          end
-        end
-      ensure
-        tmp&.close
-      end
+      UploadRecovery.new.recover
     end
   end
 end
diff --git a/lib/upload_recovery.rb b/lib/upload_recovery.rb
new file mode 100644
index 00000000000..25f92eb3998
--- /dev/null
+++ b/lib/upload_recovery.rb
@@ -0,0 +1,117 @@
+# Recovers uploads referenced from posts via `upload://` short URLs by
+# re-creating them from files found in the store (or its tombstone).
+class UploadRecovery
+  # Walks every post whose raw contains `upload://` and tries to recover each
+  # <img> in the cooked HTML that still has a `data-orig-src` attribute.
+  # Images carrying one of Post.white_listed_image_classes are skipped.
+  def recover
+    Post.where("raw LIKE '%upload:\/\/%'").find_each do |post|
+      analyzer = PostAnalyzer.new(post.raw, post.topic_id)
+      cooked_stripped = analyzer.send(:cooked_stripped)
+
+      cooked_stripped.css("img").each do |img|
+        if dom_class = img["class"]
+          if (Post.white_listed_image_classes & dom_class.split).count > 0
+            next
+          end
+        end
+
+        if img["data-orig-src"]
+          recover_post_upload(post, img["data-orig-src"])
+        end
+      end
+    end
+  end
+
+  private
+
+  # Resolves +short_url+ to a SHA1 and dispatches to the S3 or local strategy
+  # depending on whether the current store is external.
+  def recover_post_upload(post, short_url)
+    sha1 = Upload.sha1_from_short_url(short_url).to_s
+
+    # Without a SHA1 the substring match below would accept every file.
+    return if sha1.empty?
+
+    attributes = { post: post, sha1: sha1 }
+
+    if Discourse.store.external?
+      recover_from_s3(**attributes)
+    else
+      recover_from_local(**attributes)
+    end
+  end
+
+  # Re-creates the upload from any file under public/uploads (tombstone first,
+  # then live) for the current database whose path contains +sha1+.
+  def recover_from_local(post:, sha1:)
+    # The directory listing is memoized for the lifetime of this instance.
+    @paths ||= begin
+      public_path = Rails.root.join("public")
+      current_db = RailsMultisite::ConnectionManagement.current_db
+
+      [
+        File.join(public_path, 'uploads', 'tombstone', current_db, 'original', '**', '*.*'),
+        File.join(public_path, 'uploads', current_db, 'original', '**', '*.*')
+      ].flat_map { |pattern| Dir.glob(pattern) }
+    end
+
+    @paths.each do |path|
+      next unless path.include?(sha1)
+
+      # Block form closes the handle even if upload creation raises.
+      File.open(path, "r") do |file|
+        create_upload(file, File.basename(path), post)
+      end
+    end
+  end
+
+  # Re-creates the upload from any S3 object (live or tombstoned) whose key
+  # contains +sha1+. Tombstoned objects are first copied back to the key
+  # without the tombstone prefix, with a public-read ACL.
+  def recover_from_s3(post:, sha1:)
+    # The key listing is memoized for the lifetime of this instance.
+    @object_keys ||= begin
+      s3_helper = Discourse.store.s3_helper
+
+      s3_helper.list("original").map(&:key).concat(
+        s3_helper.list("#{FileStore::S3Store::TOMBSTONE_PREFIX}original").map(&:key)
+      )
+    end
+
+    tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX
+
+    @object_keys.each do |key|
+      next unless key.include?(sha1)
+
+      if key.start_with?(tombstone_prefix)
+        Discourse.store.s3_helper.copy(
+          key,
+          key.sub(tombstone_prefix, ""),
+          options: { acl: "public-read" }
+        )
+      end
+
+      url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"
+
+      begin
+        tmp = FileHelper.download(
+          url,
+          max_file_size: SiteSetting.max_image_size_kb.kilobytes,
+          tmp_file_name: "recover_from_s3"
+        )
+
+        create_upload(tmp, File.basename(key), post) if tmp
+      ensure
+        tmp&.close
+      end
+    end
+  end
+
+  # Creates an upload for the post's user_id from +file+ and rebakes the
+  # post once the upload has been persisted.
+  def create_upload(file, filename, post)
+    upload = UploadCreator.new(file, filename).create_for(post.user_id)
+    post.rebake! if upload.persisted?
+  end
+end