# frozen_string_literal: true
# Attempts to recover missing uploads referenced by posts by locating the
# original file — either in the local store (including its tombstone
# directory) or on S3 — and re-creating the Upload record, then rebaking
# the post.
class UploadRecovery
  # @param dry_run [Boolean] when true, only print what would be recovered
  # @param stop_on_error [Boolean] when true, re-raise errors instead of logging them
  def initialize(dry_run: false, stop_on_error: false)
    @dry_run = dry_run
    @stop_on_error = stop_on_error
  end

  # Walk every post that references uploads and try to recover each one.
  # @param posts [ActiveRecord::Relation] scope of posts to scan (defaults to all)
  def recover(posts = Post)
    posts.have_uploads.find_each { |post| recover_post(post) }
  end

  # Scan a single post's cooked markup for <img>/<a> upload references and
  # recover any whose backing upload is missing or has a bad ETag.
  def recover_post(post)
    analyzer = PostAnalyzer.new(post.raw, post.topic_id)

    analyzer
      .cooked_stripped
      .css("img", "a")
      .each do |media|
        if media.name == "img" && (orig_src = media["data-orig-src"])
          if (dom_class = media["class"])
            # Skip emoji/avatar-style images — they are not user uploads.
            next if (Post.allowed_image_classes & dom_class.split).count > 0
          end

          if @dry_run
            puts "#{post.full_url} #{orig_src}"
          else
            recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
          end
        elsif (url = media["href"] || media["src"])
          data = Upload.extract_url(url)
          next unless data

          upload = Upload.get_from_url(url)

          # Recover when the upload record is gone, or exists but its
          # stored object failed ETag verification.
          if !upload || upload.verification_status == Upload.verification_statuses[:invalid_etag]
            if @dry_run
              puts "#{post.full_url} #{url}"
            else
              sha1 = data[2]
              recover_post_upload(post, sha1)
            end
          end
        end
      end
  rescue => e
    raise e if @stop_on_error
    puts "#{post.full_url} #{e.class}: #{e.message}"
  end

  private

  # Dispatch recovery to the local or S3 strategy based on the store type.
  def recover_post_upload(post, sha1)
    return unless valid_sha1?(sha1)

    attributes = { post: post, sha1: sha1 }

    if Discourse.store.external?
      recover_post_upload_from_s3(**attributes)
    else
      recover_post_upload_from_local(**attributes)
    end
  end

  # After a file has been re-uploaded, reconcile its sha1 with what the
  # post expects (recording a remap in a custom field when they differ)
  # and rebake the post so the markup points at the recovered upload.
  def ensure_upload!(post:, sha1:, upload:)
    return unless upload.persisted?

    if upload.sha1 != sha1
      STDERR.puts "Warning #{post.url} had an incorrect #{sha1} should be #{upload.sha1} storing in custom field 'rake uploads:fix_relative_upload_links' can fix this"

      sha_map = post.custom_fields["UPLOAD_SHA1_MAP"] || "{}"
      sha_map = JSON.parse(sha_map)
      sha_map[sha1] = upload.sha1

      post.custom_fields["UPLOAD_SHA1_MAP"] = sha_map.to_json
      post.save_custom_fields
    end

    post.rebake!
  end

  def recover_post_upload_from_local(post:, sha1:)
    recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  def recover_post_upload_from_s3(post:, sha1:)
    recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  # Search the local tombstone and upload directories for a file whose path
  # contains +sha1+; re-create an upload from each match and yield it.
  # The glob result is memoized across calls for the lifetime of the object.
  def recover_from_local(sha1:, user_id:)
    @paths ||=
      Dir.glob(File.join(Discourse.store.tombstone_dir, "original", "**", "*.*")).concat(
        Dir.glob(File.join(Discourse.store.upload_path, "original", "**", "*.*")),
      )

    @paths.each do |path|
      next unless path =~ /#{sha1}/

      begin
        tmp = Tempfile.new
        tmp.write(File.read(path))
        tmp.rewind

        recovered = create_upload(tmp, File.basename(path), user_id)
        yield recovered if block_given?
      ensure
        tmp&.close
      end
    end
  end

  # Search the S3 bucket (live and tombstone prefixes) for objects whose key
  # contains +sha1+. Tombstoned objects are copied back to their live key
  # first; then, unless an Upload with this sha1 already exists, the object
  # is downloaded and re-created as an upload, which is yielded.
  # The key listing is memoized across calls for the lifetime of the object.
  def recover_from_s3(sha1:, user_id:)
    @object_keys ||=
      begin
        s3_helper = Discourse.store.s3_helper

        if Rails.configuration.multisite
          current_db = RailsMultisite::ConnectionManagement.current_db
          s3_helper
            .list("uploads/#{current_db}/original")
            .map(&:key)
            .concat(
              s3_helper.list(
                "uploads/#{FileStore::S3Store::TOMBSTONE_PREFIX}#{current_db}/original",
              ).map(&:key),
            )
        else
          s3_helper
            .list("original")
            .map(&:key)
            .concat(s3_helper.list("#{FileStore::S3Store::TOMBSTONE_PREFIX}original").map(&:key))
        end
      end

    upload_exists = Upload.exists?(sha1: sha1)

    @object_keys.each do |key|
      next unless key =~ /#{sha1}/

      tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

      if key.include?(tombstone_prefix)
        # Restore the tombstoned object to its live key before downloading.
        old_key = key
        key = key.sub(tombstone_prefix, "")

        Discourse.store.s3_helper.copy(
          old_key,
          key,
          options: {
            acl: SiteSetting.s3_use_acls ? "public-read" : nil,
          },
        )
      end

      next if upload_exists

      url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"

      begin
        tmp =
          FileHelper.download(
            url,
            max_file_size: SiteSetting.max_image_size_kb.kilobytes,
            tmp_file_name: "recover_from_s3",
          )

        if tmp
          recovered = create_upload(tmp, File.basename(key), user_id)
          yield recovered if block_given?
        end
      ensure
        tmp&.close
      end
    end
  end

  def create_upload(file, filename, user_id)
    UploadCreator.new(file, filename).create_for(user_id)
  end

  # A recoverable sha1 must be present and exactly the expected length.
  def valid_sha1?(sha1)
    sha1.present? && sha1.length == Upload::SHA1_LENGTH
  end
end