# frozen_string_literal: true
# Scans posts for upload references whose backing Upload records are missing
# and attempts to recover the files from local tombstone/original directories
# or from the S3 store (including its tombstone prefix).
class UploadRecovery
  # @param dry_run [Boolean] when true, only print what would be recovered
  # @param stop_on_error [Boolean] when true, re-raise the first per-post error
  #   instead of logging it and continuing
  def initialize(dry_run: false, stop_on_error: false)
    @dry_run = dry_run
    @stop_on_error = stop_on_error
  end

  # Walks every post that references uploads and tries to recover any
  # missing upload behind an <img> or <a> in the cooked content.
  #
  # @param posts [ActiveRecord::Relation, Class] scope to search (defaults to Post)
  def recover(posts = Post)
    posts.have_uploads.find_each do |post|
      begin
        analyzer = PostAnalyzer.new(post.raw, post.topic_id)

        analyzer.cooked_stripped.css("img", "a").each do |media|
          if media.name == "img" && orig_src = media["data-orig-src"]
            # Skip images whose classes are whitelisted (e.g. emoji, avatars):
            # those are not user uploads we need to recover.
            if dom_class = media["class"]
              if (Post.white_listed_image_classes & dom_class.split).count > 0
                next
              end
            end

            if @dry_run
              puts "#{post.full_url} #{orig_src}"
            else
              recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
            end
          elsif url = (media["href"] || media["src"])
            data = Upload.extract_url(url)
            next unless data

            # extract_url's third capture group is the upload's sha1.
            sha1 = data[2]

            # Only attempt recovery when no Upload record exists for the URL.
            unless upload = Upload.get_from_url(url)
              if @dry_run
                puts "#{post.full_url} #{url}"
              else
                recover_post_upload(post, sha1)
              end
            end
          end
        end
      rescue => e
        raise e if @stop_on_error
        # Best-effort: log the failure for this post and keep going.
        puts "#{post.full_url} #{e.class}: #{e.message}"
      end
    end
  end

  private

  # Dispatches recovery of a single upload (identified by sha1) referenced
  # by +post+ to the local-disk or S3 strategy depending on the store.
  def recover_post_upload(post, sha1)
    return unless valid_sha1?(sha1)

    attributes = {
      post: post,
      sha1: sha1
    }

    # Splat into keyword arguments explicitly: passing the bare hash relies on
    # implicit hash-to-kwargs conversion, which was removed in Ruby 3.
    if Discourse.store.external?
      recover_post_upload_from_s3(**attributes)
    else
      recover_post_upload_from_local(**attributes)
    end
  end

  # Records the recovered upload against the post and rebakes it.
  # If the recovered file's sha1 differs from the one referenced in the post,
  # the mapping is stored in the UPLOAD_SHA1_MAP custom field so that
  # `rake uploads:fix_relative_upload_links` can repair the links later.
  def ensure_upload!(post:, sha1:, upload:)
    return if !upload.persisted?

    if upload.sha1 != sha1
      STDERR.puts "Warning #{post.url} had an incorrect #{sha1} should be #{upload.sha1} storing in custom field 'rake uploads:fix_relative_upload_links' can fix this"
      sha_map = post.custom_fields["UPLOAD_SHA1_MAP"] || "{}"
      sha_map = JSON.parse(sha_map)
      sha_map[sha1] = upload.sha1
      post.custom_fields["UPLOAD_SHA1_MAP"] = sha_map.to_json
      post.save_custom_fields
    end

    post.rebake!
  end

  def recover_post_upload_from_local(post:, sha1:)
    recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  def recover_post_upload_from_s3(post:, sha1:)
    recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  # Searches the local uploads and tombstone directories for a file whose
  # path contains +sha1+, re-creates the Upload from it, and yields it.
  # The directory listing is memoized across calls for the batch run.
  def recover_from_local(sha1:, user_id:)
    public_path = Rails.root.join("public")

    @paths ||= begin
      Dir.glob(File.join(
        public_path,
        'uploads',
        'tombstone',
        RailsMultisite::ConnectionManagement.current_db,
        'original',
        '**',
        '*.*'
      )).concat(Dir.glob(File.join(
        public_path,
        'uploads',
        RailsMultisite::ConnectionManagement.current_db,
        'original',
        '**',
        '*.*'
      )))
    end

    @paths.each do |path|
      if path =~ /#{sha1}/
        begin
          # Copy into a Tempfile so UploadCreator owns an independent file.
          tmp = Tempfile.new
          tmp.write(File.read(path))
          tmp.rewind

          upload = create_upload(tmp, File.basename(path), user_id)
          yield upload if block_given?
        ensure
          tmp&.close
        end
      end
    end
  end

  # Searches S3 "original" keys (live and tombstoned) for +sha1+. Tombstoned
  # objects are first copied back to their live key, then the file is
  # downloaded, re-created as an Upload, and yielded.
  # The key listing is memoized across calls for the batch run.
  def recover_from_s3(sha1:, user_id:)
    @object_keys ||= begin
      s3_helper = Discourse.store.s3_helper
      s3_helper.list("original").map(&:key).concat(
        s3_helper.list("#{FileStore::S3Store::TOMBSTONE_PREFIX}original").map(&:key)
      )
    end

    @object_keys.each do |key|
      if key =~ /#{sha1}/
        tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

        if key.include?(tombstone_prefix)
          old_key = key
          key = key.sub(tombstone_prefix, "")

          # Restore the tombstoned object back to its live location.
          Discourse.store.s3_helper.copy(
            old_key,
            key,
            options: { acl: "public-read" }
          )
        end

        url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"

        begin
          tmp = FileHelper.download(
            url,
            max_file_size: SiteSetting.max_image_size_kb.kilobytes,
            tmp_file_name: "recover_from_s3"
          )

          if tmp
            upload = create_upload(tmp, File.basename(key), user_id)
            yield upload if block_given?
          end
        ensure
          tmp&.close
        end
      end
    end
  end

  def create_upload(file, filename, user_id)
    UploadCreator.new(file, filename).create_for(user_id)
  end

  # A recoverable sha1 must be present and exactly Upload::SHA1_LENGTH chars.
  def valid_sha1?(sha1)
    sha1.present? && sha1.length == Upload::SHA1_LENGTH
  end
end