2019-05-03 06:17:27 +08:00
# frozen_string_literal: true
2018-01-20 00:51:42 +08:00
require " db_helper "
2013-06-15 17:29:20 +08:00
require " digest/sha1 "
2018-01-20 00:51:42 +08:00
require " base62 "
2013-06-15 17:29:20 +08:00
2016-04-12 02:42:40 +08:00
################################################################################
# gather #
################################################################################
2022-03-21 22:28:52 +08:00
require " rake_helpers "
2019-11-18 09:25:42 +08:00
2016-04-12 02:42:40 +08:00
# Entry point for gathering uploads: restricts itself to the current database
# when RAILS_DB is set, otherwise walks every site.
task "uploads:gather" => :environment do
  if ENV["RAILS_DB"]
    gather_uploads
  else
    gather_uploads_for_all_sites
  end
end
# Runs gather_uploads once for each multisite connection.
def gather_uploads_for_all_sites
  RailsMultisite::ConnectionManagement.each_connection do
    gather_uploads
  end
end
2016-04-13 22:33:00 +08:00
# Returns true only when +path+ exists and is non-empty.
# Any error raised while checking (e.g. an invalid path) counts as "missing".
def file_exists?(path)
  return false unless File.exist?(path)

  File.size(path) > 0
rescue
  false
end
2016-04-12 22:00:25 +08:00
# Moves local uploads that live under another site's "/uploads/<db>/" prefix
# into the current site's prefix: hard-links the file on disk, then remaps the
# URL in the database. Prints "." per upload handled and "!" per failure.
def gather_uploads
  public_directory = "#{Rails.root}/public"
  current_db = RailsMultisite::ConnectionManagement.current_db

  puts "", "Gathering uploads for '#{current_db}'...", ""

  Upload.where("url ~ '^\/uploads\/'")
    .where("url !~ '^\/uploads\/#{current_db}'")
    .find_each do |upload|
    begin
      old_db = upload.url[/^\/uploads\/([^\/]+)\//, 1]
      from = upload.url.dup
      to = upload.url.sub("/uploads/#{old_db}/", "/uploads/#{current_db}/")
      source = "#{public_directory}#{from}"
      destination = "#{public_directory}#{to}"

      # create destination directory & copy file unless it already exists
      unless file_exists?(destination)
        # argv form bypasses the shell, so quotes, spaces or other shell
        # metacharacters in a file name cannot break (or inject into) the command
        system("mkdir", "-p", File.dirname(destination))
        system("cp", "--link", source, destination)
      end

      # ensure file has been successfully copied over
      raise unless file_exists?(destination)

      # remap links in db
      DbHelper.remap(from, to)
    rescue
      putc "!"
    else
      putc "."
    end
  end

  puts "", "Done!"
end
2015-05-25 23:59:00 +08:00
################################################################################
# backfill_shas #
################################################################################
2013-06-15 17:29:20 +08:00
# Fills in the sha1 of every upload that has none, on every site.
# Secure uploads get a random sha1 and keep the real digest in original_sha1.
task "uploads:backfill_shas" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    puts "Backfilling #{db}..."

    Upload.where(sha1: nil).find_each do |upload|
      begin
        file_path = Discourse.store.path_for(upload)
        digest = Upload.generate_digest(file_path)

        if upload.secure?
          upload.sha1 = SecureRandom.hex(20)
          upload.original_sha1 = digest
        else
          upload.sha1 = digest
          upload.original_sha1 = nil
        end

        upload.save!
        putc "."
      rescue => error
        puts "Skipping #{upload.original_filename} (#{upload.url}) #{error.message}"
      end
    end
  end

  puts "", "Done"
end
2014-06-24 21:35:15 +08:00
2015-05-25 23:59:00 +08:00
################################################################################
# migrate_to_s3 #
################################################################################
# Asks for confirmation on STDIN, then migrates uploads to S3 for the current
# database (RAILS_DB set) or for all sites.
task "uploads:migrate_to_s3" => :environment do
  STDOUT.puts("Please note that migrating to S3 is currently not reversible! \n[CTRL+c] to cancel, [ENTER] to continue")
  STDIN.gets

  if ENV["RAILS_DB"]
    migrate_to_s3
  else
    migrate_to_s3_all_sites
  end
end
# Runs migrate_to_s3 on every multisite connection.
# A RuntimeError from one site aborts the run unless SKIP_FAILED is set, in
# which case the error is printed and the next site is processed.
def migrate_to_s3_all_sites
  RailsMultisite::ConnectionManagement.each_connection do
    begin
      migrate_to_s3
    rescue RuntimeError => error
      raise error unless ENV["SKIP_FAILED"]

      puts error
    end
  end
end
2020-01-29 05:10:25 +08:00
# Builds a FileStore::ToS3Migration configured from the environment.
# DRY_RUN, MIGRATE_TO_MULTISITE and SKIP_ETAG_VERIFY act as flags: each is
# enabled whenever the variable is present, whatever its value.
def create_migration
  options = {
    s3_options: FileStore::ToS3Migration.s3_options_from_env,
    dry_run: ENV.key?("DRY_RUN"),
    migrate_to_multisite: ENV.key?("MIGRATE_TO_MULTISITE"),
    skip_etag_verify: ENV.key?("SKIP_ETAG_VERIFY")
  }

  FileStore::ToS3Migration.new(**options)
end
# Creates a migration via create_migration and runs it.
def migrate_to_s3
  migration = create_migration
  migration.migrate
end
# Reports whether every site's S3 migration succeeded and whether the Sidekiq
# backlog is small enough (at most 50 queued jobs). Exits with status 1 when
# either check fails.
task "uploads:s3_migration_status" => :environment do
  success = true

  RailsMultisite::ConnectionManagement.each_connection do
    # once one site has failed, the remaining sites are not checked
    success = create_migration.migration_successful? if success
  end

  queued_jobs = Sidekiq::Stats.new.queues.sum { |_name, size| size }

  if queued_jobs > 50
    puts "WARNING: There are #{queued_jobs} jobs queued! Wait till Sidekiq clears backlog prior to migrating site to a new host"
    exit 1
  end

  unless success
    puts "Site is not ready for migration"
    exit 1
  end

  puts "All sites appear to have uploads in order!"
end
2015-05-25 23:59:00 +08:00
################################################################################
2018-12-27 00:34:49 +08:00
# clean_up #
2015-05-25 23:59:00 +08:00
################################################################################
2014-09-30 00:31:53 +08:00
# Cleans up uploads for the current database when RAILS_DB is set, otherwise
# for all sites.
task "uploads:clean_up" => :environment do
  if ENV["RAILS_DB"]
    clean_up_uploads
  else
    clean_up_uploads_all_sites
  end
end
# Runs clean_up_uploads once for each multisite connection.
def clean_up_uploads_all_sites
  RailsMultisite::ConnectionManagement.each_connection do
    clean_up_uploads
  end
end
2014-09-30 00:31:53 +08:00
2016-09-02 14:50:13 +08:00
# Reconciles upload records with files on disk for the current site
# (internal storage only; exits with status 1 on external storage):
#   1. destroys Upload / OptimizedImage records whose file is missing,
#   2. deletes files under public/uploads/<db> that no record refers to
#      (matched by sha1 or url), plus the whole legacy avatars directory,
#   3. removes directories left empty.
# Offers to take a full backup first; exits with status 1 if that backup fails.
def clean_up_uploads
  db = RailsMultisite::ConnectionManagement.current_db

  puts "Cleaning up uploads and thumbnails for '#{db}'..."

  if Discourse.store.external?
    puts "This task only works for internal storages."
    exit 1
  end

  puts <<~TEXT
  This task will remove upload records and files permanently.

  Would you like to take a full backup before the clean up? (Y/N)
  TEXT

  answer = STDIN.gets.chomp.downcase
  if answer == 'y'
    puts "Starting backup..."
    backuper = BackupRestore::Backuper.new(Discourse.system_user.id)
    backuper.run
    exit 1 unless backuper.success
  end

  public_directory = Rails.root.join("public").to_s

  ##
  ## DATABASE vs FILE SYSTEM
  ##

  # destroys the record ("#") when its file is gone, otherwise prints "."
  destroy_if_file_missing = lambda do |record|
    path = File.join(public_directory, record.url)

    if File.exist?(path)
      putc "."
    else
      record.destroy!
      putc "#"
    end
  end

  # uploads & avatars
  Upload.find_each(&destroy_if_file_missing)

  # optimized images
  OptimizedImage.find_each(&destroy_if_file_missing)

  ##
  ## FILE SYSTEM vs DATABASE
  ##

  uploads_directory = File.join(public_directory, 'uploads', db).to_s

  # avatars (no avatar should be stored in that old directory)
  FileUtils.rm_rf("#{uploads_directory}/avatars")

  # uploads and optimized images
  Dir.glob("#{uploads_directory}/**/*.*").each do |file_path|
    sha1 = Upload.generate_digest(file_path)
    url = file_path.split(public_directory, 2)[1]

    # a file is kept as soon as any record matches it by digest or by url
    referenced = [Upload, OptimizedImage].any? do |model|
      !model.where(sha1: sha1).empty? || !model.where(url: url).empty?
    end

    if referenced
      putc "."
    else
      FileUtils.rm(file_path)
      putc "#"
    end
  end

  puts "Removing empty directories..."
  puts `find #{uploads_directory} -type d -empty -exec rmdir {} \\;`

  puts "Done!"
end
2015-05-11 08:30:22 +08:00
2015-05-25 23:59:00 +08:00
################################################################################
2019-05-29 02:00:43 +08:00
# missing files #
2015-05-25 23:59:00 +08:00
################################################################################
2015-05-11 08:30:22 +08:00
# list all missing uploads and optimized images
2019-05-29 02:00:43 +08:00
# Lists missing uploads (and optimized images unless SKIP_OPTIMIZED is set).
# With RAILS_DB set only the current database is checked; otherwise every site
# is checked, skipping externally stored sites when SKIP_EXTERNAL=1.
task "uploads:missing_files" => :environment do
  skip_optimized = ENV['SKIP_OPTIMIZED']

  if ENV["RAILS_DB"]
    list_missing_uploads(skip_optimized: skip_optimized)
    next
  end

  RailsMultisite::ConnectionManagement.each_connection do
    if ENV["SKIP_EXTERNAL"] == "1" && Discourse.store.external?
      puts "#{RailsMultisite::ConnectionManagement.current_db} has uploads stored externally skipping!"
      next
    end

    if Discourse.store.external?
      separator = "-" * 80
      puts separator
      puts "WARNING! WARNING! WARNING!"
      puts separator
      puts
      puts <<~TEXT
        #{RailsMultisite::ConnectionManagement.current_db} has uploads on S3!
        validating without inventory is likely to take an enormous amount of time.
        We recommend you run SKIP_EXTERNAL=1 rake uploads:missing to skip validating if on a multisite.
      TEXT
    end

    list_missing_uploads(skip_optimized: skip_optimized)
  end
end
2015-05-11 08:30:22 +08:00
2019-02-15 03:04:35 +08:00
# Delegates to the current store's list_missing_uploads.
#
# skip_optimized - passed through unchanged to the store.
def list_missing_uploads(skip_optimized: false)
  store = Discourse.store
  store.list_missing_uploads(skip_optimized: skip_optimized)
end
2015-05-11 18:59:50 +08:00
2019-05-29 02:00:43 +08:00
# Alias that invokes the "uploads:missing_files" task.
task "uploads:missing" => :environment do
  missing_files_task = Rake::Task["uploads:missing_files"]
  missing_files_task.invoke
end
2015-05-25 23:59:00 +08:00
################################################################################
# regenerate_missing_optimized #
################################################################################
2015-05-11 18:59:50 +08:00
# regenerate missing optimized images
# Regenerates missing optimized images for the current database when RAILS_DB
# is set, otherwise for every site.
task "uploads:regenerate_missing_optimized" => :environment do
  if ENV["RAILS_DB"]
    regenerate_missing_optimized
  else
    RailsMultisite::ConnectionManagement.each_connection do
      regenerate_missing_optimized
    end
  end
end
# Recreates optimized images (thumbnails) whose file is missing or empty on
# local storage. Does nothing but print a notice on external storage.
#
# Avatar thumbnails are processed first, then every other optimized image that
# has a url and positive dimensions. When the original image is missing too and
# the upload has an origin, the original is re-downloaded (best effort) before
# resizing. Prints "#" per regenerated image, "X" when the original cannot be
# found, "." when nothing needed doing, and finally lists the missing originals.
def regenerate_missing_optimized
  db = RailsMultisite::ConnectionManagement.current_db

  puts "Regenerating missing optimized images for '#{db}'..."

  if Discourse.store.external?
    puts "This task only works for internal storages."
    return
  end

  public_directory = "#{Rails.root}/public"
  missing_uploads = Set.new

  avatar_upload_ids = UserAvatar.all.pluck(:custom_upload_id, :gravatar_upload_id).flatten.compact

  default_scope = OptimizedImage.includes(:upload)

  # true when the file exists and is not empty
  usable = ->(path) { File.exist?(path) && File.size(path) > 0 }

  [
    default_scope
      .where(upload_id: avatar_upload_ids),

    # Hash conditions are used on purpose: the string form "NOT IN (?)" turns
    # an empty id list into "NOT IN (NULL)", which matches no rows at all, so
    # sites without avatars would never get their thumbnails regenerated.
    default_scope
      .where.not(upload_id: avatar_upload_ids)
      .where("LENGTH(COALESCE(url, '')) > 0")
      .where("width > 0 AND height > 0")
  ].each do |scope|
    scope.find_each do |optimized_image|
      upload = optimized_image.upload

      # skip orphaned optimized images and non-local urls
      next if upload.nil?
      next unless optimized_image.url =~ /^\/[^\/]/
      next unless upload.url =~ /^\/[^\/]/

      thumbnail = "#{public_directory}#{optimized_image.url}"
      original = "#{public_directory}#{upload.url}"

      if usable.call(thumbnail)
        putc "."
        next
      end

      # make sure the original image exists locally
      if !usable.call(original) && upload.origin.present?
        # try to fix it by redownloading it; a failed download is not fatal,
        # the upload simply ends up in the "missing" list below
        downloaded =
          begin
            FileHelper.download(
              upload.origin,
              max_file_size: SiteSetting.max_image_size_kb.kilobytes,
              tmp_file_name: "discourse-missing",
              follow_redirect: true
            )
          rescue StandardError
            nil
          end

        begin
          if downloaded && downloaded.size > 0
            FileUtils.mkdir_p(File.dirname(original))
            File.open(original, "wb") { |f| f.write(downloaded.read) }
          end
        ensure
          downloaded.try(:close!) if downloaded.respond_to?(:close!)
        end
      end

      if usable.call(original)
        FileUtils.mkdir_p(File.dirname(thumbnail))
        OptimizedImage.resize(original, thumbnail, optimized_image.width, optimized_image.height)
        putc "#"
      else
        missing_uploads << original
        putc "X"
      end
    end
  end

  puts "", "Done"

  if missing_uploads.size > 0
    puts "Missing uploads:"
    missing_uploads.sort.each { |u| puts u }
  end
end
2015-05-19 18:31:51 +08:00
2015-05-25 23:59:00 +08:00
################################################################################
2015-06-12 18:02:36 +08:00
# migrate_to_new_scheme #
2015-05-25 23:59:00 +08:00
################################################################################
2015-06-12 18:02:36 +08:00
# Switches the migrate_to_new_scheme site setting on.
task "uploads:start_migration" => :environment do
  SiteSetting.migrate_to_new_scheme = true

  puts "Migration started!"
end
2015-06-12 18:02:36 +08:00
# Switches the migrate_to_new_scheme site setting off.
task "uploads:stop_migration" => :environment do
  SiteSetting.migrate_to_new_scheme = false

  puts "Migration stopped!"
end
2016-09-01 15:19:14 +08:00
# Prints a disk-usage report for the current site's uploads.
#
# Arguments:
#   cache_path - optional path to an existing "<size> <path>" listing; when
#                omitted, a fresh listing is generated under /tmp with `find`.
#   limit      - number of users shown in the "most disk space" table
#                (default 10). Must be an integer; anything else raises.
#
# The report covers record counts, totals per file extension and the users
# with the largest total upload size.
task "uploads:analyze", [:cache_path, :limit] => :environment do |_, args|
  now = Time.zone.now
  current_db = RailsMultisite::ConnectionManagement.current_db

  puts "Analyzing uploads for '#{current_db}'... This may take awhile...\n"
  cache_path = args[:cache_path]

  uploads_path = Rails.root.join('public', 'uploads', current_db)

  path =
    if cache_path
      cache_path
    else
      listing = "/tmp/#{current_db}-#{now.to_i}-paths.txt"
      # touch the very file `find` writes to, so it exists even if find fails
      FileUtils.touch(listing)
      `find #{uploads_path} -type f -printf '%s %h/%f\n' > #{listing}`
      listing
    end

  extensions = Hash.new { |hash, ext| hash[ext] = { "count" => 0, "size" => 0 } }
  paths_count = 0

  File.foreach(path) do |line|
    size, file_path = line.split(" ", 2)
    next if file_path.nil? # malformed line without a path

    paths_count += 1
    stats = extensions[File.extname(file_path).chomp.downcase]
    stats["count"] += 1
    stats["size"] += size.to_i
  end

  uploads_count = Upload.count
  optimized_images_count = OptimizedImage.count

  puts <<~TEXT
  Report for '#{current_db}'
  -----------#{'-' * current_db.length}
  Number of `Upload` records in DB: #{uploads_count}
  Number of `OptimizedImage` records in DB: #{optimized_images_count}
  **Total DB records: #{uploads_count + optimized_images_count}**

  Number of images in uploads folder: #{paths_count}
  ------------------------------------#{'-' * paths_count.to_s.length}

  TEXT

  helper = Class.new do
    include ActionView::Helpers::NumberHelper
  end

  helper = helper.new

  printf "%-15s | %-15s | %-15s\n", 'extname', 'total size', 'count'
  puts "-" * 45

  extensions.sort_by { |_, value| value['size'] }.reverse_each do |extname, value|
    printf "%-15s | %-15s | %-15s\n", extname, helper.number_to_human_size(value['size']), value['count']
  end

  puts "\n"

  # Integer() rejects anything that is not a number before it reaches the SQL
  limit = Integer(args[:limit] || 10)

  sql = <<~SQL
    SELECT
      users.username,
      COUNT(uploads.user_id) AS num_of_uploads,
      SUM(uploads.filesize) AS total_size_of_uploads,
      COUNT(optimized_images.id) AS num_of_optimized_images
    FROM users
    INNER JOIN uploads ON users.id = uploads.user_id
    INNER JOIN optimized_images ON uploads.id = optimized_images.upload_id
    GROUP BY users.id
    ORDER BY total_size_of_uploads DESC
    LIMIT #{limit}
  SQL

  puts "Users using the most disk space"
  puts "-------------------------------\n"
  printf "%-25s | %-25s | %-25s | %-25s\n", 'username', 'total size of uploads', 'number of uploads', 'number of optimized images'
  puts "-" * 110

  # DB.query yields one object per row; query_single would flatten all
  # columns of all rows into a single list of values.
  DB.query(sql).each do |row|
    printf "%-25s | %-25s | %-25s | %-25s\n",
           row.username,
           helper.number_to_human_size(row.total_size_of_uploads),
           row.num_of_uploads,
           row.num_of_optimized_images
  end

  puts "\n"
  puts "List of file paths @ #{path}"
  puts "Duration: #{Time.zone.now - now} seconds"
end
2018-08-08 13:14:52 +08:00
# Runs UploadFixer.fix_all_extensions.
task "uploads:fix_incorrect_extensions" => :environment do
  UploadFixer.fix_all_extensions
end
2018-09-05 16:54:15 +08:00
2019-03-07 21:15:30 +08:00
# Alias that invokes the "uploads:recover" task.
task "uploads:recover_from_tombstone" => :environment do
  recover_task = Rake::Task["uploads:recover"]
  recover_task.invoke
end
2018-09-12 16:51:53 +08:00
# Runs UploadRecovery for the current database (RAILS_DB set) or for every
# site. DRY_RUN and STOP_ON_ERROR are passed on as booleans (true when the
# variable is present and non-blank).
task "uploads:recover" => :environment do
  recovery_options = {
    dry_run: ENV["DRY_RUN"].present?,
    stop_on_error: ENV["STOP_ON_ERROR"].present?
  }

  # a fresh UploadRecovery is built for each run
  recover = -> { UploadRecovery.new(**recovery_options).recover }

  if ENV["RAILS_DB"]
    recover.call
  else
    RailsMultisite::ConnectionManagement.each_connection { recover.call }
  end
end
2019-05-22 13:24:36 +08:00
2020-03-03 07:03:58 +08:00
# Enqueues ACL sync jobs for every upload on every site, 100 upload ids per
# call to adjust_acls. Exits with status 1 as soon as a site without external
# storage is encountered.
task "uploads:sync_s3_acls" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do
    unless Discourse.store.external?
      puts "This task only works for external storage."
      exit 1
    end

    puts "CAUTION: This task may take a long time to complete! There are #{Upload.count} uploads to sync ACLs for."
    puts ""
    puts "-" * 30
    puts "Uploads marked as secure will get a private ACL, and uploads marked as not secure will get a public ACL."
    puts "Upload ACLs will be updated in Sidekiq jobs in batches of 100 at a time, check Sidekiq queues for SyncAclsForUploads for progress."

    Upload.select(:id).find_in_batches(batch_size: 100) do |batch|
      batch_ids = batch.map(&:id)
      adjust_acls(batch_ids)
    end

    puts "", "Upload ACL sync complete!"
  end
end
2022-09-29 07:24:33 +08:00
# Turns the secure_uploads site setting off on every site, marks all secure
# uploads referenced by posts as not secure, enqueues ACL syncs for them and
# rebakes the posts that reference them. Exits with status 1 as soon as a site
# without external storage is encountered.
task "uploads:disable_secure_uploads" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    unless Discourse.store.external?
      puts "This task only works for external storage."
      exit 1
    end

    puts "Disabling secure upload and resetting uploads to not secure in #{db}...", ""

    SiteSetting.secure_uploads = false

    secure_uploads =
      Upload
        .joins(:upload_references)
        .where(upload_references: { target_type: 'Post' })
        .where(secure: true)

    # count and ids are read before the update below clears the secure flag
    secure_upload_count = secure_uploads.count
    secure_upload_ids = secure_uploads.pluck(:id)

    puts "", "Marking #{secure_upload_count} uploads as not secure.", ""

    secure_uploads.update_all(
      secure: false,
      security_last_changed_at: Time.zone.now,
      security_last_changed_reason: "marked as not secure by disable_secure_uploads task"
    )

    rebake_sql = "SELECT DISTINCT target_id FROM upload_references WHERE upload_id IN (?) AND target_type = 'Post'"
    post_ids_to_rebake = DB.query_single(rebake_sql, secure_upload_ids)

    adjust_acls(secure_upload_ids)
    log_rebake_errors(rebake_upload_posts(post_ids_to_rebake))

    puts "", "Rebaking and uploading complete!", ""
  end

  puts "", "Secure uploads are now disabled!", ""
end
2019-11-18 09:25:42 +08:00
##
# Run this task whenever the secure_uploads or login_required
# settings are changed for a Discourse instance to update
# the upload secure flag and S3 upload ACLs. Any uploads that
# have their secure status changed will have all associated posts
# rebaked.
task "uploads:secure_upload_analyse_and_update" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    unless Discourse.store.external?
      puts "This task only works for external storage."
      exit 1
    end

    puts "Analyzing security for uploads in #{db}...", ""
    post_ids_to_rebake = nil
    all_upload_ids_changed = nil

    Upload.transaction do
      # If secure upload is enabled we need to first set the access control post of
      # all post uploads (even uploads that are linked to multiple posts). If the
      # upload is not set to secure upload then this has no other effect on the upload,
      # but we _must_ know what the access control post is because the with_secure_uploads?
      # method is on the post, and this knows about the category security & PM status
      update_uploads_access_control_post if SiteSetting.secure_uploads?

      puts "", "Analysing which uploads need to be marked secure and be rebaked.", ""

      post_ids_to_rebake, all_upload_ids_changed =
        if SiteSetting.login_required?
          # Simply mark all uploads linked to posts secure if login_required because no anons will be able to access them.
          mark_all_as_secure_login_required
        else
          # Otherwise only mark uploads linked to posts in secure categories or PMs as secure.
          update_specific_upload_security_no_login_required
        end
    end

    # Enqueue rebakes AFTER upload transaction complete, so there is no race condition
    # between updating the DB and the rebakes occurring.
    log_rebake_errors(rebake_upload_posts(post_ids_to_rebake))

    # Also do this AFTER upload transaction complete so we don't end up with any
    # errors leaving ACLs in a bad state (the ACL sync task can be run to fix any
    # outliers at any time).
    adjust_acls(all_upload_ids_changed)
  end

  puts "", "", "Done!"
end
2022-05-23 11:14:11 +08:00
# Enqueues :sync_acls_for_uploads jobs for the given upload ids, 100 ids per
# job. Progress messages are only printed when more than one job is needed.
#
# upload_ids_to_adjust_acl_for - Array of upload ids.
def adjust_acls(upload_ids_to_adjust_acl_for)
  upload_count = upload_ids_to_adjust_acl_for.count
  jobs_to_create = (upload_count / 100.0).ceil
  announce = jobs_to_create > 1

  if announce
    # report the number of uploads, not the (potentially huge) id list itself
    puts "Adjusting ACLs for #{upload_count} uploads. These will be batched across #{jobs_to_create} sync job(s)."
  end

  upload_ids_to_adjust_acl_for.each_slice(100) do |upload_ids|
    Jobs.enqueue(:sync_acls_for_uploads, upload_ids: upload_ids)
  end

  if announce
    puts "ACL batching complete. Keep an eye on the Sidekiq queue for progress."
  end
end
2022-05-23 11:14:11 +08:00
# Used when login_required is enabled: marks every not-yet-secure upload that
# is referenced by a post as secure, then marks secure uploads outside that
# set as not secure.
#
# Returns [post_ids_to_rebake, upload_ids_changed].
#
# NOTE(review): the second UPDATE excludes only the ids returned by the first
# one (uploads that were *newly* marked secure), so uploads that were already
# secure and linked to posts appear to be flipped to not secure - confirm this
# is intended. Also confirm how DB expands an empty list for "NOT IN (?)".
def mark_all_as_secure_login_required
  mark_secure_sql = <<~SQL
    WITH upl AS (
      SELECT DISTINCT ON (upload_id) upload_id
      FROM upload_references
      INNER JOIN posts ON posts.id = upload_references.target_id AND upload_references.target_type = 'Post'
      INNER JOIN topics ON topics.id = posts.topic_id
    )
    UPDATE uploads
    SET secure = true,
        security_last_changed_reason = 'upload security rake task mark as secure',
        security_last_changed_at = NOW()
    FROM upl
    WHERE uploads.id = upl.upload_id AND NOT uploads.secure
    RETURNING uploads.id
  SQL
  post_upload_ids_marked_secure = DB.query_single(mark_secure_sql)
  puts "Marked #{post_upload_ids_marked_secure.count} upload(s) as secure because login_required is true.", ""

  mark_not_secure_sql = <<~SQL
    UPDATE uploads
    SET secure = false,
        security_last_changed_reason = 'upload security rake task mark as not secure',
        security_last_changed_at = NOW()
    WHERE id NOT IN (?) AND uploads.secure
    RETURNING uploads.id
  SQL
  upload_ids_marked_not_secure = DB.query_single(mark_not_secure_sql, post_upload_ids_marked_secure)
  puts "Marked #{upload_ids_marked_not_secure.count} upload(s) as not secure because they are not linked to posts.", ""
  puts "Finished marking upload(s) as secure."

  rebake_sql = "SELECT DISTINCT target_id FROM upload_references WHERE upload_id IN (?) AND target_type = 'Post'"
  post_ids_to_rebake = DB.query_single(rebake_sql, post_upload_ids_marked_secure)

  changed_ids = (post_upload_ids_marked_secure + upload_ids_marked_not_secure).uniq
  [post_ids_to_rebake, changed_ids]
end
2020-02-17 12:21:43 +08:00
# Prints the collected rebake error messages, one per line, under a heading.
# Prints nothing when the list is empty.
def log_rebake_errors(rebake_errors)
  unless rebake_errors.empty?
    puts "The following post rebakes failed with error:", ""
    rebake_errors.each { |error_message| puts error_message }
  end
end
2022-05-23 11:14:11 +08:00
# Used when login_required is disabled. Runs three UPDATEs:
#   1. uploads referenced by posts in read-restricted categories or private
#      messages become secure,
#   2. uploads referenced by posts in regular topics that are uncategorized or
#      in non-restricted categories become not secure,
#   3. any other secure upload not touched by steps 1-2 becomes not secure.
#
# Returns [post_ids_to_rebake, all_upload_ids_changed]; the post ids are
# looked up from the uploads changed in steps 1-2 only.
def update_specific_upload_security_no_login_required
  # A simplification of the rules found in UploadSecurity which is a lot faster than
  # having to loop through records and use that class to check security.
  mark_secure_sql = <<~SQL
    WITH upl AS (
      SELECT DISTINCT ON (upload_id) upload_id
      FROM upload_references
      INNER JOIN posts ON posts.id = upload_references.target_id AND upload_references.target_type = 'Post'
      INNER JOIN topics ON topics.id = posts.topic_id
      LEFT JOIN categories ON categories.id = topics.category_id
      WHERE (topics.category_id IS NOT NULL AND categories.read_restricted) OR
            (topics.archetype = 'private_message')
    )
    UPDATE uploads
    SET secure = true,
        security_last_changed_reason = 'upload security rake task mark as secure',
        security_last_changed_at = NOW()
    FROM upl
    WHERE uploads.id = upl.upload_id AND NOT uploads.secure
    RETURNING uploads.id
  SQL
  post_upload_ids_marked_secure = DB.query_single(mark_secure_sql)
  puts "Marked #{post_upload_ids_marked_secure.length} uploads as secure."

  # Anything in a public category or a regular topic should not be secure.
  mark_public_sql = <<~SQL
    WITH upl AS (
      SELECT DISTINCT ON (upload_id) upload_id
      FROM upload_references
      INNER JOIN posts ON posts.id = upload_references.target_id AND upload_references.target_type = 'Post'
      INNER JOIN topics ON topics.id = posts.topic_id
      LEFT JOIN categories ON categories.id = topics.category_id
      WHERE (topics.archetype = 'regular' AND topics.category_id IS NOT NULL AND NOT categories.read_restricted) OR
            (topics.archetype = 'regular' AND topics.category_id IS NULL)
    )
    UPDATE uploads
    SET secure = false,
        security_last_changed_reason = 'upload security rake task mark as not secure',
        security_last_changed_at = NOW()
    FROM upl
    WHERE uploads.id = upl.upload_id AND uploads.secure
    RETURNING uploads.id
  SQL
  post_upload_ids_marked_not_secure = DB.query_single(mark_public_sql)
  puts "Marked #{post_upload_ids_marked_not_secure.length} uploads as not secure."

  # Everything else should not be secure!
  upload_ids_changed = (post_upload_ids_marked_secure + post_upload_ids_marked_not_secure).uniq
  mark_remaining_sql = <<~SQL
    UPDATE uploads
    SET secure = false,
        security_last_changed_reason = 'upload security rake task mark as not secure',
        security_last_changed_at = NOW()
    WHERE id NOT IN (?) AND uploads.secure
    RETURNING uploads.id
  SQL
  upload_ids_marked_not_secure = DB.query_single(mark_remaining_sql, upload_ids_changed)
  puts "Finished updating upload security. Marked #{upload_ids_marked_not_secure.length} uploads not linked to posts as not secure."

  all_upload_ids_changed = (upload_ids_changed + upload_ids_marked_not_secure).uniq

  rebake_sql = "SELECT DISTINCT target_id FROM upload_references WHERE upload_id IN (?) AND target_type = 'Post'"
  post_ids_to_rebake = DB.query_single(rebake_sql, upload_ids_changed)

  [post_ids_to_rebake, all_upload_ids_changed]
end
2020-02-17 12:21:43 +08:00
# Sets access_control_post_id on every upload referenced by at least one post
# to the lowest post id referencing it (DISTINCT ON ... ORDER BY upload_id,
# target_id picks the first row per upload).
def update_uploads_access_control_post
  sql = <<~SQL
    WITH upl AS (
      SELECT DISTINCT ON (upload_id) upload_id, target_id AS post_id
      FROM upload_references
      WHERE target_type = 'Post'
      ORDER BY upload_id, target_id
    )
    UPDATE uploads
    SET access_control_post_id = upl.post_id
    FROM upl
    WHERE uploads.id = upl.upload_id
  SQL

  DB.exec(sql)
end
2022-05-23 11:14:11 +08:00
# Rebakes every post whose id is in +post_ids_to_rebake+, printing progress
# through RakeHelpers.print_status_with_label.
#
# A failure while rebaking one post is recorded and the loop carries on with
# the next post, so a single broken post no longer prevents the remaining
# posts from being rebaked.
#
# @param post_ids_to_rebake [Array<Integer>] ids handed to Post.where(id: ...)
# @return [Array<String>] the messages of every error raised by rebake!
#   (empty when all posts were rebaked successfully)
def rebake_upload_posts(post_ids_to_rebake)
  posts_to_rebake = Post.where(id: post_ids_to_rebake)
  total = posts_to_rebake.length
  post_rebake_errors = []

  puts "", "Rebaking #{total} posts with affected uploads.", ""

  processed = 0
  posts_to_rebake.each do |post|
    RakeHelpers.print_status_with_label("Rebaking posts.....", processed, total)

    begin
      post.rebake!
    rescue => e
      # keep going: one post that cannot be rebaked must not stop the rest
      post_rebake_errors << e.message
    end

    processed += 1
  end

  RakeHelpers.print_status_with_label("Rebaking complete!", processed, total)
  puts ""

  post_rebake_errors
end
2019-05-22 13:24:36 +08:00
# Rewrites markdown links of the form "(/uploads/...)" in +post.raw+ so they
# use the upload's short_url instead of the relative path.
#
# Each link is resolved to an Upload by, in order:
#   1. the exact url,
#   2. the sha1 that Upload.extract_url pulls out of the url,
#   3. the sha1 that the post's "UPLOAD_SHA1_MAP" custom field (a JSON
#      object) maps that sha1 to.
# Links that cannot be resolved are reported and left untouched.
#
# When at least one link was rewritten, the previous raw is stored in the
# "BACKUP_POST_RAW" custom field, the post is saved without validation and
# rebaked.
#
# @param post [Post] the post whose raw should be corrected
def inline_uploads(post)
  replaced = false
  original_raw = post.raw

  # The url capture stops at whitespace or ")" and the remainder of the link
  # (e.g. a title) stops at the first ")". A greedy \S+ / .* here lets the url
  # swallow the closing paren and the match run to the last ")" on the line,
  # which breaks lines containing more than one link.
  post.raw = post.raw.gsub(%r{(\((/uploads[^\s)]+)[^)\n]*\))}) do
    link = Regexp.last_match(1)
    url = Regexp.last_match(2)

    upload = Upload.find_by(url: url)

    unless upload
      data = Upload.extract_url(url)
      sha1 = data && data[2]

      if sha1
        upload = Upload.find_by(sha1: sha1)

        unless upload
          sha_map = JSON.parse(post.custom_fields["UPLOAD_SHA1_MAP"] || "{}")
          mapped_sha = sha_map[sha1]
          upload = Upload.find_by(sha1: mapped_sha) if mapped_sha
        end
      end
    end

    if upload&.id
      replaced = true
      # block form: the replacement is inserted verbatim
      link.sub(url) { upload.short_url }
    else
      puts "Upload not found #{url} in Post #{post.id} - #{post.url}"
      link
    end
  end

  if replaced
    puts "Corrected image urls in #{post.full_url} raw backup stored in custom field"
    post.custom_fields["BACKUP_POST_RAW"] = original_raw
    post.save_custom_fields
    post.save!(validate: false)
    post.rebake!
  end
end
2019-05-23 13:09:16 +08:00
# Replaces HTML <img src="/uploads/..."> tags in +post.raw+ with markdown
# images ("![image](short_url)") that point at the matching Upload.
#
# Each src is resolved by, in order: the exact upload url, the sha1 that
# Upload.extract_url pulls out of the url, and finally by creating a new
# upload from the file under Rails.root/public if it exists on disk.
# Tags that cannot be resolved are reported and kept as they are.
#
# When at least one tag was replaced, the previous raw is stored in the
# "BACKUP_POST_RAW" custom field, the post is saved without validation and
# rebaked.
#
# @param post [Post] the post whose raw should be corrected
def inline_img_tags(post)
  replaced = false
  original_raw = post.raw

  post.raw = post.raw.gsub(/(<img\s+src=["'](\/uploads\/[^'"]*)["'].*>)/i) do
    tag = Regexp.last_match(1)
    src = Regexp.last_match(2)

    # src is joined onto the public directory below, so anything containing
    # ".." is skipped. The tag itself must be returned: a bare `next` would
    # make gsub substitute nil and silently delete the tag from the post.
    next tag if src.include?("..")

    upload = Upload.find_by(url: src)

    unless upload
      data = Upload.extract_url(src)
      sha1 = data && data[2]
      upload = Upload.find_by(sha1: sha1) if sha1
    end

    unless upload
      local_file = File.join(Rails.root, "public", src)
      if File.exist?(local_file)
        File.open(local_file) do |f|
          upload = UploadCreator.new(f, "image").create_for(post.user_id)
        end
      end
    end

    # Same check as inline_uploads: only link uploads that have an id.
    # NOTE(review): presumably create_for can hand back an unsaved record
    # when creation fails - confirm against UploadCreator.
    if upload&.id
      replaced = true
      "![image](#{upload.short_url})"
    else
      puts "skipping missing upload in #{post.full_url} #{tag}"
      tag
    end
  end

  if replaced
    puts "Corrected image urls in #{post.full_url} raw backup stored in custom field"
    post.custom_fields["BACKUP_POST_RAW"] = original_raw
    post.save_custom_fields
    post.save!(validate: false)
    post.rebake!
  end
end
# Runs both relative-link fixers over the posts of the current database:
# posts whose raw contains a markdown "](/uploads" link go through
# inline_uploads, posts whose raw contains an <img ... src=.../uploads/...>
# tag go through inline_img_tags.
def fix_relative_links
  posts_with_markdown_links = Post.where("raw like ?", "%](/uploads%")
  posts_with_markdown_links.find_each { |post| inline_uploads(post) }

  posts_with_img_tags = Post.where("raw ilike ?", "%<img%src=%/uploads/%>%")
  posts_with_img_tags.find_each { |post| inline_img_tags(post) }
end
# Runs fix_relative_links. When the current database is "default" it is run
# once for every multisite connection, otherwise only for the current one.
task "uploads:fix_relative_upload_links" => :environment do
  if RailsMultisite::ConnectionManagement.current_db == "default"
    RailsMultisite::ConnectionManagement.each_connection { fix_relative_links }
  else
    fix_relative_links
  end
end
2020-08-12 11:32:47 +08:00
2020-08-13 06:26:13 +08:00
# Prints a report about uploads whose verification_status is :invalid_etag:
#
#   * every post that references such an upload, with the url and the
#     base62 sha1 file name of each missing upload,
#   * overall totals,
#   * the number of missing uploads that are not referenced by any post,
#   * for a fixed list of tables/columns that hold upload ids, how many rows
#     point at one of the missing uploads.
#
# Only reads from the database and writes to stdout.
def analyze_missing_s3
  puts "List of posts with missing images:"

  # The target_type filter belongs to the join condition. As a WHERE
  # predicate it rejects the NULL-extended rows the RIGHT JOIN produces for
  # uploads without a post reference, which would leave `other` permanently
  # empty and hide those uploads from the cross-table lookups below.
  sql = <<~SQL
    SELECT ur.target_id, u.url, u.sha1, u.extension, u.id
    FROM upload_references ur
    RIGHT JOIN uploads u ON u.id = ur.upload_id AND ur.target_type = 'Post'
    WHERE u.verification_status = :invalid_etag
    ORDER BY ur.created_at
  SQL

  lookup = {} # post id => [[url, sha1, extension], ...]
  other = []  # rows for uploads that no post references
  rows = []

  DB.query(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |r|
    rows << r

    if r.target_id
      (lookup[r.target_id] ||= []) << [r.url, r.sha1, r.extension]
    else
      other << r
    end
  end

  posts = Post.where(id: lookup.keys)

  posts.order(:created_at).each do |post|
    puts "#{Discourse.base_url}/p/#{post.id} #{lookup[post.id].length} missing, #{post.created_at}"

    lookup[post.id].each do |url, sha1, extension|
      puts url
      puts "#{Upload.base62_sha1(sha1)}.#{extension}"
    end

    puts
  end

  missing_uploads = Upload.where(verification_status: Upload.verification_statuses[:invalid_etag])
  puts "Total missing uploads: #{missing_uploads.count}, newest is #{missing_uploads.maximum(:created_at)}"
  puts "Total problem posts: #{lookup.keys.count} with #{lookup.values.sum { |a| a.length }} missing uploads"
  puts "Other missing uploads count: #{other.count}"

  if rows.count > 0
    ids = rows.map { |r| r.id }

    # [table, column] or [table, [sql expression, extra WHERE condition]]
    lookups = [
      [:upload_references, :upload_id],
      [:users, :uploaded_avatar_id],
      [:user_avatars, :gravatar_upload_id],
      [:user_avatars, :custom_upload_id],
      [:site_settings, ["NULLIF(value, '')::integer", "data_type = #{SiteSettings::TypeSupervisor.types[:upload].to_i}"]],
      [:user_profiles, :profile_background_upload_id],
      [:user_profiles, :card_background_upload_id],
      [:categories, :uploaded_logo_id],
      [:categories, :uploaded_logo_dark_id],
      [:categories, :uploaded_background_id],
      [:custom_emojis, :upload_id],
      [:theme_fields, :upload_id],
      [:user_exports, :upload_id],
      [:groups, :flair_upload_id],
    ]

    lookups.each do |table, (column, where)|
      count = DB.query_single(<<~SQL, ids: ids).first
        SELECT COUNT(*) FROM #{table} WHERE #{column} IN (:ids) #{"AND #{where}" if where}
      SQL

      if count > 0
        puts "Found #{count} missing row#{"s" if count > 1} in #{table}(#{column})"
      end
    end
  end
end
2020-08-27 09:49:50 +08:00
# Interactively deletes the Upload records whose verification_status is
# :invalid_etag.
#
# Lists the affected uploads (oldest first) and asks for confirmation on
# STDIN. Only the exact answer "YES" deletes the records; anything else,
# including end-of-input on STDIN, prints "Aborting" to STDERR and exits
# the process with status 1. Does nothing when there are no such uploads.
def delete_missing_s3
  missing = Upload.where(
    verification_status: Upload.verification_statuses[:invalid_etag],
  ).order(:created_at)

  count = missing.count
  return unless count > 0

  puts "The following uploads will be deleted from the database"
  missing.each do |upload|
    puts "#{upload.id} - #{upload.url} - #{upload.created_at}"
  end

  puts "Please confirm you wish to delete #{count} upload records by typing YES"

  # gets returns nil on EOF (closed or redirected stdin); treat that as
  # "not confirmed" rather than crashing on nil.strip
  confirm = STDIN.gets.to_s.strip

  if confirm == "YES"
    missing.destroy_all
    puts "#{count} records were deleted"
  else
    STDERR.puts "Aborting"
    exit 1
  end
end
# Runs delete_missing_s3. When the current database is "default" it is run
# once for every multisite connection, otherwise only for the current one.
task "uploads:delete_missing_s3" => :environment do
  if RailsMultisite::ConnectionManagement.current_db == "default"
    RailsMultisite::ConnectionManagement.each_connection { delete_missing_s3 }
  else
    delete_missing_s3
  end
end
2020-08-13 06:26:13 +08:00
# Runs analyze_missing_s3. When the current database is "default" it is run
# once for every multisite connection, otherwise only for the current one.
task "uploads:analyze_missing_s3" => :environment do
  if RailsMultisite::ConnectionManagement.current_db == "default"
    RailsMultisite::ConnectionManagement.each_connection { analyze_missing_s3 }
  else
    analyze_missing_s3
  end
end
2020-08-13 06:26:13 +08:00
# Tries to repair every upload whose verification_status is :invalid_etag.
#
# For each such upload the file is downloaded again (from its url, falling
# back to its origin), pushed through UploadCreator, and the resulting etag
# and url are copied onto the existing record, which is then marked
# :unchecked. Optimized images of a repaired upload are destroyed and the
# posts referencing it are rebaked. Afterwards every post that still
# references an :invalid_etag upload is rebaked once.
#
# Jobs are switched to run inline first via Jobs.run_immediately!.
def fix_missing_s3
  Jobs.run_immediately!

  puts "Attempting to download missing uploads and recreate"

  ids = Upload.where(
    verification_status: Upload.verification_statuses[:invalid_etag],
  ).pluck(:id)

  ids.each do |id|
    upload = Upload.find_by(id: id)
    next unless upload

    tempfile, downloaded_from = download_missing_s3_upload(upload)
    next unless tempfile

    begin
      puts "Successfully downloaded upload id: #{upload.id} - #{downloaded_from} fixing upload"
      recreate_missing_s3_upload(upload, tempfile)
    ensure
      # Release the download, otherwise one temp file per processed upload
      # stays open until the task exits.
      # NOTE(review): assumes FileHelper.download returns a Tempfile-like
      # object - confirm.
      if tempfile.respond_to?(:close!)
        tempfile.close!
      elsif tempfile.respond_to?(:close)
        tempfile.close
      end
    end
  end

  puts "Attempting to automatically fix problem uploads"
  puts
  puts "Rebaking posts with missing uploads, this can take a while as all rebaking runs inline"

  # DISTINCT: a post that references several broken uploads only needs to
  # be rebaked once.
  sql = <<~SQL
    SELECT DISTINCT ur.target_id
    FROM upload_references ur
    JOIN uploads u ON u.id = ur.upload_id
    WHERE ur.target_type = 'Post' AND u.verification_status = :invalid_etag
    ORDER BY ur.target_id DESC
  SQL

  DB.query_single(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |post_id|
    post = Post.find_by(id: post_id)
    if post
      post.rebake!
      print "."
    else
      puts "Skipping #{post_id} since it is deleted"
    end
  end

  puts
end

# Downloads the file behind +upload+, first from upload.url and, when that
# raises and an origin is present, from upload.origin. Failures are printed.
#
# @return [Array] [tempfile, url it was downloaded from]; tempfile is nil
#   when nothing could be downloaded
def download_missing_s3_upload(upload)
  [
    FileHelper.download(
      upload.url,
      max_file_size: 30.megabyte,
      tmp_file_name: "#{SecureRandom.hex}.#{upload.extension}",
    ),
    upload.url,
  ]
rescue => e
  unless upload.origin.present?
    puts "Failed to download #{upload.url} #{e}"
    return nil, nil
  end

  begin
    [
      FileHelper.download(
        upload.origin,
        max_file_size: 30.megabyte,
        tmp_file_name: "#{SecureRandom.hex}.#{upload.extension}",
      ),
      upload.origin,
    ]
  rescue => origin_error
    puts "Failed to download #{upload.origin} #{origin_error}"
    [nil, nil]
  end
end

# Re-creates the stored file for +upload+ from +tempfile+ and copies the new
# etag and url onto the existing record, then rebakes the posts using it.
# Errors while creating or saving are printed and end the repair of this
# upload.
def recreate_missing_s3_upload(upload, tempfile)
  fixed_upload = nil
  fix_error = nil

  # The transaction is always rolled back: only the values of the upload
  # returned by UploadCreator (etag, url) are kept, not its database row.
  Upload.transaction do
    begin
      # NOTE(review): presumably the random sha1 keeps UploadCreator from
      # matching this existing record - confirm.
      upload.update_column(:sha1, SecureRandom.hex)
      fixed_upload =
        UploadCreator.new(
          tempfile,
          "temp.#{upload.extension}",
          skip_validations: true,
        ).create_for(Discourse.system_user.id)
    rescue => fix_error
      # invalid extension is the most common issue
    end

    raise ActiveRecord::Rollback
  end

  if fix_error
    puts "Failed to fix upload #{fix_error}"
    return
  end

  # we do not fix sha, it may be wrong for arbitrary reasons, if we correct it
  # we may end up breaking posts
  begin
    upload.assign_attributes(
      etag: fixed_upload.etag,
      url: fixed_upload.url,
      verification_status: Upload.verification_statuses[:unchecked],
    )
    upload.save!(validate: false)
  rescue => save_error
    # url might be null
    puts "Failed to save upload #{save_error}"
    return
  end

  OptimizedImage.where(upload_id: upload.id).destroy_all
  rebake_ids = UploadReference.where(upload_id: upload.id).where(target_type: "Post").pluck(:target_id)

  if rebake_ids.present?
    Post.where(id: rebake_ids).each do |post|
      puts "rebake post #{post.id}"
      post.rebake!
    end
  end
end
2020-08-13 06:26:13 +08:00
# Runs fix_missing_s3. When the current database is "default" it is run once
# for every multisite connection, otherwise only for the current one.
task "uploads:fix_missing_s3" => :environment do
  if RailsMultisite::ConnectionManagement.current_db == "default"
    RailsMultisite::ConnectionManagement.each_connection { fix_missing_s3 }
  else
    fix_missing_s3
  end
end