# frozen_string_literal: true

require "db_helper"
require "digest/sha1"
require "base62"

################################################################################
#                                    gather                                    #
################################################################################

require_dependency "rake_helpers"

# Gather uploads from other multisite databases into the current one.
task "uploads:gather" => :environment do
  ENV["RAILS_DB"] ? gather_uploads : gather_uploads_for_all_sites
end

def gather_uploads_for_all_sites
  RailsMultisite::ConnectionManagement.each_connection { gather_uploads }
end

# True when the file exists and is non-empty; any error (bad path, permission)
# is treated as "does not exist".
def file_exists?(path)
  File.exist?(path) && File.size(path) > 0
rescue
  false
end

# Hard-link uploads that still live under another site's /uploads/<db>/ prefix
# into the current site's prefix and remap their URLs in the database.
def gather_uploads
  public_directory = "#{Rails.root}/public"
  current_db = RailsMultisite::ConnectionManagement.current_db

  puts "", "Gathering uploads for '#{current_db}'...", ""

  Upload.where("url ~ '^\/uploads\/'")
    .where("url !~ '^\/uploads\/#{current_db}'")
    .find_each do |upload|
    begin
      old_db = upload.url[/^\/uploads\/([^\/]+)\//, 1]
      from = upload.url.dup
      to = upload.url.sub("/uploads/#{old_db}/", "/uploads/#{current_db}/")
      source = "#{public_directory}#{from}"
      destination = "#{public_directory}#{to}"

      # create destination directory & copy file unless it already exists
      unless file_exists?(destination)
        `mkdir -p '#{File.dirname(destination)}'`
        `cp --link '#{source}' '#{destination}'`
      end

      # ensure file has been successfully copied over
      raise unless file_exists?(destination)

      # remap links in db
      DbHelper.remap(from, to)
    rescue
      putc "!"
    else
      putc "."
    end
  end

  puts "", "Done!"
end

################################################################################
#                                backfill_shas                                 #
################################################################################

# Compute and store the SHA1 digest for uploads that are missing one.
task "uploads:backfill_shas" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    puts "Backfilling #{db}..."
    Upload.where(sha1: nil).find_each do |u|
      begin
        path = Discourse.store.path_for(u)
        u.sha1 = Upload.generate_digest(path)
        u.save!
        putc "."
      rescue => e
        puts "Skipping #{u.original_filename} (#{u.url}) #{e.message}"
      end
    end
  end
  puts "", "Done"
end

################################################################################
#                               migrate_from_s3                                #
################################################################################

task "uploads:migrate_from_s3" => :environment do
  ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3
end

# Best-effort guess of an upload's original filename: content-disposition
# header first, then the raw post markup, then the URL's basename.
# Returns nil on any failure.
def guess_filename(url, raw)
  begin
    uri = URI.parse("http:#{url}")
    f = uri.open("rb", read_timeout: 5, redirect: true, allow_redirections: :all)
    filename = if f.meta && f.meta["content-disposition"]
      f.meta["content-disposition"][/filename="([^"]+)"/, 1].presence
    end
    # NOTE(review): the opening <a ...> portion of this regex was lost to
    # markup-stripping in the source; reconstructed from the surviving
    # `([^<]+)<\/a>` remnant — confirm against upstream history.
    filename ||= raw[/<a class="attachment" href="(?:https?:)?#{Regexp.escape(url)}">([^<]+)<\/a>/, 1].presence
    filename ||= File.basename(url)
    filename
  rescue
    nil
  ensure
    f.try(:close!) rescue nil
  end
end

def migrate_all_from_s3
  RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3 }
end

# Download every S3-hosted upload referenced in posts back to local storage,
# recreating Upload records and rewriting the raw markup.
def migrate_from_s3
  require "file_store/s3_store"

  # make sure S3 is disabled
  if SiteSetting.Upload.enable_s3_uploads
    puts "You must disable S3 uploads before running that task."
    return
  end

  db = RailsMultisite::ConnectionManagement.current_db

  puts "Migrating uploads from S3 to local storage for '#{db}'..."

  max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes

  Post
    .where("user_id > 0")
    .where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%(upload://%'")
    .find_each do |post|
    begin
      updated = false

      # Absolute amazonaws.com URLs embedded in the raw markup.
      post.raw.gsub!(/(\/\/[\w.-]+amazonaws\.com\/(original|optimized)\/([a-z0-9]+\/)+\h{40}([\w.-]+)?)/i) do |url|
        begin
          if filename = guess_filename(url, post.raw)
            file = FileHelper.download("http:#{url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
            sha1 = Upload.generate_digest(file)
            origin = nil

            existing_upload = Upload.find_by(sha1: sha1)
            if existing_upload&.url&.start_with?("//")
              filename = existing_upload.original_filename
              origin = existing_upload.origin
              existing_upload.destroy
            end

            new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
            if new_upload&.save
              updated = true
              url = new_upload.url
            end
          end

          url
        rescue
          # keep the original URL on any failure; this loop is best-effort
          url
        end
      end

      # Short upload:// URLs whose backing upload still points at S3.
      post.raw.gsub!(/(upload:\/\/[0-9a-zA-Z]+\.\w+)/) do |url|
        begin
          if sha1 = Upload.sha1_from_short_url(url)
            if upload = Upload.find_by(sha1: sha1)
              if upload.url.start_with?("//")
                file = FileHelper.download("http:#{upload.url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
                filename = upload.original_filename
                origin = upload.origin
                upload.destroy

                new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
                if new_upload&.save
                  updated = true
                  url = new_upload.url
                end
              end
            end
          end

          url
        rescue
          url
        end
      end

      if updated
        post.save!
        post.rebake!
        putc "#"
      else
        putc "."
      end
    rescue
      putc "X"
    end
  end

  puts "Done!"
end

################################################################################
#                                migrate_to_s3                                 #
################################################################################

task "uploads:migrate_to_s3" => :environment do
  ENV["RAILS_DB"] ? migrate_to_s3 : migrate_to_s3_all_sites
end

def migrate_to_s3_all_sites
  RailsMultisite::ConnectionManagement.each_connection do
    begin
      migrate_to_s3
    rescue RuntimeError => e
      if ENV["SKIP_FAILED"]
        puts e
      else
        raise e unless ENV["SKIP_FAILED"]
      end
    end
  end
end

# Check whether the S3 migration has fully completed for +db+.
# Raises with a descriptive message when +should_raise+ is true; otherwise
# prints the problems and returns a boolean.
def migration_successful?(db, should_raise = false)
  success = true

  failure_message = "S3 migration failed for db '#{db}'."
  prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/"
  base_url = File.join(SiteSetting.Upload.s3_base_url, prefix)
  count = Upload.by_users.where("url NOT LIKE '#{base_url}%'").count

  error_message = "#{count} of #{Upload.count} uploads are not migrated to S3. #{failure_message}"

  raise error_message if count > 0 && should_raise
  success &&= count == 0

  puts error_message if count > 0

  cdn_path = SiteSetting.cdn_path("/uploads/#{db}/original").sub(/https?:/, "")
  count = Post.where("cooked LIKE '%#{cdn_path}%'").count
  error_message = "#{count} posts are not remapped to new S3 upload URL. #{failure_message}"

  raise error_message if count > 0 && should_raise
  success &&= count == 0

  puts error_message if count > 0

  Rake::Task['posts:missing_uploads'].invoke('single_site')
  count = PostCustomField.where(name: Post::MISSING_UPLOADS).count
  error_message = "rake posts:missing_uploads identified #{count} issues. #{failure_message}"
  raise error_message if count > 0 && should_raise
  success &&= count == 0

  puts error_message if count > 0

  count = Post.where('baked_version <> ? OR baked_version IS NULL', Post::BAKED_VERSION).count
  if count > 0
    puts "#{count} posts still require rebaking and will be rebaked during regular job"
    if count > 100
      puts "To speed up migrations of posts we recommend you run 'rake posts:rebake_uncooked_posts'"
    end
    success = false
  else
    puts "No posts require rebaking"
  end

  success
end

# Report migration readiness across all sites; exits non-zero when Sidekiq has
# a large backlog or any site is not fully migrated.
task "uploads:s3_migration_status" => :environment do
  success = true
  RailsMultisite::ConnectionManagement.each_connection do
    db = RailsMultisite::ConnectionManagement.current_db
    success &&= migration_successful?(db)
  end

  queued_jobs = Sidekiq::Stats.new.queues.sum { |_, x| x }
  if queued_jobs > 50
    puts "WARNING: There are #{queued_jobs} jobs queued! Wait till Sidekiq clears backlog prior to migrating site to a new host"
    exit 1
  end

  if !success
    puts "Site is not ready for migration"
    exit 1
  end

  puts "All sites appear to have uploads in order!"
end

# Upload all local original files to S3, then remap URLs in the database.
# Honors DRY_RUN, MIGRATE_TO_MULTISITE and SKIP_ETAG_VERIFY env vars.
def migrate_to_s3
  # we don't want have migrated state, ensure we run all jobs here
  Jobs.run_immediately!

  db = RailsMultisite::ConnectionManagement.current_db

  dry_run = !!ENV["DRY_RUN"]

  puts "Checking if #{db} already migrated..."
  return puts "Already migrated #{db}!" if migration_successful?(db)

  puts "*" * 30 + " DRY RUN " + "*" * 30 if dry_run
  puts "Migrating uploads to S3 for '#{db}'..."

  if Upload.by_users.where("url NOT LIKE '//%' AND url NOT LIKE '#{GlobalSetting.relative_url_root}/uploads/#{db}/original/_X/%'").exists?
    puts <<~TEXT
      Some uploads were not migrated to the new scheme. Please run these commands in the rails console

      SiteSetting.migrate_to_new_scheme = true
      Jobs::MigrateUploadScheme.new.execute(nil)
    TEXT
    exit 1
  end

  unless ENV["DISCOURSE_S3_BUCKET"].present? &&
    ENV["DISCOURSE_S3_REGION"].present? &&
    (
      (
        ENV["DISCOURSE_S3_ACCESS_KEY_ID"].present? &&
        ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"].present?
      ) ||
      ENV["DISCOURSE_S3_USE_IAM_PROFILE"].present?
    )

    puts <<~TEXT
      Please provide the following environment variables
        - DISCOURSE_S3_BUCKET
        - DISCOURSE_S3_REGION
      and either
        - DISCOURSE_S3_ACCESS_KEY_ID
        - DISCOURSE_S3_SECRET_ACCESS_KEY
      or
        - DISCOURSE_S3_USE_IAM_PROFILE
    TEXT
    exit 2
  end

  if SiteSetting.Upload.s3_cdn_url.blank?
    puts "Please provide the 'DISCOURSE_S3_CDN_URL' environment variable"
    exit 3
  end

  bucket_has_folder_path = true if ENV["DISCOURSE_S3_BUCKET"].include? "/"
  public_directory = Rails.root.join("public").to_s

  opts = {
    region: ENV["DISCOURSE_S3_REGION"],
    access_key_id: ENV["DISCOURSE_S3_ACCESS_KEY_ID"],
    secret_access_key: ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"]
  }

  # S3::Client ignores the `region` option when an `endpoint` is provided.
  # Without `region`, non-default region bucket creation will break for S3, so we can only
  # define endpoint when not using S3 i.e. when SiteSetting.s3_endpoint is provided.
  opts[:endpoint] = SiteSetting.s3_endpoint if SiteSetting.s3_endpoint.present?
  s3 = Aws::S3::Client.new(opts)

  if bucket_has_folder_path
    bucket, folder = S3Helper.get_bucket_and_folder_path(ENV["DISCOURSE_S3_BUCKET"])
    folder = File.join(folder, "/")
  else
    bucket, folder = ENV["DISCOURSE_S3_BUCKET"], ""
  end

  puts "Uploading files to S3..."
  print " - Listing local files"

  local_files = []
  IO.popen("cd #{public_directory} && find uploads/#{db}/original -type f").each do |file|
    local_files << file.chomp
    putc "." if local_files.size % 1000 == 0
  end

  puts " => #{local_files.size} files"
  print " - Listing S3 files"

  s3_objects = []
  prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/"
  options = { bucket: bucket, prefix: folder + prefix }

  loop do
    response = s3.list_objects_v2(options)
    s3_objects.concat(response.contents)
    putc "."
    break if response.next_continuation_token.blank?
    options[:continuation_token] = response.next_continuation_token
  end

  puts " => #{s3_objects.size} files"
  puts " - Syncing files to S3"

  synced = 0
  failed = []

  skip_etag_verify = ENV["SKIP_ETAG_VERIFY"].present?
  local_files.each do |file|
    path = File.join(public_directory, file)
    name = File.basename(path)
    etag = Digest::MD5.file(path).hexdigest unless skip_etag_verify
    key = file[file.index(prefix)..-1]
    key.prepend(folder) if bucket_has_folder_path
    original_path = file.sub("uploads/#{db}", "")

    # Skip files already present in S3 with matching size (and etag unless
    # verification is skipped).
    if s3_object = s3_objects.find { |obj| obj.key.ends_with?(original_path) }
      next if File.size(path) == s3_object.size && (skip_etag_verify || s3_object.etag[etag])
    end

    options = {
      acl: "public-read",
      body: File.open(path, "rb"),
      bucket: bucket,
      content_type: MiniMime.lookup_by_filename(name)&.content_type,
      key: key,
    }

    if !FileHelper.is_supported_image?(name)
      upload = Upload.find_by(url: "/#{file}")

      if upload&.original_filename
        options[:content_disposition] =
          %Q{attachment; filename="#{upload.original_filename}"}
      end

      # FIX: was `upload.secure`, which raised NoMethodError when no Upload
      # record matched (the guard above already allows for a nil upload).
      if upload&.secure
        options[:acl] = "private"
      end
    end

    etag ||= Digest::MD5.file(path).hexdigest

    if dry_run
      puts "#{file} => #{options[:key]}"
      synced += 1
    elsif s3.put_object(options).etag[etag]
      putc "."
      synced += 1
    else
      putc "X"
      failed << path
    end
  end

  puts

  failure_message = "S3 migration failed for db '#{db}'."

  if failed.size > 0
    puts "Failed to upload #{failed.size} files"
    puts failed.join("\n")
    raise failure_message
  elsif s3_objects.size + synced >= local_files.size
    puts "Updating the URLs in the database..."

    from = "/uploads/#{db}/original/"
    to = "#{SiteSetting.Upload.s3_base_url}/#{prefix}"

    if dry_run
      puts "REPLACING '#{from}' WITH '#{to}'"
    else
      DbHelper.remap(from, to, anchor_left: true)
    end

    [
      [
        "src=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "src=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "src='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "src='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "href=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "href=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "href='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "href='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "\\[img\\]/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)\\[/img\\]",
        "[img]#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1[/img]"
      ]
    ].each do |from_url, to_url|
      if dry_run
        puts "REPLACING '#{from_url}' WITH '#{to_url}'"
      else
        DbHelper.regexp_replace(from_url, to_url)
      end
    end

    unless dry_run
      # Legacy inline image format
      Post.where("raw LIKE '%![](/uploads/default/original/%)%'").each do |post|
        regexp = /!\[\](\/uploads\/#{db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/

        post.raw.scan(regexp).each do |upload_url, _|
          upload = Upload.get_from_url(upload_url)
          post.raw = post.raw.gsub("![](#{upload_url})", "![](#{upload.short_url})")
        end

        post.save!(validate: false)
      end
    end

    if Discourse.asset_host.present?
      # Uploads that were on local CDN will now be on S3 CDN
      from = "#{Discourse.asset_host}/uploads/#{db}/original/"
      to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"

      if dry_run
        puts "REMAPPING '#{from}' TO '#{to}'"
      else
        DbHelper.remap(from, to)
      end
    end

    # Uploads that were on base hostname will now be on S3 CDN
    from = "#{Discourse.base_url}/uploads/#{db}/original/"
    to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"

    if dry_run
      puts "REMAPPING '#{from}' TO '#{to}'"
    else
      DbHelper.remap(from, to)
    end

    unless dry_run
      puts "Removing old optimized images..."

      OptimizedImage
        .joins("LEFT JOIN uploads u ON optimized_images.upload_id = u.id")
        .where("u.id IS NOT NULL AND u.url LIKE '//%' AND optimized_images.url NOT LIKE '//%'")
        .delete_all

      puts "Flagging all posts containing lightboxes for rebake..."

      count = Post.where("cooked LIKE '%class=\"lightbox\"%'").update_all(baked_version: nil)
      puts "#{count} posts were flagged for a rebake"
    end
  end

  migration_successful?(db, true)

  puts "Done!"
end

################################################################################
#                                  clean_up                                    #
################################################################################

task "uploads:clean_up" => :environment do
  ENV["RAILS_DB"] ? clean_up_uploads : clean_up_uploads_all_sites
end

def clean_up_uploads_all_sites
  RailsMultisite::ConnectionManagement.each_connection { clean_up_uploads }
end

# Destructively reconcile the database with the file system: destroy records
# whose files are gone, then delete files with no matching records.
# Offers a full backup first since deletions are permanent.
def clean_up_uploads
  db = RailsMultisite::ConnectionManagement.current_db

  puts "Cleaning up uploads and thumbnails for '#{db}'..."

  if Discourse.store.external?
    puts "This task only works for internal storages."
    exit 1
  end

  puts <<~OUTPUT
    This task will remove upload records and files permanently.

    Would you like to take a full backup before the clean up? (Y/N)
  OUTPUT

  if STDIN.gets.chomp.downcase == 'y'
    puts "Starting backup..."
    backuper = BackupRestore::Backuper.new(Discourse.system_user.id)
    backuper.run
    exit 1 unless backuper.success
  end

  public_directory = Rails.root.join("public").to_s

  ##
  ## DATABASE vs FILE SYSTEM
  ##

  # uploads & avatars
  Upload.find_each do |upload|
    path = File.join(public_directory, upload.url)

    if !File.exist?(path)
      upload.destroy!
      putc "#"
    else
      putc "."
    end
  end

  # optimized images
  OptimizedImage.find_each do |optimized_image|
    path = File.join(public_directory, optimized_image.url)

    if !File.exist?(path)
      optimized_image.destroy!
      putc "#"
    else
      putc "."
    end
  end

  ##
  ## FILE SYSTEM vs DATABASE
  ##

  uploads_directory = File.join(public_directory, 'uploads', db).to_s

  # avatars (no avatar should be stored in that old directory)
  FileUtils.rm_rf("#{uploads_directory}/avatars")

  # uploads and optimized images
  Dir.glob("#{uploads_directory}/**/*.*").each do |file_path|
    sha1 = Upload.generate_digest(file_path)
    url = file_path.split(public_directory, 2)[1]

    if (Upload.where(sha1: sha1).empty? && Upload.where(url: url).empty?) &&
       (OptimizedImage.where(sha1: sha1).empty? && OptimizedImage.where(url: url).empty?)
      FileUtils.rm(file_path)
      putc "#"
    else
      putc "."
    end
  end

  puts "Removing empty directories..."
  puts `find #{uploads_directory} -type d -empty -exec rmdir {} \\;`

  puts "Done!"
end

################################################################################
#                                missing files                                 #
################################################################################

# list all missing uploads and optimized images
task "uploads:missing_files" => :environment do
  if ENV["RAILS_DB"]
    list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
  else
    RailsMultisite::ConnectionManagement.each_connection do |db|
      if ENV["SKIP_EXTERNAL"] == "1" && Discourse.store.external?
        puts "#{RailsMultisite::ConnectionManagement.current_db} has uploads stored externally skipping!"
      else
        if Discourse.store.external?
          puts "-" * 80
          puts "WARNING! WARNING! WARNING!"
          puts "-" * 80
          puts
          puts <<~TEXT
            #{RailsMultisite::ConnectionManagement.current_db} has uploads on S3!
            validating without inventory is likely to take an enormous amount of time.
            We recommend you run SKIP_EXTERNAL=1 rake uploads:missing to skip validating if on a multisite.
          TEXT
        end

        list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
      end
    end
  end
end

def list_missing_uploads(skip_optimized: false)
  Discourse.store.list_missing_uploads(skip_optimized: skip_optimized)
end

task "uploads:missing" => :environment do
  Rake::Task["uploads:missing_files"].invoke
end

################################################################################
#                        regenerate_missing_optimized                          #
################################################################################

# regenerate missing optimized images
task "uploads:regenerate_missing_optimized" => :environment do
  if ENV["RAILS_DB"]
    regenerate_missing_optimized
  else
    RailsMultisite::ConnectionManagement.each_connection { regenerate_missing_optimized }
  end
end

# Rebuild optimized images whose files are missing or empty, re-downloading
# the original from its origin when it is missing locally.
def regenerate_missing_optimized
  db = RailsMultisite::ConnectionManagement.current_db

  puts "Regenerating missing optimized images for '#{db}'..."

  if Discourse.store.external?
    puts "This task only works for internal storages."
    return
  end

  public_directory = "#{Rails.root}/public"
  missing_uploads = Set.new

  avatar_upload_ids = UserAvatar.all.pluck(:custom_upload_id, :gravatar_upload_id).flatten.compact

  default_scope = OptimizedImage.includes(:upload)

  [
    default_scope
      .where("optimized_images.upload_id IN (?)", avatar_upload_ids),

    default_scope
      .where("optimized_images.upload_id NOT IN (?)", avatar_upload_ids)
      .where("LENGTH(COALESCE(url, '')) > 0")
      .where("width > 0 AND height > 0")
  ].each do |scope|
    scope.find_each do |optimized_image|
      upload = optimized_image.upload

      # only local, relative URLs
      next unless optimized_image.url =~ /^\/[^\/]/
      next unless upload.url =~ /^\/[^\/]/

      thumbnail = "#{public_directory}#{optimized_image.url}"
      original = "#{public_directory}#{upload.url}"

      if !File.exist?(thumbnail) || File.size(thumbnail) <= 0
        # make sure the original image exists locally
        if (!File.exist?(original) || File.size(original) <= 0) && upload.origin.present?
          # try to fix it by redownloading it
          begin
            downloaded = FileHelper.download(
              upload.origin,
              max_file_size: SiteSetting.max_image_size_kb.kilobytes,
              tmp_file_name: "discourse-missing",
              follow_redirect: true
            ) rescue nil
            if downloaded && downloaded.size > 0
              FileUtils.mkdir_p(File.dirname(original))
              File.open(original, "wb") { |f| f.write(downloaded.read) }
            end
          ensure
            downloaded.try(:close!) if downloaded.respond_to?(:close!)
          end
        end

        if File.exist?(original) && File.size(original) > 0
          FileUtils.mkdir_p(File.dirname(thumbnail))
          OptimizedImage.resize(original, thumbnail, optimized_image.width, optimized_image.height)
          putc "#"
        else
          missing_uploads << original
          putc "X"
        end
      else
        putc "."
      end
    end
  end

  puts "", "Done"

  if missing_uploads.size > 0
    puts "Missing uploads:"
    missing_uploads.sort.each { |u| puts u }
  end
end

################################################################################
#                             migrate_to_new_scheme                            #
################################################################################

task "uploads:start_migration" => :environment do
  SiteSetting.migrate_to_new_scheme = true
  puts "Migration started!"
end

task "uploads:stop_migration" => :environment do
  SiteSetting.migrate_to_new_scheme = false
  # FIX: message typo ("stoped" -> "stopped")
  puts "Migration stopped!"
end

# Print a disk-usage report for uploads: per-extension totals and the users
# consuming the most space. A path list is cached to /tmp (or CACHE_PATH).
task "uploads:analyze", [:cache_path, :limit] => :environment do |_, args|
  now = Time.zone.now
  current_db = RailsMultisite::ConnectionManagement.current_db

  puts "Analyzing uploads for '#{current_db}'... This may take awhile...\n"

  cache_path = args[:cache_path]
  uploads_path = Rails.root.join('public', 'uploads', current_db)

  path =
    if cache_path
      cache_path
    else
      path = "/tmp/#{current_db}-#{now.to_i}-paths.txt"
      # FIX: previously touched "/tmp/#{now.to_i}-paths.txt", a different file
      # than the one `find` writes to below.
      FileUtils.touch(path)
      `find #{uploads_path} -type f -printf '%s %h/%f\n' > #{path}`
      path
    end

  extensions = {}
  paths_count = 0

  File.readlines(path).each do |line|
    size, file_path = line.split(" ", 2)

    paths_count += 1
    extension = File.extname(file_path).chomp.downcase
    extensions[extension] ||= {}
    extensions[extension]["count"] ||= 0
    extensions[extension]["count"] += 1
    extensions[extension]["size"] ||= 0
    extensions[extension]["size"] += size.to_i
  end

  uploads_count = Upload.count
  optimized_images_count = OptimizedImage.count

  puts <<~REPORT
    Report for '#{current_db}'
    -----------#{'-' * current_db.length}

    Number of `Upload` records in DB: #{uploads_count}
    Number of `OptimizedImage` records in DB: #{optimized_images_count}
    **Total DB records: #{uploads_count + optimized_images_count}**

    Number of images in uploads folder: #{paths_count}
    ------------------------------------#{'-' * paths_count.to_s.length}
  REPORT

  helper = Class.new do
    include ActionView::Helpers::NumberHelper
  end

  helper = helper.new

  printf "%-15s | %-15s | %-15s\n", 'extname', 'total size', 'count'
  puts "-" * 45

  extensions.sort_by { |_, value| value['size'] }.reverse.each do |extname, value|
    printf "%-15s | %-15s | %-15s\n", extname, helper.number_to_human_size(value['size']), value['count']
  end

  puts "\n"

  limit = args[:limit] || 10

  sql = <<~SQL
    SELECT
      users.username,
      COUNT(uploads.user_id) AS num_of_uploads,
      SUM(uploads.filesize) AS total_size_of_uploads,
      COUNT(optimized_images.id) AS num_of_optimized_images
    FROM users
    INNER JOIN uploads ON users.id = uploads.user_id
    INNER JOIN optimized_images ON uploads.id = optimized_images.upload_id
    GROUP BY users.id
    ORDER BY total_size_of_uploads DESC
    LIMIT #{limit}
  SQL

  puts "Users using the most disk space"
  puts "-------------------------------\n"
  printf "%-25s | %-25s | %-25s | %-25s\n", 'username', 'total size of uploads', 'number of uploads', 'number of optimized images'
  puts "-" * 110

  DB.query_single(sql).each do |username, num_of_uploads, total_size_of_uploads, num_of_optimized_images|
    printf "%-25s | %-25s | %-25s | %-25s\n", username, helper.number_to_human_size(total_size_of_uploads), num_of_uploads, num_of_optimized_images
  end

  puts "\n"
  puts "List of file paths @ #{path}"
  puts "Duration: #{Time.zone.now - now} seconds"
end

task "uploads:fix_incorrect_extensions" => :environment do
  UploadFixer.fix_all_extensions
end

task "uploads:recover_from_tombstone" => :environment do
  Rake::Task["uploads:recover"].invoke
end

task "uploads:recover" => :environment do
  dry_run = ENV["DRY_RUN"].present?
  stop_on_error = ENV["STOP_ON_ERROR"].present?

  if ENV["RAILS_DB"]
    UploadRecovery.new(dry_run: dry_run, stop_on_error: stop_on_error).recover
  else
    RailsMultisite::ConnectionManagement.each_connection do |db|
      UploadRecovery.new(dry_run: dry_run, stop_on_error: stop_on_error).recover
    end
  end
end

##
# Run this task whenever the secure_media or login_required
# settings are changed for a Discourse instance to update
# the upload secure flag and S3 upload ACLs.
task "uploads:ensure_correct_acl" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    unless Discourse.store.external?
      puts "This task only works for external storage."
      exit 1
    end

    puts "Ensuring correct ACL for uploads in #{db}...", ""

    Upload.transaction do
      mark_secure_in_loop_because_no_login_required = false

      # First of all only get relevant uploads (supported media).
      #
      # Also only get uploads that are not for a theme or a site setting, so only
      # get post related uploads.
      uploads_with_supported_media = Upload.includes(:posts, :optimized_images).where(
        "LOWER(original_filename) SIMILAR TO '%\.(jpg|jpeg|png|gif|svg|ico|mp3|ogg|wav|m4a|mov|mp4|webm|ogv)'"
      ).joins(:post_uploads)

      puts "There are #{uploads_with_supported_media.count} upload(s) with supported media that could be marked secure.", ""

      # Simply mark all these uploads as secure if login_required because no anons will be able to access them
      if SiteSetting.login_required?
        mark_all_as_secure_login_required(uploads_with_supported_media)
      else
        # If NOT login_required, then we have to go for the other slower flow, where in the loop
        # we mark the upload as secure if the first post it is used in is with_secure_media?
        mark_secure_in_loop_because_no_login_required = true
        puts "Marking posts as secure in the next step because login_required is false."
      end

      puts "", "Rebaking #{uploads_with_supported_media.count} upload posts and updating ACLs in S3.", ""

      upload_ids_to_mark_as_secure, uploads_skipped_because_of_error =
        update_acls_and_rebake_upload_posts(
          uploads_with_supported_media, mark_secure_in_loop_because_no_login_required
        )

      log_rebake_errors(uploads_skipped_because_of_error)
      mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure)
    end
  end
  puts "", "Done"
end

def mark_all_as_secure_login_required(uploads_with_supported_media)
  puts "Marking #{uploads_with_supported_media.count} upload(s) as secure because login_required is true.", ""
  uploads_with_supported_media.update_all(secure: true)
  puts "Finished marking upload(s) as secure."
end

def log_rebake_errors(uploads_skipped_because_of_error)
  return if uploads_skipped_because_of_error.empty?
  puts "Skipped the following uploads due to error:", ""
  uploads_skipped_because_of_error.each do |message|
    puts message
  end
end

def mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure)
  return if upload_ids_to_mark_as_secure.empty?
  puts "Marking #{upload_ids_to_mark_as_secure.length} uploads as secure because their first post contains secure media."
  Upload.where(id: upload_ids_to_mark_as_secure).update_all(secure: true)
  puts "Finished marking uploads as secure."
end

# Update the S3 ACL for each upload and rebake its posts. Returns a pair:
# [upload ids to mark secure, error messages for uploads skipped on rebake].
def update_acls_and_rebake_upload_posts(uploads_with_supported_media, mark_secure_in_loop_because_no_login_required)
  upload_ids_to_mark_as_secure = []
  uploads_skipped_because_of_error = []

  i = 0
  uploads_with_supported_media.find_each(batch_size: 50) do |upload_with_supported_media|
    RakeHelpers.print_status_with_label("Updating ACL for upload.......", i, uploads_with_supported_media.count)
    Discourse.store.update_upload_ACL(upload_with_supported_media)

    RakeHelpers.print_status_with_label("Rebaking posts for upload.....", i, uploads_with_supported_media.count)
    begin
      upload_with_supported_media.posts.each { |post| post.rebake! }

      if mark_secure_in_loop_because_no_login_required
        first_post_with_upload = upload_with_supported_media.posts.order(sort_order: :asc).first
        mark_secure = first_post_with_upload ? first_post_with_upload.with_secure_media? : false
        upload_ids_to_mark_as_secure << upload_with_supported_media.id if mark_secure
      end
    rescue => e
      uploads_skipped_because_of_error << "#{upload_with_supported_media.original_filename} (#{upload_with_supported_media.url}) #{e.message}"
    end
    i += 1
  end
  RakeHelpers.print_status_with_label("Rebaking complete! ", i, uploads_with_supported_media.count)
  puts ""

  [upload_ids_to_mark_as_secure, uploads_skipped_because_of_error]
end

# Rewrite markdown-style "(/uploads/...)" links in a post's raw to upload
# short URLs, backing up the original raw in a custom field.
def inline_uploads(post)
  replaced = false

  original_raw = post.raw
  post.raw = post.raw.gsub(/(\((\/uploads\S+).*\))/) do
    upload = Upload.find_by(url: $2)
    if !upload
      data = Upload.extract_url($2)
      if data && sha1 = data[2]
        upload = Upload.find_by(sha1: sha1)
        if !upload
          sha_map = JSON.parse(post.custom_fields["UPLOAD_SHA1_MAP"] || "{}")
          if mapped_sha = sha_map[sha1]
            upload = Upload.find_by(sha1: mapped_sha)
          end
        end
      end
    end
    result = $1

    if upload&.id
      result.sub!($2, upload.short_url)
      replaced = true
    else
      puts "Upload not found #{$2} in Post #{post.id} - #{post.url}"
    end
    result
  end

  if replaced
    puts "Corrected image urls in #{post.full_url} raw backup stored in custom field"
    post.custom_fields["BACKUP_POST_RAW"] = original_raw
    post.save_custom_fields
    post.save!(validate: false)
    post.rebake!
  end
end

# Rewrite raw <img src="/uploads/..."> tags to markdown image links backed by
# upload short URLs, creating an Upload from the local file if needed.
def inline_img_tags(post)
  replaced = false

  original_raw = post.raw
  # NOTE(review): the original <img ...> pattern was stripped from the source
  # by markup mangling (it read `gsub(/()/i)`); reconstructed so that $1 is the
  # whole tag and $2 the /uploads path — confirm against upstream history.
  post.raw = post.raw.gsub(/(<img\s+src=["'](\/uploads\/[^'"]+)["'][^>]*>)/i) do
    next if $2.include?("..")

    upload = Upload.find_by(url: $2)
    if !upload
      data = Upload.extract_url($2)
      if data && sha1 = data[2]
        upload = Upload.find_by(sha1: sha1)
      end
    end
    if !upload
      local_file = File.join(Rails.root, "public", $2)
      if File.exist?(local_file)
        File.open(local_file) do |f|
          upload = UploadCreator.new(f, "image").create_for(post.user_id)
        end
      end
    end

    if upload
      replaced = true
      "![image](#{upload.short_url})"
    else
      puts "skipping missing upload in #{post.full_url} #{$1}"
      $1
    end
  end

  if replaced
    puts "Corrected image urls in #{post.full_url} raw backup stored in custom field"
    post.custom_fields["BACKUP_POST_RAW"] = original_raw
    post.save_custom_fields
    post.save!(validate: false)
    post.rebake!
  end
end

def fix_relative_links
  Post.where('raw like ?', '%](/uploads%').find_each do |post|
    inline_uploads(post)
  end
  # NOTE(review): the LIKE pattern content was stripped from the source
  # (it read '%%'); reconstructed to target posts with raw <img> tags —
  # confirm against upstream history.
  Post.where("raw ilike ?", '%<img%').find_each do |post|
    inline_img_tags(post)
  end
end

task "uploads:fix_relative_upload_links" => :environment do
  if RailsMultisite::ConnectionManagement.current_db != "default"
    fix_relative_links
  else
    RailsMultisite::ConnectionManagement.each_connection do
      fix_relative_links
    end
  end
end