From 0388653a4d7d422a1525649a155182446da34405 Mon Sep 17 00:00:00 2001 From: Martin Brennan Date: Tue, 3 Mar 2020 10:03:58 +1100 Subject: [PATCH] DEV: Upload and secure media retroactive rake task improvements (#9027) * Add uploads:sync_s3_acls rake task to ensure the ACLs in S3 are the correct (public-read or private) setting based on upload security * Improved uploads:disable_secure_media to be more efficient and provide better messages to the user. * Rename uploads:ensure_correct_acl task to uploads:secure_upload_analyse_and_update as it does more than check the ACL * Many improvements to uploads:secure_upload_analyse_and_update * Make sure that upload.access_control_post is unscoped so deleted posts are still fetched, because they still affect the security of the upload. * Add escape hatch for capture_stdout in the form of RAILS_ENABLE_TEST_STDOUT. If provided the capture_stdout code will be ignored, so you can see the output if you need. --- app/models/upload.rb | 6 + lib/tasks/uploads.rake | 195 +++++++++++++----- lib/upload_security.rb | 3 - .../update_private_uploads_acl_spec.rb | 20 +- spec/lib/upload_security_spec.rb | 11 +- spec/support/helpers.rb | 4 + spec/tasks/uploads_spec.rb | 94 ++++++++- 7 files changed, 271 insertions(+), 62 deletions(-) diff --git a/app/models/upload.rb b/app/models/upload.rb index d306ef95384..fe6f5c3e0ee 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -14,6 +14,12 @@ class Upload < ActiveRecord::Base belongs_to :user belongs_to :access_control_post, class_name: 'Post' + # when we access this post we don't care if the post + # is deleted + def access_control_post + Post.unscoped { super } + end + has_many :post_uploads, dependent: :destroy has_many :posts, through: :post_uploads diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake index 7480cb73c43..33c69b7d6c8 100644 --- a/lib/tasks/uploads.rake +++ b/lib/tasks/uploads.rake @@ -615,6 +615,21 @@ task "uploads:recover" => :environment do end end +task "uploads:sync_s3_acls" => :environment do + RailsMultisite::ConnectionManagement.each_connection do |db| + unless Discourse.store.external? + puts "This task only works for external storage." + exit 1 + end + + puts "CAUTION: This task may take a long time to complete!" + puts "-" * 30 + puts "Uploads marked as secure will get a private ACL, and uploads marked as not secure will get a public ACL." + adjust_acls(Upload.find_each(batch_size: 100)) + puts "", "Upload ACL sync complete!" + end +end + task "uploads:disable_secure_media" => :environment do RailsMultisite::ConnectionManagement.each_connection do |db| unless Discourse.store.external? @@ -628,43 +643,56 @@ task "uploads:disable_secure_media" => :environment do secure_uploads = Upload.includes(:posts).where(secure: true) secure_upload_count = secure_uploads.count + uploads_to_adjust_acl_for = [] + posts_to_rebake = {} i = 0 secure_uploads.find_each(batch_size: 20).each do |upload| - Upload.transaction do - upload.secure = false + uploads_to_adjust_acl_for << upload - RakeHelpers.print_status_with_label("Updating ACL for upload #{upload.id}.......", i, secure_upload_count) - Discourse.store.update_upload_ACL(upload) - - RakeHelpers.print_status_with_label("Rebaking posts for upload #{upload.id}.......", i, secure_upload_count) - upload.posts.each(&:rebake!) - upload.save - - i += 1 + upload.posts.each do |post| + # don't want unnecessary double-ups + next if posts_to_rebake.key?(post.id) + posts_to_rebake[post.id] = post end + + i += 1 end + puts "", "Marking #{secure_upload_count} uploads as not secure.", "" + secure_uploads.update_all(secure: false) + + adjust_acls(uploads_to_adjust_acl_for) + post_rebake_errors = rebake_upload_posts(posts_to_rebake) + log_rebake_errors(post_rebake_errors) + RakeHelpers.print_status_with_label("Rebaking and updating complete! ", i, secure_upload_count) - puts "" end - puts "Secure media is now disabled!", "" + puts "", "Secure media is now disabled!", "" +end + +# Renamed to uploads:secure_upload_analyse_and_update +task "uploads:ensure_correct_acl" => :environment do + puts "This task has been deprecated, run uploads:secure_upload_analyse_and_update task instead." + exit 1 end ## # Run this task whenever the secure_media or login_required # settings are changed for a Discourse instance to update -# the upload secure flag and S3 upload ACLs. -task "uploads:ensure_correct_acl" => :environment do +# the upload secure flag and S3 upload ACLs. Any uploads that +# have their secure status changed will have all associated posts +# rebaked. +task "uploads:secure_upload_analyse_and_update" => :environment do RailsMultisite::ConnectionManagement.each_connection do |db| unless Discourse.store.external? puts "This task only works for external storage." exit 1 end - puts "Ensuring correct ACL for uploads in #{db}...", "" - + puts "Analyzing security for uploads in #{db}...", "" + upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure, posts_to_rebake, uploads_to_adjust_acl_for = nil Upload.transaction do mark_secure_in_loop_because_no_login_required = false @@ -677,19 +705,23 @@ task "uploads:ensure_correct_acl" => :environment do update_uploads_access_control_post end - # First of all only get relevant uploads (supported media). - # - # Also only get uploads that are not for a theme or a site setting, so only - # get post related uploads. - uploads_with_supported_media = Upload.includes(:posts, :access_control_post, :optimized_images).where( - "LOWER(original_filename) SIMILAR TO '%\.(jpg|jpeg|png|gif|svg|ico|mp3|ogg|wav|m4a|mov|mp4|webm|ogv)'" - ).joins(:post_uploads) + # Get all uploads in the database, including optimized images. Both media (images, videos, + # etc) along with attachments (pdfs, txt, etc.) must be loaded because all can be marked as + # secure based on site settings. + uploads_to_update = Upload.includes(:posts, :optimized_images).joins(:post_uploads) - puts "There are #{uploads_with_supported_media.count} upload(s) with supported media that could be marked secure.", "" + # we do this to avoid a heavier post query, and to make sure we only + # get unique posts AND include deleted posts (unscoped) + unique_access_control_posts = Post.unscoped.select(:id, :topic_id).includes(topic: :category).where(id: uploads_to_update.pluck(:access_control_post_id).uniq) + uploads_to_update.each do |upload| + upload.access_control_post = unique_access_control_posts.find { |post| post.id == upload.access_control_post_id } + end + + puts "There are #{uploads_to_update.count} upload(s) that could be marked secure.", "" # Simply mark all these uploads as secure if login_required because no anons will be able to access them if SiteSetting.login_required? - mark_all_as_secure_login_required(uploads_with_supported_media) + mark_secure_in_loop_because_no_login_required = false else # If NOT login_required, then we have to go for the other slower flow, where in the loop @@ -698,24 +730,50 @@ task "uploads:ensure_correct_acl" => :environment do puts "Marking posts as secure in the next step because login_required is false." end - puts "", "Determining which of #{uploads_with_supported_media.count} upload posts need to be marked secure and be rebaked.", "" + puts "", "Analysing which of #{uploads_to_update.count} uploads need to be marked secure and be rebaked.", "" - upload_ids_to_mark_as_secure, posts_to_rebake = determine_upload_security_and_posts_to_rebake( - uploads_with_supported_media, mark_secure_in_loop_because_no_login_required + upload_ids_to_mark_as_secure, + upload_ids_to_mark_as_not_secure, + posts_to_rebake, + uploads_to_adjust_acl_for = determine_upload_security_and_posts_to_rebake( + uploads_to_update, mark_secure_in_loop_because_no_login_required ) - mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure) - - post_rebake_errors = rebake_upload_posts(posts_to_rebake) - log_rebake_errors(post_rebake_errors) + if !SiteSetting.login_required? + update_specific_upload_security_no_login_required(upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure) + else + mark_all_as_secure_login_required(uploads_to_update) + end end + + # Enqueue rebakes AFTER upload transaction complete, so there is no race condition + # between updating the DB and the rebakes occurring. + post_rebake_errors = rebake_upload_posts(posts_to_rebake) + log_rebake_errors(post_rebake_errors) + + # Also do this AFTER upload transaction complete so we don't end up with any + # errors leaving ACLs in a bad state (the ACL sync task can be run to fix any + # outliers at any time). + adjust_acls(uploads_to_adjust_acl_for) end - puts "", "Done" + puts "", "", "Done!" end -def mark_all_as_secure_login_required(uploads_with_supported_media) - puts "Marking #{uploads_with_supported_media.count} upload(s) as secure because login_required is true.", "" - uploads_with_supported_media.update_all(secure: true) +def adjust_acls(uploads_to_adjust_acl_for) + total_count = uploads_to_adjust_acl_for.respond_to?(:length) ? uploads_to_adjust_acl_for.length : uploads_to_adjust_acl_for.count + puts "", "Updating ACL for #{total_count} uploads.", "" + i = 0 + uploads_to_adjust_acl_for.each do |upload| + RakeHelpers.print_status_with_label("Updating ACL for upload.......", i, total_count) + Discourse.store.update_upload_ACL(upload) + i += 1 + end + RakeHelpers.print_status_with_label("Updaing ACLs complete! ", i, total_count) +end + +def mark_all_as_secure_login_required(uploads_to_update) + puts "Marking #{uploads_to_update.count} upload(s) as secure because login_required is true.", "" + uploads_to_update.update_all(secure: true) puts "Finished marking upload(s) as secure." end @@ -727,11 +785,16 @@ def log_rebake_errors(rebake_errors) end end -def mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure) - return if upload_ids_to_mark_as_secure.empty? - puts "Marking #{upload_ids_to_mark_as_secure.length} uploads as secure because UploadSecurity determined them to be secure." - Upload.where(id: upload_ids_to_mark_as_secure).update_all(secure: true) - puts "Finished marking uploads as secure." +def update_specific_upload_security_no_login_required(upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure) + if upload_ids_to_mark_as_secure.any? + puts "Marking #{upload_ids_to_mark_as_secure.length} uploads as secure because UploadSecurity determined them to be secure." + Upload.where(id: upload_ids_to_mark_as_secure).update_all(secure: true) + end + if upload_ids_to_mark_as_not_secure.any? + puts "Marking #{upload_ids_to_mark_as_not_secure.length} uploads as not secure because UploadSecurity determined them to be not secure." + Upload.where(id: upload_ids_to_mark_as_not_secure).update_all(secure: false) + end + puts "Finished updating upload security." end def update_uploads_access_control_post @@ -752,12 +815,13 @@ def update_uploads_access_control_post end def rebake_upload_posts(posts_to_rebake) + posts_to_rebake = posts_to_rebake.values post_rebake_errors = [] puts "", "Rebaking #{posts_to_rebake.length} posts with affected uploads.", "" begin i = 0 posts_to_rebake.each do |post| - RakeHelpers.print_status_with_label("Determining which uploads to mark secure and rebake.....", i, posts_to_rebake.length) + RakeHelpers.print_status_with_label("Rebaking posts.....", i, posts_to_rebake.length) post.rebake! i += 1 end @@ -770,32 +834,55 @@ def rebake_upload_posts(posts_to_rebake) post_rebake_errors end -def determine_upload_security_and_posts_to_rebake(uploads_with_supported_media, mark_secure_in_loop_because_no_login_required) +def determine_upload_security_and_posts_to_rebake(uploads_to_update, mark_secure_in_loop_because_no_login_required) upload_ids_to_mark_as_secure = [] - posts_to_rebake = [] + upload_ids_to_mark_as_not_secure = [] + uploads_to_adjust_acl_for = [] + posts_to_rebake = {} i = 0 - uploads_with_supported_media.find_each(batch_size: 50) do |upload_with_supported_media| - RakeHelpers.print_status_with_label("Updating ACL for upload.......", i, uploads_with_supported_media.count) + uploads_to_update.find_each(batch_size: 50) do |upload_to_update| # we just need to determine the post security here so the ACL is set to the correct thing, # because the update_upload_ACL method uses upload.secure? - upload_with_supported_media.secure = UploadSecurity.new(upload_with_supported_media).should_be_secure? - Discourse.store.update_upload_ACL(upload_with_supported_media) + original_update_secure_status = upload_to_update.secure + upload_to_update.secure = UploadSecurity.new(upload_to_update).should_be_secure? - RakeHelpers.print_status_with_label("Determining which uploads to mark secure and rebake.....", i, uploads_with_supported_media.count) - upload_with_supported_media.posts.each { |post| posts_to_rebake << post } + # no point changing ACLs or rebaking or doing any such shenanigans + # when the secure status hasn't even changed! + if original_update_secure_status == upload_to_update.secure + i += 1 + next + end - if mark_secure_in_loop_because_no_login_required && upload_with_supported_media.secure? - upload_ids_to_mark_as_secure << upload_with_supported_media.id + # we only want to update the acl later once the secure status + # has been saved in the DB; otherwise if there is a later failure + # we get stuck with an incorrect ACL in S3 + uploads_to_adjust_acl_for << upload_to_update + RakeHelpers.print_status_with_label("Analysing which upload posts to rebake.....", i, uploads_to_update.count) + upload_to_update.posts.each do |post| + # don't want unnecessary double-ups + next if posts_to_rebake.key?(post.id) + posts_to_rebake[post.id] = post + end + + # some uploads will be marked as not secure here. + # we need to address this with upload_ids_to_mark_as_not_secure + # e.g. turning off SiteSetting.login_required + if mark_secure_in_loop_because_no_login_required + if upload_to_update.secure? + upload_ids_to_mark_as_secure << upload_to_update.id + else + upload_ids_to_mark_as_not_secure << upload_to_update.id + end end i += 1 end - RakeHelpers.print_status_with_label("Determination complete! ", i, uploads_with_supported_media.count) + RakeHelpers.print_status_with_label("Analysis complete! ", i, uploads_to_update.count) puts "" - [upload_ids_to_mark_as_secure, posts_to_rebake] + [upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure, posts_to_rebake, uploads_to_adjust_acl_for] end def inline_uploads(post) diff --git a/lib/upload_security.rb b/lib/upload_security.rb index d2e11d9eef5..2c9d2f23868 100644 --- a/lib/upload_security.rb +++ b/lib/upload_security.rb @@ -57,9 +57,6 @@ class UploadSecurity # if there is no access control post id and the upload is currently secure, we # do not want to make it un-secure to avoid unintentionally exposing it def access_control_post_has_secure_media? - # if the post is deleted the access_control_post will be blank... - # TODO: deal with this in a better way - return false if @upload.access_control_post.blank? @upload.access_control_post.with_secure_media? end diff --git a/spec/jobs/regular/update_private_uploads_acl_spec.rb b/spec/jobs/regular/update_private_uploads_acl_spec.rb index 25d6fe07d0c..9c51ef734fd 100644 --- a/spec/jobs/regular/update_private_uploads_acl_spec.rb +++ b/spec/jobs/regular/update_private_uploads_acl_spec.rb @@ -24,9 +24,9 @@ describe Jobs::UpdatePrivateUploadsAcl do before do SiteSetting.login_required = true SiteSetting.prevent_anons_from_downloading_files = true - SiteSetting::Upload.stubs(:enable_s3_uploads).returns(true) Discourse.stubs(:store).returns(stub(external?: false)) - SiteSetting.stubs(:secure_media?).returns(true) + enable_s3_uploads([upload]) + SiteSetting.secure_media = true end it "changes the upload to secure" do @@ -35,4 +35,20 @@ describe Jobs::UpdatePrivateUploadsAcl do end end end + + def enable_s3_uploads(uploads) + SiteSetting.enable_s3_uploads = true + SiteSetting.s3_upload_bucket = "s3-upload-bucket" + SiteSetting.s3_access_key_id = "some key" + SiteSetting.s3_secret_access_key = "some secrets3_region key" + + stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/") + + uploads.each do |upload| + stub_request( + :put, + "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl" + ) + end + end end diff --git a/spec/lib/upload_security_spec.rb b/spec/lib/upload_security_spec.rb index 3dc812bbdc2..eabf0616784 100644 --- a/spec/lib/upload_security_spec.rb +++ b/spec/lib/upload_security_spec.rb @@ -97,11 +97,20 @@ RSpec.describe UploadSecurity do context "when the access control post has_secure_media?" do before do - upload.update(access_control_post: post_in_secure_context) + upload.update(access_control_post_id: post_in_secure_context.id) end it "returns true" do expect(subject.should_be_secure?).to eq(true) end + + context "when the post is deleted" do + before do + post_in_secure_context.trash! + end + it "still determines whether the post has secure media; returns true" do + expect(subject.should_be_secure?).to eq(true) + end + end end context "when uploading in the composer" do diff --git a/spec/support/helpers.rb b/spec/support/helpers.rb index e2e278bb91e..ad5fe6873e4 100644 --- a/spec/support/helpers.rb +++ b/spec/support/helpers.rb @@ -130,6 +130,10 @@ module Helpers def capture_stdout old_stdout = $stdout + if ENV['RAILS_ENABLE_TEST_STDOUT'] + yield + return + end io = StringIO.new $stdout = io yield diff --git a/spec/tasks/uploads_spec.rb b/spec/tasks/uploads_spec.rb index 944c78b5a08..21c769a0925 100644 --- a/spec/tasks/uploads_spec.rb +++ b/spec/tasks/uploads_spec.rb @@ -6,9 +6,10 @@ RSpec.describe "tasks/uploads" do before do Rake::Task.clear Discourse::Application.load_tasks + SiteSetting.authorized_extensions += "|pdf" end - describe "uploads:ensure_correct_acl" do + describe "uploads:secure_upload_analyse_and_update" do let!(:uploads) do [ multi_post_upload1, @@ -19,6 +20,7 @@ RSpec.describe "tasks/uploads" do let(:multi_post_upload1) { Fabricate(:upload_s3) } let(:upload1) { Fabricate(:upload_s3) } let(:upload2) { Fabricate(:upload_s3) } + let(:upload3) { Fabricate(:upload_s3, original_filename: 'test.pdf') } let!(:post1) { Fabricate(:post) } let!(:post2) { Fabricate(:post) } @@ -29,11 +31,12 @@ RSpec.describe "tasks/uploads" do PostUpload.create(post: post2, upload: multi_post_upload1) PostUpload.create(post: post2, upload: upload1) PostUpload.create(post: post3, upload: upload2) + PostUpload.create(post: post3, upload: upload3) end def invoke_task capture_stdout do - Rake::Task['uploads:ensure_correct_acl'].invoke + Rake::Task['uploads:secure_upload_analyse_and_update'].invoke end end @@ -59,6 +62,7 @@ RSpec.describe "tasks/uploads" do expect(multi_post_upload1.reload.access_control_post).to eq(post1) expect(upload1.reload.access_control_post).to eq(post2) expect(upload2.reload.access_control_post).to eq(post3) + expect(upload3.reload.access_control_post).to eq(post3) end it "sets the upload in the read restricted topic category to secure" do @@ -66,6 +70,7 @@ RSpec.describe "tasks/uploads" do invoke_task expect(upload2.reload.secure).to eq(true) expect(upload1.reload.secure).to eq(false) + expect(upload3.reload.secure).to eq(false) end it "sets the upload in the PM topic to secure" do @@ -86,10 +91,95 @@ RSpec.describe "tasks/uploads" do expect(post2.reload.baked_at).not_to eq(post2_baked) expect(post3.reload.baked_at).not_to eq(post3_baked) end + + context "for an upload that is already secure and does not need to change" do + before do + post3.topic.update(archetype: 'private_message', category: nil) + upload2.update(access_control_post: post3) + upload2.update_secure_status + end + + it "does not rebake the associated post" do + post3_baked = post3.baked_at.to_s + invoke_task + expect(post3.reload.baked_at.to_s).to eq(post3_baked) + end + + it "does not attempt to update the acl" do + Discourse.store.expects(:update_upload_ACL).with(upload2).never + invoke_task + end + end + + context "for an upload that is already secure and is changing to not secure" do + it "changes the upload to not secure and updates the ACL" do + upload_to_mark_not_secure = Fabricate(:upload_s3, secure: true) + post_for_upload = Fabricate(:post) + PostUpload.create(post: post_for_upload, upload: upload_to_mark_not_secure) + enable_s3_uploads(uploads.concat([upload_to_mark_not_secure])) + invoke_task + expect(upload_to_mark_not_secure.reload.secure).to eq(false) + end + end end end end + describe "uploads:disable_secure_media" do + def invoke_task + capture_stdout do + Rake::Task['uploads:disable_secure_media'].invoke + end + end + + before do + enable_s3_uploads(uploads) + SiteSetting.secure_media = true + PostUpload.create(post: post1, upload: upload1) + PostUpload.create(post: post1, upload: upload2) + PostUpload.create(post: post2, upload: upload3) + PostUpload.create(post: post2, upload: upload4) + end + + let!(:uploads) do + [ + upload1, upload2, upload3, upload4, upload5 + ] + end + let(:post1) { Fabricate(:post) } + let(:post2) { Fabricate(:post) } + let(:upload1) { Fabricate(:upload_s3, secure: true, access_control_post: post1) } + let(:upload2) { Fabricate(:upload_s3, secure: true, access_control_post: post1) } + let(:upload3) { Fabricate(:upload_s3, secure: true, access_control_post: post2) } + let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post2) } + let(:upload5) { Fabricate(:upload_s3, secure: false) } + + it "disables the secure media setting" do + invoke_task + expect(SiteSetting.secure_media).to eq(false) + end + + it "updates all secure uploads to secure: false" do + invoke_task + [upload1, upload2, upload3, upload4].each do |upl| + expect(upl.reload.secure).to eq(false) + end + end + + it "rebakes the associated posts" do + baked_post1 = post1.baked_at + baked_post2 = post2.baked_at + invoke_task + expect(post1.reload.baked_at).not_to eq(baked_post1) + expect(post2.reload.baked_at).not_to eq(baked_post2) + end + + it "updates the affected ACLs" do + FileStore::S3Store.any_instance.expects(:update_upload_ACL).times(4) + invoke_task + end + end + def enable_s3_uploads(uploads) SiteSetting.enable_s3_uploads = true SiteSetting.s3_upload_bucket = "s3-upload-bucket"