DEV: Secure upload rake task improvements (#29484)

This commit changes the uploads:secure_upload_analyse_and_update
and uploads:disable_secure_uploads to no longer rebake affected
posts inline. This just took way too long, and if the task stalled
you couldn't be sure if the rest of it completed.

Instead, we can update the baked_version of affected posts and
utilize our PeriodicalUpdates job to gradually rebake them. I added
warnings about increasing the site setting rebake_old_posts_count and
the global setting max_old_rebakes_per_15_minutes before doing this
as well.

For good measure, the affected post IDs are written to a JSON file too.
This commit is contained in:
Martin Brennan 2024-10-31 13:33:11 +10:00 committed by GitHub
parent d5b328b193
commit 1a3b9a7352
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 153 additions and 108 deletions

View File

@ -545,6 +545,10 @@ task "uploads:sync_s3_acls" => :environment do
end end
end end
def secure_upload_rebake_warning
puts "This task may mark a lot of posts for rebaking. To get through these quicker, the max_old_rebakes_per_15_minutes global setting (current value #{GlobalSetting.max_old_rebakes_per_15_minutes}) should be changed and the rebake_old_posts_count site setting (current value #{SiteSetting.rebake_old_posts_count}) increased as well. Do you want to proceed? (y/n)"
end
# NOTE: This needs to be updated to use the _first_ UploadReference # NOTE: This needs to be updated to use the _first_ UploadReference
# record for each upload to determine security, and do not mark things # record for each upload to determine security, and do not mark things
# as secure if the first record is something public e.g. a site setting. # as secure if the first record is something public e.g. a site setting.
@ -558,6 +562,9 @@ task "uploads:disable_secure_uploads" => :environment do
exit 1 exit 1
end end
secure_upload_rebake_warning
exit 1 if STDIN.gets.chomp.downcase != "y"
puts "Disabling secure upload and resetting uploads to not secure in #{db}...", "" puts "Disabling secure upload and resetting uploads to not secure in #{db}...", ""
SiteSetting.secure_uploads = false SiteSetting.secure_uploads = false
@ -583,8 +590,7 @@ task "uploads:disable_secure_uploads" => :environment do
secure_upload_ids, secure_upload_ids,
) )
adjust_acls(secure_upload_ids) adjust_acls(secure_upload_ids)
post_rebake_errors = rebake_upload_posts(post_ids_to_rebake) mark_upload_posts_for_rebake(post_ids_to_rebake)
log_rebake_errors(post_rebake_errors)
puts "", "Rebaking and uploading complete!", "" puts "", "Rebaking and uploading complete!", ""
end end
@ -612,6 +618,9 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
exit 1 exit 1
end end
secure_upload_rebake_warning
exit 1 if STDIN.gets.chomp.downcase != "y"
puts "Analyzing security for uploads in #{db}...", "" puts "Analyzing security for uploads in #{db}...", ""
all_upload_ids_changed, post_ids_to_rebake = nil all_upload_ids_changed, post_ids_to_rebake = nil
Upload.transaction do Upload.transaction do
@ -627,11 +636,17 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
# Simply mark all uploads linked to posts secure if login_required because # Simply mark all uploads linked to posts secure if login_required because
# no anons will be able to access them; however if secure_uploads_pm_only is # no anons will be able to access them; however if secure_uploads_pm_only is
# true then login_required will not mark other uploads secure. # true then login_required will not mark other uploads secure.
puts "",
"Site is login_required, and secure_uploads_pm_only is false. Continuing with strategy to mark all post uploads as secure.",
""
post_ids_to_rebake, all_upload_ids_changed = mark_all_as_secure_login_required post_ids_to_rebake, all_upload_ids_changed = mark_all_as_secure_login_required
else else
# Otherwise only mark uploads linked to posts either: # Otherwise only mark uploads linked to posts either:
# * In secure categories or PMs if !SiteSetting.secure_uploads_pm_only # * In secure categories or PMs if !SiteSetting.secure_uploads_pm_only
# * In PMs if SiteSetting.secure_uploads_pm_only # * In PMs if SiteSetting.secure_uploads_pm_only
puts "",
"Site is not login_required. Continuing with normal strategy to mark uploads in secure contexts as secure.",
""
post_ids_to_rebake, all_upload_ids_changed = post_ids_to_rebake, all_upload_ids_changed =
update_specific_upload_security_no_login_required update_specific_upload_security_no_login_required
end end
@ -639,8 +654,14 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
# Enqueue rebakes AFTER upload transaction complete, so there is no race condition # Enqueue rebakes AFTER upload transaction complete, so there is no race condition
# between updating the DB and the rebakes occurring. # between updating the DB and the rebakes occurring.
post_rebake_errors = rebake_upload_posts(post_ids_to_rebake) #
log_rebake_errors(post_rebake_errors) # This is done asynchronously by changing the baked_version to NULL on
# affected posts and relying on Post.rebake_old in the PeriodicalUpdates
# job. To speed this up, these levers can be adjusted:
#
# * SiteSetting.rebake_old_posts_count
# * GlobalSetting.max_old_rebakes_per_15_minutes
mark_upload_posts_for_rebake(post_ids_to_rebake)
# Also do this AFTER upload transaction complete so we don't end up with any # Also do this AFTER upload transaction complete so we don't end up with any
# errors leaving ACLs in a bad state (the ACL sync task can be run to fix any # errors leaving ACLs in a bad state (the ACL sync task can be run to fix any
@ -677,7 +698,7 @@ def mark_all_as_secure_login_required
security_last_changed_reason = 'upload security rake task mark as secure', security_last_changed_reason = 'upload security rake task mark as secure',
security_last_changed_at = NOW() security_last_changed_at = NOW()
FROM upl FROM upl
WHERE uploads.id = upl.upload_id AND NOT uploads.secure WHERE uploads.id = upl.upload_id
RETURNING uploads.id RETURNING uploads.id
SQL SQL
puts "Marked #{post_upload_ids_marked_secure.count} upload(s) as secure because login_required is true.", puts "Marked #{post_upload_ids_marked_secure.count} upload(s) as secure because login_required is true.",
@ -687,7 +708,7 @@ def mark_all_as_secure_login_required
SET secure = false, SET secure = false,
security_last_changed_reason = 'upload security rake task mark as not secure', security_last_changed_reason = 'upload security rake task mark as not secure',
security_last_changed_at = NOW() security_last_changed_at = NOW()
WHERE id NOT IN (?) AND uploads.secure WHERE id NOT IN (?)
RETURNING uploads.id RETURNING uploads.id
SQL SQL
puts "Marked #{upload_ids_marked_not_secure.count} upload(s) as not secure because they are not linked to posts.", puts "Marked #{upload_ids_marked_not_secure.count} upload(s) as not secure because they are not linked to posts.",
@ -797,24 +818,20 @@ def update_uploads_access_control_post
SQL SQL
end end
def rebake_upload_posts(post_ids_to_rebake) def mark_upload_posts_for_rebake(post_ids_to_rebake)
posts_to_rebake = Post.where(id: post_ids_to_rebake) posts_to_rebake = Post.where(id: post_ids_to_rebake)
post_rebake_errors = [] post_rebake_errors = []
puts "", "Rebaking #{posts_to_rebake.length} posts with affected uploads.", "" puts "",
begin "Marking #{posts_to_rebake.length} posts with affected uploads for rebake. Every 15 minutes, a batch of these will be enqueued for rebaking.",
i = 0 ""
posts_to_rebake.each do |post| posts_to_rebake.update_all(baked_version: nil)
RakeHelpers.print_status_with_label("Rebaking posts.....", i, posts_to_rebake.length) File.write(
post.rebake! "secure_upload_analyse_and_update_posts_for_rebake.json",
i += 1 MultiJson.dump({ post_ids: post_ids_to_rebake }),
end )
puts "",
RakeHelpers.print_status_with_label("Rebaking complete! ", i, posts_to_rebake.length) "Post IDs written to secure_upload_analyse_and_update_posts_for_rebake.json for reference",
puts "" ""
rescue => e
post_rebake_errors << e.message
end
post_rebake_errors
end end
def inline_uploads(post) def inline_uploads(post)

View File

@ -5,25 +5,26 @@ RSpec.describe "tasks/uploads" do
Rake::Task.clear Rake::Task.clear
Discourse::Application.load_tasks Discourse::Application.load_tasks
SiteSetting.authorized_extensions += "|pdf" SiteSetting.authorized_extensions += "|pdf"
STDIN.stubs(:gets).returns("y\n")
end end
describe "uploads:secure_upload_analyse_and_update" do describe "uploads:secure_upload_analyse_and_update" do
let!(:uploads) { [multi_post_upload1, upload1, upload2, upload3] } let!(:uploads) { [multi_post_upload_1, upload_1, upload_2, upload_3] }
let(:multi_post_upload1) { Fabricate(:upload_s3) } let(:multi_post_upload_1) { Fabricate(:upload_s3) }
let(:upload1) { Fabricate(:upload_s3) } let(:upload_1) { Fabricate(:upload_s3) }
let(:upload2) { Fabricate(:upload_s3) } let(:upload_2) { Fabricate(:upload_s3) }
let(:upload3) { Fabricate(:upload_s3, original_filename: "test.pdf", extension: "pdf") } let(:upload_3) { Fabricate(:upload_s3, original_filename: "test.pdf", extension: "pdf") }
let!(:post1) { Fabricate(:post) } let!(:post_1) { Fabricate(:post) }
let!(:post2) { Fabricate(:post) } let!(:post_2) { Fabricate(:post) }
let!(:post3) { Fabricate(:post) } let!(:post_3) { Fabricate(:post) }
before do before do
UploadReference.create(target: post1, upload: multi_post_upload1) UploadReference.create(target: post_1, upload: multi_post_upload_1)
UploadReference.create(target: post2, upload: multi_post_upload1) UploadReference.create(target: post_2, upload: multi_post_upload_1)
UploadReference.create(target: post2, upload: upload1) UploadReference.create(target: post_2, upload: upload_1)
UploadReference.create(target: post3, upload: upload2) UploadReference.create(target: post_3, upload: upload_2)
UploadReference.create(target: post3, upload: upload3) UploadReference.create(target: post_3, upload: upload_3)
end end
def invoke_task def invoke_task
@ -48,105 +49,121 @@ RSpec.describe "tasks/uploads" do
it "sets an access_control_post for each post upload, using the first linked post in the case of multiple links" do it "sets an access_control_post for each post upload, using the first linked post in the case of multiple links" do
invoke_task invoke_task
expect(multi_post_upload1.reload.access_control_post).to eq(post1) expect(multi_post_upload_1.reload.access_control_post).to eq(post_1)
expect(upload1.reload.access_control_post).to eq(post2) expect(upload_1.reload.access_control_post).to eq(post_2)
expect(upload2.reload.access_control_post).to eq(post3) expect(upload_2.reload.access_control_post).to eq(post_3)
expect(upload3.reload.access_control_post).to eq(post3) expect(upload_3.reload.access_control_post).to eq(post_3)
end end
context "when login_required" do context "when login_required" do
before { SiteSetting.login_required = true } before { SiteSetting.login_required = true }
it "sets everything attached to a post as secure and rebakes all those posts" do after do
freeze_time if File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")
File.delete("secure_upload_analyse_and_update_posts_for_rebake.json")
post1.update_columns(baked_at: 1.week.ago) end
post2.update_columns(baked_at: 1.week.ago) end
post3.update_columns(baked_at: 1.week.ago)
it "sets everything attached to a post as secure" do
invoke_task invoke_task
expect(post1.reload.baked_at).not_to eq_time(1.week.ago) expect(upload_2.reload.secure).to eq(true)
expect(post2.reload.baked_at).not_to eq_time(1.week.ago) expect(upload_1.reload.secure).to eq(true)
expect(post3.reload.baked_at).not_to eq_time(1.week.ago) expect(upload_3.reload.secure).to eq(true)
expect(upload2.reload.secure).to eq(true) end
expect(upload1.reload.secure).to eq(true)
expect(upload3.reload.secure).to eq(true) it "writes a file with the post IDs to rebake" do
invoke_task
expect(File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")).to eq(
true,
)
expect(
JSON.parse(File.read("secure_upload_analyse_and_update_posts_for_rebake.json")),
).to eq({ "post_ids" => [post_1.id, post_2.id, post_3.id] })
end
it "sets the baked_version to NULL for affected posts" do
invoke_task
expect(post_1.reload.baked_version).to eq(nil)
expect(post_2.reload.baked_version).to eq(nil)
expect(post_3.reload.baked_version).to eq(nil)
end end
context "when secure_uploads_pm_only" do context "when secure_uploads_pm_only" do
before { SiteSetting.secure_uploads_pm_only = true } before { SiteSetting.secure_uploads_pm_only = true }
it "only sets everything attached to a private message post as secure and rebakes all those posts" do it "only sets everything attached to a private message post as secure and rebakes all those posts" do
freeze_time post_3.topic.update(archetype: "private_message", category: nil)
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
post3.update_columns(baked_at: 1.week.ago)
post3.topic.update(archetype: "private_message", category: nil)
invoke_task invoke_task
expect(post1.reload.baked_at).to eq_time(1.week.ago) expect(post_1.reload.baked_version).not_to eq(nil)
expect(post2.reload.baked_at).to eq_time(1.week.ago) expect(post_2.reload.baked_version).not_to eq(nil)
expect(post3.reload.baked_at).not_to eq_time(1.week.ago) expect(post_3.reload.baked_version).to eq(nil)
expect(upload1.reload.secure).to eq(false) expect(upload_1.reload.secure).to eq(false)
expect(upload2.reload.secure).to eq(true) expect(upload_2.reload.secure).to eq(true)
expect(upload3.reload.secure).to eq(true) expect(upload_3.reload.secure).to eq(true)
end end
end end
end end
context "when secure_uploads_pm_only" do
before { SiteSetting.secure_uploads_pm_only = true }
it "sets a non-PM post upload to not secure" do
upload_2.update!(secure: true)
invoke_task
expect(upload_2.reload.secure).to eq(false)
end
end
it "sets the uploads that are media and attachments in the read restricted topic category to secure" do it "sets the uploads that are media and attachments in the read restricted topic category to secure" do
post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group))) post_3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
invoke_task invoke_task
expect(upload2.reload.secure).to eq(true) expect(upload_2.reload.secure).to eq(true)
expect(upload1.reload.secure).to eq(false) expect(upload_1.reload.secure).to eq(false)
expect(upload3.reload.secure).to eq(true) expect(upload_3.reload.secure).to eq(true)
end end
it "sets the upload in the PM topic to secure" do it "sets the upload in the PM topic to secure" do
post3.topic.update(archetype: "private_message", category: nil) post_3.topic.update(archetype: "private_message", category: nil)
invoke_task invoke_task
expect(upload2.reload.secure).to eq(true) expect(upload_2.reload.secure).to eq(true)
expect(upload1.reload.secure).to eq(false) expect(upload_1.reload.secure).to eq(false)
end end
it "rebakes the posts attached for uploads that change secure status" do it "sets the baked_version version to NULL for the posts attached for uploads that change secure status" do
post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group))) post_3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
freeze_time
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
post3.update_columns(baked_at: 1.week.ago)
invoke_task invoke_task
expect(post1.reload.baked_at).to eq_time(1.week.ago) expect(post_1.reload.baked_version).not_to eq(nil)
expect(post2.reload.baked_at).to eq_time(1.week.ago) expect(post_2.reload.baked_version).not_to eq(nil)
expect(post3.reload.baked_at).not_to eq_time(1.week.ago) expect(post_3.reload.baked_version).to eq(nil)
end end
context "for an upload that is already secure and does not need to change" do context "for an upload that is already secure and does not need to change" do
before do before do
post3.topic.update(archetype: "private_message", category: nil) post_3.topic.update(archetype: "private_message", category: nil)
upload2.update(access_control_post: post3) upload_2.update(access_control_post: post_3)
upload2.update_secure_status upload_2.update_secure_status
upload3.update(access_control_post: post3) upload_3.update(access_control_post: post_3)
upload3.update_secure_status upload_3.update_secure_status
end end
it "does not rebake the associated post" do it "does not rebake the associated post" do
freeze_time freeze_time
post3.update_columns(baked_at: 1.week.ago) post_3.update_columns(baked_at: 1.week.ago)
invoke_task invoke_task
expect(post3.reload.baked_at).to eq_time(1.week.ago) expect(post_3.reload.baked_at).to eq_time(1.week.ago)
end end
it "does not attempt to update the acl" do it "does not attempt to update the acl" do
FileStore::S3Store.any_instance.expects(:update_upload_ACL).with(upload2).never FileStore::S3Store.any_instance.expects(:update_upload_ACL).with(upload_2).never
invoke_task invoke_task
end end
end end
@ -179,19 +196,25 @@ RSpec.describe "tasks/uploads" do
uploads.each { |upload| stub_upload(upload) } uploads.each { |upload| stub_upload(upload) }
SiteSetting.secure_uploads = true SiteSetting.secure_uploads = true
UploadReference.create(target: post1, upload: upload1) UploadReference.create(target: post_1, upload: upload_1)
UploadReference.create(target: post1, upload: upload2) UploadReference.create(target: post_1, upload: upload_2)
UploadReference.create(target: post2, upload: upload3) UploadReference.create(target: post_2, upload: upload_3)
UploadReference.create(target: post2, upload: upload4) UploadReference.create(target: post_2, upload: upload4)
end end
let!(:uploads) { [upload1, upload2, upload3, upload4, upload5] } after do
let(:post1) { Fabricate(:post) } if File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")
let(:post2) { Fabricate(:post) } File.delete("secure_upload_analyse_and_update_posts_for_rebake.json")
let(:upload1) { Fabricate(:upload_s3, secure: true, access_control_post: post1) } end
let(:upload2) { Fabricate(:upload_s3, secure: true, access_control_post: post1) } end
let(:upload3) { Fabricate(:upload_s3, secure: true, access_control_post: post2) }
let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post2) } let!(:uploads) { [upload_1, upload_2, upload_3, upload4, upload5] }
let(:post_1) { Fabricate(:post) }
let(:post_2) { Fabricate(:post) }
let(:upload_1) { Fabricate(:upload_s3, secure: true, access_control_post: post_1) }
let(:upload_2) { Fabricate(:upload_s3, secure: true, access_control_post: post_1) }
let(:upload_3) { Fabricate(:upload_s3, secure: true, access_control_post: post_2) }
let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post_2) }
let(:upload5) { Fabricate(:upload_s3, secure: false) } let(:upload5) { Fabricate(:upload_s3, secure: false) }
it "disables the secure upload setting" do it "disables the secure upload setting" do
@ -201,24 +224,29 @@ RSpec.describe "tasks/uploads" do
it "updates all secure uploads to secure: false" do it "updates all secure uploads to secure: false" do
invoke_task invoke_task
[upload1, upload2, upload3, upload4].each { |upl| expect(upl.reload.secure).to eq(false) } [upload_1, upload_2, upload_3, upload4].each { |upl| expect(upl.reload.secure).to eq(false) }
end end
it "rebakes the associated posts" do it "sets the baked_version to NULL for affected posts" do
freeze_time
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
invoke_task invoke_task
expect(post1.reload.baked_at).not_to eq_time(1.week.ago) expect(post_1.reload.baked_version).to eq(nil)
expect(post2.reload.baked_at).not_to eq_time(1.week.ago) expect(post_2.reload.baked_version).to eq(nil)
end
it "writes a file with the post IDs to rebake" do
invoke_task
expect(File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")).to eq(true)
expect(JSON.parse(File.read("secure_upload_analyse_and_update_posts_for_rebake.json"))).to eq(
{ "post_ids" => [post_1.id, post_2.id] },
)
end end
it "updates the affected ACLs via the SyncAclsForUploads job" do it "updates the affected ACLs via the SyncAclsForUploads job" do
invoke_task invoke_task
expect(Jobs::SyncAclsForUploads.jobs.last["args"][0]["upload_ids"]).to match_array( expect(Jobs::SyncAclsForUploads.jobs.last["args"][0]["upload_ids"]).to match_array(
[upload1.id, upload2.id, upload3.id, upload4.id], [upload_1.id, upload_2.id, upload_3.id, upload4.id],
) )
end end
end end