DEV: Secure upload rake task improvements (#29484)

This commit changes the uploads:secure_upload_analyse_and_update
and uploads:disable_secure_uploads to no longer rebake affected
posts inline. This just took way too long, and if the task stalled
you couldn't be sure if the rest of it completed.

Instead, we can update the baked_version of affected posts and
utilize our PeriodicalUpdates job to gradually rebake them. I added
warnings about increasing the site setting rebake_old_posts_count and
the global setting max_old_rebakes_per_15_minutes before doing this
as well.

For good measure, the affected post IDs are written to a JSON file too.
This commit is contained in:
Martin Brennan 2024-10-31 13:33:11 +10:00 committed by GitHub
parent d5b328b193
commit 1a3b9a7352
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 153 additions and 108 deletions

View File

@ -545,6 +545,10 @@ task "uploads:sync_s3_acls" => :environment do
end
end
def secure_upload_rebake_warning
puts "This task may mark a lot of posts for rebaking. To get through these quicker, the max_old_rebakes_per_15_minutes global setting (current value #{GlobalSetting.max_old_rebakes_per_15_minutes}) should be changed and the rebake_old_posts_count site setting (current value #{SiteSetting.rebake_old_posts_count}) increased as well. Do you want to proceed? (y/n)"
end
# NOTE: This needs to be updated to use the _first_ UploadReference
# record for each upload to determine security, and do not mark things
# as secure if the first record is something public e.g. a site setting.
@ -558,6 +562,9 @@ task "uploads:disable_secure_uploads" => :environment do
exit 1
end
secure_upload_rebake_warning
exit 1 if STDIN.gets.chomp.downcase != "y"
puts "Disabling secure upload and resetting uploads to not secure in #{db}...", ""
SiteSetting.secure_uploads = false
@ -583,8 +590,7 @@ task "uploads:disable_secure_uploads" => :environment do
secure_upload_ids,
)
adjust_acls(secure_upload_ids)
post_rebake_errors = rebake_upload_posts(post_ids_to_rebake)
log_rebake_errors(post_rebake_errors)
mark_upload_posts_for_rebake(post_ids_to_rebake)
puts "", "Rebaking and uploading complete!", ""
end
@ -612,6 +618,9 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
exit 1
end
secure_upload_rebake_warning
exit 1 if STDIN.gets.chomp.downcase != "y"
puts "Analyzing security for uploads in #{db}...", ""
all_upload_ids_changed, post_ids_to_rebake = nil
Upload.transaction do
@ -627,11 +636,17 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
# Simply mark all uploads linked to posts secure if login_required because
# no anons will be able to access them; however if secure_uploads_pm_only is
# true then login_required will not mark other uploads secure.
puts "",
"Site is login_required, and secure_uploads_pm_only is false. Continuing with strategy to mark all post uploads as secure.",
""
post_ids_to_rebake, all_upload_ids_changed = mark_all_as_secure_login_required
else
# Otherwise only mark uploads linked to posts either:
# * In secure categories or PMs if !SiteSetting.secure_uploads_pm_only
# * In PMs if SiteSetting.secure_uploads_pm_only
puts "",
"Site is not login_required. Continuing with normal strategy to mark uploads in secure contexts as secure.",
""
post_ids_to_rebake, all_upload_ids_changed =
update_specific_upload_security_no_login_required
end
@ -639,8 +654,14 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
# Enqueue rebakes AFTER upload transaction complete, so there is no race condition
# between updating the DB and the rebakes occurring.
post_rebake_errors = rebake_upload_posts(post_ids_to_rebake)
log_rebake_errors(post_rebake_errors)
#
# This is done asynchronously by changing the baked_version to NULL on
# affected posts and relying on Post.rebake_old in the PeriodicalUpdates
# job. To speed this up, these levers can be adjusted:
#
# * SiteSetting.rebake_old_posts_count
# * GlobalSetting.max_old_rebakes_per_15_minutes
mark_upload_posts_for_rebake(post_ids_to_rebake)
# Also do this AFTER upload transaction complete so we don't end up with any
# errors leaving ACLs in a bad state (the ACL sync task can be run to fix any
@ -677,7 +698,7 @@ def mark_all_as_secure_login_required
security_last_changed_reason = 'upload security rake task mark as secure',
security_last_changed_at = NOW()
FROM upl
WHERE uploads.id = upl.upload_id AND NOT uploads.secure
WHERE uploads.id = upl.upload_id
RETURNING uploads.id
SQL
puts "Marked #{post_upload_ids_marked_secure.count} upload(s) as secure because login_required is true.",
@ -687,7 +708,7 @@ def mark_all_as_secure_login_required
SET secure = false,
security_last_changed_reason = 'upload security rake task mark as not secure',
security_last_changed_at = NOW()
WHERE id NOT IN (?) AND uploads.secure
WHERE id NOT IN (?)
RETURNING uploads.id
SQL
puts "Marked #{upload_ids_marked_not_secure.count} upload(s) as not secure because they are not linked to posts.",
@ -797,24 +818,20 @@ def update_uploads_access_control_post
SQL
end
def rebake_upload_posts(post_ids_to_rebake)
def mark_upload_posts_for_rebake(post_ids_to_rebake)
posts_to_rebake = Post.where(id: post_ids_to_rebake)
post_rebake_errors = []
puts "", "Rebaking #{posts_to_rebake.length} posts with affected uploads.", ""
begin
i = 0
posts_to_rebake.each do |post|
RakeHelpers.print_status_with_label("Rebaking posts.....", i, posts_to_rebake.length)
post.rebake!
i += 1
end
RakeHelpers.print_status_with_label("Rebaking complete! ", i, posts_to_rebake.length)
puts ""
rescue => e
post_rebake_errors << e.message
end
post_rebake_errors
puts "",
"Marking #{posts_to_rebake.length} posts with affected uploads for rebake. Every 15 minutes, a batch of these will be enqueued for rebaking.",
""
posts_to_rebake.update_all(baked_version: nil)
File.write(
"secure_upload_analyse_and_update_posts_for_rebake.json",
MultiJson.dump({ post_ids: post_ids_to_rebake }),
)
puts "",
"Post IDs written to secure_upload_analyse_and_update_posts_for_rebake.json for reference",
""
end
def inline_uploads(post)

View File

@ -5,25 +5,26 @@ RSpec.describe "tasks/uploads" do
Rake::Task.clear
Discourse::Application.load_tasks
SiteSetting.authorized_extensions += "|pdf"
STDIN.stubs(:gets).returns("y\n")
end
describe "uploads:secure_upload_analyse_and_update" do
let!(:uploads) { [multi_post_upload1, upload1, upload2, upload3] }
let(:multi_post_upload1) { Fabricate(:upload_s3) }
let(:upload1) { Fabricate(:upload_s3) }
let(:upload2) { Fabricate(:upload_s3) }
let(:upload3) { Fabricate(:upload_s3, original_filename: "test.pdf", extension: "pdf") }
let!(:uploads) { [multi_post_upload_1, upload_1, upload_2, upload_3] }
let(:multi_post_upload_1) { Fabricate(:upload_s3) }
let(:upload_1) { Fabricate(:upload_s3) }
let(:upload_2) { Fabricate(:upload_s3) }
let(:upload_3) { Fabricate(:upload_s3, original_filename: "test.pdf", extension: "pdf") }
let!(:post1) { Fabricate(:post) }
let!(:post2) { Fabricate(:post) }
let!(:post3) { Fabricate(:post) }
let!(:post_1) { Fabricate(:post) }
let!(:post_2) { Fabricate(:post) }
let!(:post_3) { Fabricate(:post) }
before do
UploadReference.create(target: post1, upload: multi_post_upload1)
UploadReference.create(target: post2, upload: multi_post_upload1)
UploadReference.create(target: post2, upload: upload1)
UploadReference.create(target: post3, upload: upload2)
UploadReference.create(target: post3, upload: upload3)
UploadReference.create(target: post_1, upload: multi_post_upload_1)
UploadReference.create(target: post_2, upload: multi_post_upload_1)
UploadReference.create(target: post_2, upload: upload_1)
UploadReference.create(target: post_3, upload: upload_2)
UploadReference.create(target: post_3, upload: upload_3)
end
def invoke_task
@ -48,105 +49,121 @@ RSpec.describe "tasks/uploads" do
it "sets an access_control_post for each post upload, using the first linked post in the case of multiple links" do
invoke_task
expect(multi_post_upload1.reload.access_control_post).to eq(post1)
expect(upload1.reload.access_control_post).to eq(post2)
expect(upload2.reload.access_control_post).to eq(post3)
expect(upload3.reload.access_control_post).to eq(post3)
expect(multi_post_upload_1.reload.access_control_post).to eq(post_1)
expect(upload_1.reload.access_control_post).to eq(post_2)
expect(upload_2.reload.access_control_post).to eq(post_3)
expect(upload_3.reload.access_control_post).to eq(post_3)
end
context "when login_required" do
before { SiteSetting.login_required = true }
it "sets everything attached to a post as secure and rebakes all those posts" do
freeze_time
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
post3.update_columns(baked_at: 1.week.ago)
after do
if File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")
File.delete("secure_upload_analyse_and_update_posts_for_rebake.json")
end
end
it "sets everything attached to a post as secure" do
invoke_task
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
expect(post2.reload.baked_at).not_to eq_time(1.week.ago)
expect(post3.reload.baked_at).not_to eq_time(1.week.ago)
expect(upload2.reload.secure).to eq(true)
expect(upload1.reload.secure).to eq(true)
expect(upload3.reload.secure).to eq(true)
expect(upload_2.reload.secure).to eq(true)
expect(upload_1.reload.secure).to eq(true)
expect(upload_3.reload.secure).to eq(true)
end
it "writes a file with the post IDs to rebake" do
invoke_task
expect(File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")).to eq(
true,
)
expect(
JSON.parse(File.read("secure_upload_analyse_and_update_posts_for_rebake.json")),
).to eq({ "post_ids" => [post_1.id, post_2.id, post_3.id] })
end
it "sets the baked_version to NULL for affected posts" do
invoke_task
expect(post_1.reload.baked_version).to eq(nil)
expect(post_2.reload.baked_version).to eq(nil)
expect(post_3.reload.baked_version).to eq(nil)
end
context "when secure_uploads_pm_only" do
before { SiteSetting.secure_uploads_pm_only = true }
it "only sets everything attached to a private message post as secure and rebakes all those posts" do
freeze_time
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
post3.update_columns(baked_at: 1.week.ago)
post3.topic.update(archetype: "private_message", category: nil)
post_3.topic.update(archetype: "private_message", category: nil)
invoke_task
expect(post1.reload.baked_at).to eq_time(1.week.ago)
expect(post2.reload.baked_at).to eq_time(1.week.ago)
expect(post3.reload.baked_at).not_to eq_time(1.week.ago)
expect(upload1.reload.secure).to eq(false)
expect(upload2.reload.secure).to eq(true)
expect(upload3.reload.secure).to eq(true)
expect(post_1.reload.baked_version).not_to eq(nil)
expect(post_2.reload.baked_version).not_to eq(nil)
expect(post_3.reload.baked_version).to eq(nil)
expect(upload_1.reload.secure).to eq(false)
expect(upload_2.reload.secure).to eq(true)
expect(upload_3.reload.secure).to eq(true)
end
end
end
context "when secure_uploads_pm_only" do
before { SiteSetting.secure_uploads_pm_only = true }
it "sets a non-PM post upload to not secure" do
upload_2.update!(secure: true)
invoke_task
expect(upload_2.reload.secure).to eq(false)
end
end
it "sets the uploads that are media and attachments in the read restricted topic category to secure" do
post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
post_3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
invoke_task
expect(upload2.reload.secure).to eq(true)
expect(upload1.reload.secure).to eq(false)
expect(upload3.reload.secure).to eq(true)
expect(upload_2.reload.secure).to eq(true)
expect(upload_1.reload.secure).to eq(false)
expect(upload_3.reload.secure).to eq(true)
end
it "sets the upload in the PM topic to secure" do
post3.topic.update(archetype: "private_message", category: nil)
post_3.topic.update(archetype: "private_message", category: nil)
invoke_task
expect(upload2.reload.secure).to eq(true)
expect(upload1.reload.secure).to eq(false)
expect(upload_2.reload.secure).to eq(true)
expect(upload_1.reload.secure).to eq(false)
end
it "rebakes the posts attached for uploads that change secure status" do
post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
freeze_time
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
post3.update_columns(baked_at: 1.week.ago)
it "sets the baked_version version to NULL for the posts attached for uploads that change secure status" do
post_3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
invoke_task
expect(post1.reload.baked_at).to eq_time(1.week.ago)
expect(post2.reload.baked_at).to eq_time(1.week.ago)
expect(post3.reload.baked_at).not_to eq_time(1.week.ago)
expect(post_1.reload.baked_version).not_to eq(nil)
expect(post_2.reload.baked_version).not_to eq(nil)
expect(post_3.reload.baked_version).to eq(nil)
end
context "for an upload that is already secure and does not need to change" do
before do
post3.topic.update(archetype: "private_message", category: nil)
upload2.update(access_control_post: post3)
upload2.update_secure_status
upload3.update(access_control_post: post3)
upload3.update_secure_status
post_3.topic.update(archetype: "private_message", category: nil)
upload_2.update(access_control_post: post_3)
upload_2.update_secure_status
upload_3.update(access_control_post: post_3)
upload_3.update_secure_status
end
it "does not rebake the associated post" do
freeze_time
post3.update_columns(baked_at: 1.week.ago)
post_3.update_columns(baked_at: 1.week.ago)
invoke_task
expect(post3.reload.baked_at).to eq_time(1.week.ago)
expect(post_3.reload.baked_at).to eq_time(1.week.ago)
end
it "does not attempt to update the acl" do
FileStore::S3Store.any_instance.expects(:update_upload_ACL).with(upload2).never
FileStore::S3Store.any_instance.expects(:update_upload_ACL).with(upload_2).never
invoke_task
end
end
@ -179,19 +196,25 @@ RSpec.describe "tasks/uploads" do
uploads.each { |upload| stub_upload(upload) }
SiteSetting.secure_uploads = true
UploadReference.create(target: post1, upload: upload1)
UploadReference.create(target: post1, upload: upload2)
UploadReference.create(target: post2, upload: upload3)
UploadReference.create(target: post2, upload: upload4)
UploadReference.create(target: post_1, upload: upload_1)
UploadReference.create(target: post_1, upload: upload_2)
UploadReference.create(target: post_2, upload: upload_3)
UploadReference.create(target: post_2, upload: upload4)
end
let!(:uploads) { [upload1, upload2, upload3, upload4, upload5] }
let(:post1) { Fabricate(:post) }
let(:post2) { Fabricate(:post) }
let(:upload1) { Fabricate(:upload_s3, secure: true, access_control_post: post1) }
let(:upload2) { Fabricate(:upload_s3, secure: true, access_control_post: post1) }
let(:upload3) { Fabricate(:upload_s3, secure: true, access_control_post: post2) }
let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post2) }
after do
if File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")
File.delete("secure_upload_analyse_and_update_posts_for_rebake.json")
end
end
let!(:uploads) { [upload_1, upload_2, upload_3, upload4, upload5] }
let(:post_1) { Fabricate(:post) }
let(:post_2) { Fabricate(:post) }
let(:upload_1) { Fabricate(:upload_s3, secure: true, access_control_post: post_1) }
let(:upload_2) { Fabricate(:upload_s3, secure: true, access_control_post: post_1) }
let(:upload_3) { Fabricate(:upload_s3, secure: true, access_control_post: post_2) }
let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post_2) }
let(:upload5) { Fabricate(:upload_s3, secure: false) }
it "disables the secure upload setting" do
@ -201,24 +224,29 @@ RSpec.describe "tasks/uploads" do
it "updates all secure uploads to secure: false" do
invoke_task
[upload1, upload2, upload3, upload4].each { |upl| expect(upl.reload.secure).to eq(false) }
[upload_1, upload_2, upload_3, upload4].each { |upl| expect(upl.reload.secure).to eq(false) }
end
it "rebakes the associated posts" do
freeze_time
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
it "sets the baked_version to NULL for affected posts" do
invoke_task
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
expect(post2.reload.baked_at).not_to eq_time(1.week.ago)
expect(post_1.reload.baked_version).to eq(nil)
expect(post_2.reload.baked_version).to eq(nil)
end
it "writes a file with the post IDs to rebake" do
invoke_task
expect(File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")).to eq(true)
expect(JSON.parse(File.read("secure_upload_analyse_and_update_posts_for_rebake.json"))).to eq(
{ "post_ids" => [post_1.id, post_2.id] },
)
end
it "updates the affected ACLs via the SyncAclsForUploads job" do
invoke_task
expect(Jobs::SyncAclsForUploads.jobs.last["args"][0]["upload_ids"]).to match_array(
[upload1.id, upload2.id, upload3.id, upload4.id],
[upload_1.id, upload_2.id, upload_3.id, upload4.id],
)
end
end