FIX: Improvements and fixes for update_upload_acl rake task (#8980)

The rake task was broken, because the addition of the
UploadSecurity check returned true/false instead of the
upload ID to determine which uploads to set secure.
Also it was rebaking the posts in the wrong place and
pretty inefficiently at that. Also it was rebaking before
the upload was being changed to secure in the DB.
This also updates the task to set the access_control_post_id
for all uploads. the first post the upload is linked to is used
for the access control. if the upload doesn't get changed to
secure this doesn't affect anything.
Added a spec for the rake task to cover common cases.
This commit is contained in:
Martin Brennan 2020-02-17 14:21:43 +10:00 committed by GitHub
parent dac923379a
commit 9dcc454a07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 173 additions and 21 deletions

View File

@ -668,6 +668,15 @@ task "uploads:ensure_correct_acl" => :environment do
Upload.transaction do
mark_secure_in_loop_because_no_login_required = false
# If secure media is enabled we need to first set the access control post of
# all post uploads (even uploads that are linked to multiple posts). If the
# upload is not set to secure media then this has no other effect on the upload,
# but we _must_ know what the access control post is because the with_secure_media?
# method is on the post, and this knows about the category security & PM status
if SiteSetting.secure_media?
update_uploads_access_control_post
end
# First of all only get relevant uploads (supported media).
#
# Also only get uploads that are not for a theme or a site setting, so only
@ -684,19 +693,21 @@ task "uploads:ensure_correct_acl" => :environment do
else
# If NOT login_required, then we have to go for the other slower flow, where in the loop
# we mark the upload as secure if the first post it is used in is with_secure_media?
# we mark the upload secure based on UploadSecurity.should_be_secure?
mark_secure_in_loop_because_no_login_required = true
puts "Marking posts as secure in the next step because login_required is false."
end
puts "", "Rebaking #{uploads_with_supported_media.count} upload posts and updating ACLs in S3.", ""
puts "", "Determining which of #{uploads_with_supported_media.count} upload posts need to be marked secure and be rebaked.", ""
upload_ids_to_mark_as_secure, uploads_skipped_because_of_error = update_acls_and_rebake_upload_posts(
upload_ids_to_mark_as_secure, posts_to_rebake = determine_upload_security_and_posts_to_rebake(
uploads_with_supported_media, mark_secure_in_loop_because_no_login_required
)
log_rebake_errors(uploads_skipped_because_of_error)
mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure)
post_rebake_errors = rebake_upload_posts(posts_to_rebake)
log_rebake_errors(post_rebake_errors)
end
end
puts "", "Done"
@ -708,48 +719,83 @@ def mark_all_as_secure_login_required(uploads_with_supported_media)
puts "Finished marking upload(s) as secure."
end
def log_rebake_errors(uploads_skipped_because_of_error)
return if uploads_skipped_because_of_error.empty?
puts "Skipped the following uploads due to error:", ""
uploads_skipped_because_of_error.each do |message|
def log_rebake_errors(rebake_errors)
return if rebake_errors.empty?
puts "The following post rebakes failed with error:", ""
rebake_errors.each do |message|
puts message
end
end
def mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure)
return if upload_ids_to_mark_as_secure.empty?
puts "Marking #{upload_ids_to_mark_as_secure.length} uploads as secure because their first post contains secure media."
puts "Marking #{upload_ids_to_mark_as_secure.length} uploads as secure because UploadSecurity determined them to be secure."
Upload.where(id: upload_ids_to_mark_as_secure).update_all(secure: true)
puts "Finished marking uploads as secure."
end
def update_acls_and_rebake_upload_posts(uploads_with_supported_media, mark_secure_in_loop_because_no_login_required)
def update_uploads_access_control_post
access_control_post_updates = []
uploads_with_post_ids = DB.query(<<-SQL
SELECT upload_id, (
SELECT string_agg(CAST(post_uploads.post_id AS varchar), ',' ORDER BY post_uploads.id) as post_ids
FROM post_uploads
WHERE pu.upload_id = post_uploads.upload_id
) FROM post_uploads pu
SQL
)
uploads_with_post_ids.each do |row|
first_post_id = row.post_ids.split(",").first.to_i
access_control_post_updates << "UPDATE uploads SET access_control_post_id = #{first_post_id} WHERE id = #{row.upload_id};"
end
DB.exec(access_control_post_updates.join("\n"))
end
def rebake_upload_posts(posts_to_rebake)
post_rebake_errors = []
puts "", "Rebaking #{posts_to_rebake.length} posts with affected uploads.", ""
begin
i = 0
posts_to_rebake.each do |post|
RakeHelpers.print_status_with_label("Determining which uploads to mark secure and rebake.....", i, posts_to_rebake.length)
post.rebake!
i += 1
end
RakeHelpers.print_status_with_label("Rebaking complete! ", i, posts_to_rebake.length)
puts ""
rescue => e
post_rebake_errors << e.message
end
post_rebake_errors
end
def determine_upload_security_and_posts_to_rebake(uploads_with_supported_media, mark_secure_in_loop_because_no_login_required)
upload_ids_to_mark_as_secure = []
uploads_skipped_because_of_error = []
posts_to_rebake = []
i = 0
uploads_with_supported_media.find_each(batch_size: 50) do |upload_with_supported_media|
RakeHelpers.print_status_with_label("Updating ACL for upload.......", i, uploads_with_supported_media.count)
# we just need to determine the post security here so the ACL is set to the correct thing,
# because the update_upload_ACL method uses upload.secure?
upload_with_supported_media.secure = UploadSecurity.new(upload_with_supported_media).should_be_secure?
Discourse.store.update_upload_ACL(upload_with_supported_media)
RakeHelpers.print_status_with_label("Rebaking posts for upload.....", i, uploads_with_supported_media.count)
begin
upload_with_supported_media.posts.each { |post| post.rebake! }
RakeHelpers.print_status_with_label("Determining which uploads to mark secure and rebake.....", i, uploads_with_supported_media.count)
upload_with_supported_media.posts.each { |post| posts_to_rebake << post }
if mark_secure_in_loop_because_no_login_required
upload_ids_to_mark_as_secure << UploadSecurity.new(upload_with_supported_media).should_be_secure?
end
rescue => e
uploads_skipped_because_of_error << "#{upload_with_supported_media.original_filename} (#{upload_with_supported_media.url}) #{e.message}"
if mark_secure_in_loop_because_no_login_required && upload_with_supported_media.secure?
upload_ids_to_mark_as_secure << upload_with_supported_media.id
end
i += 1
end
RakeHelpers.print_status_with_label("Rebaking complete! ", i, uploads_with_supported_media.count)
RakeHelpers.print_status_with_label("Determination complete! ", i, uploads_with_supported_media.count)
puts ""
[upload_ids_to_mark_as_secure, uploads_skipped_because_of_error]
[upload_ids_to_mark_as_secure, posts_to_rebake]
end
def inline_uploads(post)

106
spec/tasks/uploads_spec.rb Normal file
View File

@ -0,0 +1,106 @@
# frozen_string_literal: true
require 'rails_helper'
RSpec.describe "tasks/uploads" do
before do
Rake::Task.clear
Discourse::Application.load_tasks
end
describe "uploads:ensure_correct_acl" do
let!(:uploads) do
[
multi_post_upload1,
upload1,
upload2
]
end
let(:multi_post_upload1) { Fabricate(:upload_s3) }
let(:upload1) { Fabricate(:upload_s3) }
let(:upload2) { Fabricate(:upload_s3) }
let!(:post1) { Fabricate(:post) }
let!(:post2) { Fabricate(:post) }
let!(:post3) { Fabricate(:post) }
before do
PostUpload.create(post: post1, upload: multi_post_upload1)
PostUpload.create(post: post2, upload: multi_post_upload1)
PostUpload.create(post: post2, upload: upload1)
PostUpload.create(post: post3, upload: upload2)
end
def invoke_task
Rake::Task['uploads:ensure_correct_acl'].invoke
end
context "when the store is internal" do
it "does nothing; this is for external store only" do
Upload.expects(:transaction).never
invoke_task
end
end
context "when store is external" do
before do
enable_s3_uploads(uploads)
end
context "when secure media is enabled" do
before do
SiteSetting.secure_media = true
end
it "sets an access_control_post for each post upload, using the first linked post in the case of multiple links" do
invoke_task
expect(multi_post_upload1.reload.access_control_post).to eq(post1)
expect(upload1.reload.access_control_post).to eq(post2)
expect(upload2.reload.access_control_post).to eq(post3)
end
it "sets the upload in the read restricted topic category to secure" do
post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
invoke_task
expect(upload2.reload.secure).to eq(true)
expect(upload1.reload.secure).to eq(false)
end
it "sets the upload in the PM topic to secure" do
post3.topic.update(archetype: 'private_message', category: nil)
invoke_task
expect(upload2.reload.secure).to eq(true)
expect(upload1.reload.secure).to eq(false)
end
it "rebakes the posts attached" do
post1_baked = post1.baked_at
post2_baked = post2.baked_at
post3_baked = post3.baked_at
invoke_task
expect(post1.reload.baked_at).not_to eq(post1_baked)
expect(post2.reload.baked_at).not_to eq(post2_baked)
expect(post3.reload.baked_at).not_to eq(post3_baked)
end
end
end
end
def enable_s3_uploads(uploads)
SiteSetting.enable_s3_uploads = true
SiteSetting.s3_upload_bucket = "s3-upload-bucket"
SiteSetting.s3_access_key_id = "some key"
SiteSetting.s3_secret_access_key = "some secrets3_region key"
stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
uploads.each do |upload|
stub_request(
:put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
)
end
end
end