DEV: Introduce hidden s3_inventory_bucket site setting (#27304)

This commit introduces a hidden `s3_inventory_bucket` site setting which
replaces the `enable_s3_inventory` and `s3_configure_inventory_policy`
site settings.

The reason `enable_s3_inventory` and `s3_configure_inventory_policy`
site settings are removed is because this feature has technically been
broken since it was introduced. When the `enable_s3_inventory` feature
is turned on, the app will configure a daily inventory policy for the
`s3_upload_bucket` bucket and store the inventories under a prefix in
the bucket. The problem here is that once the inventories are created,
there is nothing cleaning up all these inventories, so whoever has
enabled this feature would have been paying the cost of storing a whole
bunch of inventory files which are never used. Given that we have not
received any complaints about inventory files inflating S3 storage costs,
we think that it is very likely that this feature is no longer being
used, and we are looking to drop support for this feature in the not too
distant future.

For now, we will still support a hidden `s3_inventory_bucket` site
setting which site administrators can configure via the
`DISCOURSE_S3_INVENTORY_BUCKET` env.
This commit is contained in:
Alan Guo Xiang Tan 2024-06-10 13:16:00 +08:00 committed by GitHub
parent 3bff633ce0
commit 8cf4ed5f88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 62 additions and 260 deletions

View File

@ -1,21 +0,0 @@
# frozen_string_literal: true
require "s3_inventory"
module Jobs
# if upload bucket changes or inventory bucket changes we want to update s3 bucket policy and inventory configuration
class UpdateS3Inventory < ::Jobs::Base
def execute(args)
unless SiteSetting.enable_s3_inventory? && SiteSetting.Upload.enable_s3_uploads &&
SiteSetting.s3_configure_inventory_policy
return
end
%i[upload optimized].each do |type|
s3_inventory = S3Inventory.new(Discourse.store.s3_helper, type)
s3_inventory.update_bucket_policy if type == :upload
s3_inventory.update_bucket_inventory_configuration
end
end
end
end

View File

@ -15,10 +15,6 @@ module Jobs
end end
end end
def s3_helper
Discourse.store.s3_helper
end
def prepare_for_all_sites def prepare_for_all_sites
inventory = S3Inventory.new(s3_helper, :upload) inventory = S3Inventory.new(s3_helper, :upload)
@db_inventories = inventory.prepare_for_all_sites @db_inventories = inventory.prepare_for_all_sites
@ -26,8 +22,7 @@ module Jobs
end end
def execute(args) def execute(args)
return if !SiteSetting.enable_s3_inventory return if (s3_inventory_bucket = SiteSetting.s3_inventory_bucket).blank?
require "s3_inventory"
if !@db_inventories && Rails.configuration.multisite && GlobalSetting.use_s3? if !@db_inventories && Rails.configuration.multisite && GlobalSetting.use_s3?
prepare_for_all_sites prepare_for_all_sites
@ -37,13 +32,13 @@ module Jobs
preloaded_inventory_file = preloaded_inventory_file =
@db_inventories[RailsMultisite::ConnectionManagement.current_db] @db_inventories[RailsMultisite::ConnectionManagement.current_db]
S3Inventory.new( S3Inventory.new(
s3_helper,
:upload, :upload,
s3_inventory_bucket:,
preloaded_inventory_file: preloaded_inventory_file, preloaded_inventory_file: preloaded_inventory_file,
preloaded_inventory_date: @inventory_date, preloaded_inventory_date: @inventory_date,
).backfill_etags_and_list_missing ).backfill_etags_and_list_missing
else else
S3Inventory.new(s3_helper, :upload).backfill_etags_and_list_missing S3Inventory.new(:upload, s3_inventory_bucket:).backfill_etags_and_list_missing
end end
end end
end end

View File

@ -53,8 +53,6 @@ DiscourseEvent.on(:site_setting_changed) do |name, old_value, new_value|
Scheduler::Defer.later("Null topic slug") { Topic.update_all(slug: nil) } Scheduler::Defer.later("Null topic slug") { Topic.update_all(slug: nil) }
end end
Jobs.enqueue(:update_s3_inventory) if %i[enable_s3_inventory s3_upload_bucket].include?(name)
SvgSprite.expire_cache if name.to_s.include?("_icon") SvgSprite.expire_cache if name.to_s.include?("_icon")
SiteIconManager.ensure_optimized! if SiteIconManager::WATCHED_SETTINGS.include?(name) SiteIconManager.ensure_optimized! if SiteIconManager::WATCHED_SETTINGS.include?(name)

View File

@ -1973,7 +1973,6 @@ en:
s3_endpoint: "The endpoint can be modified to backup to an S3 compatible service like DigitalOcean Spaces or Minio. WARNING: Leave blank if using AWS S3." s3_endpoint: "The endpoint can be modified to backup to an S3 compatible service like DigitalOcean Spaces or Minio. WARNING: Leave blank if using AWS S3."
s3_configure_tombstone_policy: "Enable automatic deletion policy for tombstone uploads. IMPORTANT: If disabled, no space will be reclaimed after uploads are deleted." s3_configure_tombstone_policy: "Enable automatic deletion policy for tombstone uploads. IMPORTANT: If disabled, no space will be reclaimed after uploads are deleted."
s3_disable_cleanup: "Prevent removal of old backups from S3 when there are more backups than the maximum allowed." s3_disable_cleanup: "Prevent removal of old backups from S3 when there are more backups than the maximum allowed."
enable_s3_inventory: "Generate reports and verify uploads using Amazon S3 inventory. IMPORTANT: requires valid S3 credentials (both access key id & secret access key)."
s3_use_acls: "AWS recommends not using ACLs on S3 buckets; if you are following this advice, uncheck this option. This must be enabled if you are using secure uploads." s3_use_acls: "AWS recommends not using ACLs on S3 buckets; if you are following this advice, uncheck this option. This must be enabled if you are using secure uploads."
backup_time_of_day: "Time of day UTC when the backup should occur." backup_time_of_day: "Time of day UTC when the backup should occur."
backup_with_uploads: "Include uploads in scheduled backups. Disabling this will only backup the database." backup_with_uploads: "Include uploads in scheduled backups. Disabling this will only backup the database."

View File

@ -1581,11 +1581,6 @@ files:
default: true default: true
s3_use_acls: s3_use_acls:
default: true default: true
enable_s3_inventory:
default: false
s3_configure_inventory_policy:
default: true
hidden: true
s3_presigned_get_url_expires_after_seconds: s3_presigned_get_url_expires_after_seconds:
default: 300 default: 300
hidden: true hidden: true
@ -2487,6 +2482,10 @@ backups:
s3_backup_bucket: s3_backup_bucket:
default: "" default: ""
regex: '^[a-z0-9\-\/]+$' # can't use '.' when using HTTPS regex: '^[a-z0-9\-\/]+$' # can't use '.' when using HTTPS
s3_inventory_bucket:
hidden: true
default: ""
regex: '^[a-z0-9\-\/_]+$' # can't use '.' when using HTTPS
s3_disable_cleanup: s3_disable_cleanup:
default: false default: false
backup_time_of_day: backup_time_of_day:

View File

@ -297,10 +297,12 @@ module FileStore
end end
def list_missing_uploads(skip_optimized: false) def list_missing_uploads(skip_optimized: false)
if SiteSetting.enable_s3_inventory if s3_inventory_bucket = SiteSetting.s3_inventory_bucket
require "s3_inventory" S3Inventory.new(:upload, s3_inventory_bucket:).backfill_etags_and_list_missing
S3Inventory.new(s3_helper, :upload).backfill_etags_and_list_missing
S3Inventory.new(s3_helper, :optimized).backfill_etags_and_list_missing unless skip_optimized unless skip_optimized
S3Inventory.new(:optimized, s3_inventory_bucket:).backfill_etags_and_list_missing
end
else else
list_missing(Upload.by_users, "original/") list_missing(Upload.by_users, "original/")
list_missing(OptimizedImage, "optimized/") unless skip_optimized list_missing(OptimizedImage, "optimized/") unless skip_optimized

View File

@ -282,7 +282,12 @@ class S3Helper
end end
def s3_client def s3_client
@s3_client ||= Aws::S3::Client.new(@s3_options) @s3_client ||= init_aws_s3_client
end
def stub_client_responses!
raise "This method is only allowed to be used in the testing environment" if !Rails.env.test?
@s3_client = init_aws_s3_client(stub_responses: true)
end end
def s3_inventory_path(path = "inventory") def s3_inventory_path(path = "inventory")
@ -381,6 +386,12 @@ class S3Helper
private private
def init_aws_s3_client(stub_responses: false)
options = @s3_options
options = options.merge(stub_responses: true) if stub_responses
Aws::S3::Client.new(options)
end
def fetch_bucket_cors_rules def fetch_bucket_cors_rules
begin begin
s3_resource.client.get_bucket_cors(bucket: @s3_bucket_name).cors_rules&.map(&:to_h) || [] s3_resource.client.get_bucket_cors(bucket: @s3_bucket_name).cors_rules&.map(&:to_h) || []

View File

@ -4,17 +4,21 @@ require "aws-sdk-s3"
require "csv" require "csv"
class S3Inventory class S3Inventory
attr_reader :type, :inventory_date attr_reader :type, :inventory_date, :s3_helper
CSV_KEY_INDEX = 1 CSV_KEY_INDEX = 1
CSV_ETAG_INDEX = 2 CSV_ETAG_INDEX = 2
INVENTORY_PREFIX = "inventory" INVENTORY_PREFIX = "inventory"
INVENTORY_VERSION = "1"
INVENTORY_LAG = 2.days INVENTORY_LAG = 2.days
WAIT_AFTER_RESTORE_DAYS = 2 WAIT_AFTER_RESTORE_DAYS = 2
def initialize(s3_helper, type, preloaded_inventory_file: nil, preloaded_inventory_date: nil) def initialize(
@s3_helper = s3_helper type,
s3_inventory_bucket:,
preloaded_inventory_file: nil,
preloaded_inventory_date: nil
)
@s3_helper = S3Helper.new(s3_inventory_bucket)
if preloaded_inventory_file && preloaded_inventory_date if preloaded_inventory_file && preloaded_inventory_date
# Data preloaded, so we don't need to fetch it again # Data preloaded, so we don't need to fetch it again
@ -189,43 +193,6 @@ class S3Inventory
) )
end end
def update_bucket_policy
@s3_helper.s3_client.put_bucket_policy(
bucket: bucket_name,
policy: {
Version: "2012-10-17",
Statement: [
{
Sid: "InventoryAndAnalyticsPolicy",
Effect: "Allow",
Principal: {
Service: "s3.amazonaws.com",
},
Action: ["s3:PutObject"],
Resource: ["#{inventory_path_arn}/*"],
Condition: {
ArnLike: {
"aws:SourceArn": bucket_arn,
},
StringEquals: {
"s3:x-amz-acl": "bucket-owner-full-control",
},
},
},
],
}.to_json,
)
end
def update_bucket_inventory_configuration
@s3_helper.s3_client.put_bucket_inventory_configuration(
bucket: bucket_name,
id: inventory_id,
inventory_configuration: inventory_configuration,
use_accelerate_endpoint: false,
)
end
def prepare_for_all_sites def prepare_for_all_sites
db_names = RailsMultisite::ConnectionManagement.all_dbs db_names = RailsMultisite::ConnectionManagement.all_dbs
db_files = {} db_files = {}
@ -248,6 +215,10 @@ class S3Inventory
cleanup! cleanup!
end end
def s3_client
@s3_helper.s3_client
end
private private
def cleanup! def cleanup!
@ -318,31 +289,6 @@ class S3Inventory
end end
end end
def inventory_configuration
filter_prefix = type
filter_prefix = bucket_folder_path if bucket_folder_path.present?
{
destination: {
s3_bucket_destination: {
bucket: bucket_arn,
prefix: inventory_path,
format: "CSV",
},
},
filter: {
prefix: filter_prefix,
},
is_enabled: SiteSetting.enable_s3_inventory,
id: inventory_id,
included_object_versions: "Current",
optional_fields: ["ETag"],
schedule: {
frequency: "Daily",
},
}
end
def bucket_name def bucket_name
@s3_helper.s3_bucket_name @s3_helper.s3_bucket_name
end end
@ -353,8 +299,7 @@ class S3Inventory
def unsorted_files def unsorted_files
objects = [] objects = []
hive_path = File.join(bucket_folder_path, "hive")
hive_path = File.join(inventory_path, bucket_name, inventory_id, "hive")
@s3_helper.list(hive_path).each { |obj| objects << obj if obj.key.match?(/symlink\.txt\z/i) } @s3_helper.list(hive_path).each { |obj| objects << obj if obj.key.match?(/symlink\.txt\z/i) }
objects objects
@ -363,28 +308,6 @@ class S3Inventory
[] []
end end
def inventory_id
@inventory_id ||=
begin
id = Rails.configuration.multisite ? "original" : type # TODO: rename multisite path to "uploads"
bucket_folder_path.present? ? "#{bucket_folder_path}-#{id}" : id
end
end
def inventory_path_arn
File.join(bucket_arn, inventory_path)
end
def inventory_path
path = File.join(INVENTORY_PREFIX, INVENTORY_VERSION)
path = File.join(bucket_folder_path, path) if bucket_folder_path.present?
path
end
def bucket_arn
"arn:aws:s3:::#{bucket_name}"
end
def log(message, ex = nil) def log(message, ex = nil)
puts(message) puts(message)
Rails.logger.error("#{ex}\n" + (ex.backtrace || []).join("\n")) if ex Rails.logger.error("#{ex}\n" + (ex.backtrace || []).join("\n")) if ex

View File

@ -187,12 +187,6 @@ module SiteSettings::Validations
end end
end end
def validate_enable_s3_inventory(new_val)
if new_val == "t" && !SiteSetting.Upload.enable_s3_uploads
validate_error :enable_s3_uploads_is_required
end
end
def validate_backup_location(new_val) def validate_backup_location(new_val)
return unless new_val == BackupLocationSiteSetting::S3 return unless new_val == BackupLocationSiteSetting::S3
if SiteSetting.s3_backup_bucket.blank? if SiteSetting.s3_backup_bucket.blank?

View File

@ -3,11 +3,8 @@
RSpec.describe Jobs::EnsureS3UploadsExistence do RSpec.describe Jobs::EnsureS3UploadsExistence do
subject(:job) { described_class.new } subject(:job) { described_class.new }
context "with S3 inventory enabled" do context "when `s3_inventory_bucket` has been set" do
before do before { SiteSetting.s3_inventory_bucket = "some-bucket-name" }
setup_s3
SiteSetting.enable_s3_inventory = true
end
it "works" do it "works" do
S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).once S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).once
@ -15,8 +12,8 @@ RSpec.describe Jobs::EnsureS3UploadsExistence do
end end
end end
context "with S3 inventory disabled" do context "when `s3_inventory_bucket` has not been set" do
before { SiteSetting.enable_s3_inventory = false } before { SiteSetting.s3_inventory_bucket = nil }
it "doesn't execute" do it "doesn't execute" do
S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).never S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).never

View File

@ -1,63 +0,0 @@
# frozen_string_literal: true
require "file_store/s3_store"
RSpec.describe Jobs::UpdateS3Inventory do
before do
setup_s3
SiteSetting.s3_upload_bucket = "special-bucket"
SiteSetting.enable_s3_inventory = true
store = FileStore::S3Store.new
@client = Aws::S3::Client.new(stub_responses: true)
store.s3_helper.stubs(:s3_client).returns(@client)
Discourse.stubs(:store).returns(store)
end
it "updates the bucket policy and inventory configuration in S3" do
id = "original"
path = File.join(S3Inventory::INVENTORY_PREFIX, S3Inventory::INVENTORY_VERSION)
@client.expects(:put_bucket_policy).with(
bucket: "special-bucket",
policy:
%Q|{"Version":"2012-10-17","Statement":[{"Sid":"InventoryAndAnalyticsPolicy","Effect":"Allow","Principal":{"Service":"s3.amazonaws.com"},"Action":["s3:PutObject"],"Resource":["arn:aws:s3:::special-bucket/#{path}/*"],"Condition":{"ArnLike":{"aws:SourceArn":"arn:aws:s3:::special-bucket"},"StringEquals":{"s3:x-amz-acl":"bucket-owner-full-control"}}}]}|,
)
@client.expects(:put_bucket_inventory_configuration)
@client.expects(:put_bucket_inventory_configuration).with(
bucket: "special-bucket",
id: id,
inventory_configuration: {
destination: {
s3_bucket_destination: {
bucket: "arn:aws:s3:::special-bucket",
prefix: path,
format: "CSV",
},
},
filter: {
prefix: id,
},
is_enabled: true,
id: id,
included_object_versions: "Current",
optional_fields: ["ETag"],
schedule: {
frequency: "Daily",
},
},
use_accelerate_endpoint: false,
)
described_class.new.execute(nil)
end
it "doesn't update the policy with s3_configure_inventory_policy disabled" do
SiteSetting.s3_configure_inventory_policy = false
@client.expects(:put_bucket_policy).never
@client.expects(:put_bucket_inventory_configuration).never
described_class.new.execute(nil)
end
end

View File

@ -5,15 +5,16 @@ require "s3_inventory"
require "file_store/s3_store" require "file_store/s3_store"
RSpec.describe "S3Inventory", type: :multisite do RSpec.describe "S3Inventory", type: :multisite do
let(:client) { Aws::S3::Client.new(stub_responses: true) } let(:inventory) do
let(:helper) { S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client) } S3Inventory.new(:upload, s3_inventory_bucket: "some-inventory-bucket/some/prefix")
let(:inventory) { S3Inventory.new(helper, :upload) } end
let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" } let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" }
it "can create per-site files" do it "can create per-site files" do
freeze_time freeze_time
setup_s3 setup_s3
SiteSetting.enable_s3_inventory = true
inventory.stubs(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]) inventory.stubs(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }])
inventory.stubs(:cleanup!) inventory.stubs(:cleanup!)
@ -23,6 +24,7 @@ RSpec.describe "S3Inventory", type: :multisite do
expect(db1.lines.count).to eq(3) expect(db1.lines.count).to eq(3)
expect(db2.lines.count).to eq(1) expect(db2.lines.count).to eq(1)
files.values.each do |f| files.values.each do |f|
f.close f.close
f.unlink f.unlink

View File

@ -1,25 +1,19 @@
# frozen_string_literal: true # frozen_string_literal: true
require "s3_helper"
require "s3_inventory"
require "file_store/s3_store"
RSpec.describe S3Inventory do RSpec.describe S3Inventory do
let(:client) { Aws::S3::Client.new(stub_responses: true) } let(:inventory) do
let(:resource) { Aws::S3::Resource.new(client: client) } S3Inventory.new(:upload, s3_inventory_bucket: "some-inventory-bucket/inventoried-bucket/prefix")
let(:bucket) { resource.bucket(SiteSetting.Upload.s3_upload_bucket.downcase) } end
let(:helper) { S3Helper.new(bucket.name, "", client: client, bucket: bucket) }
let(:inventory) { S3Inventory.new(helper, :upload) }
let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" } let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" }
before do before do
setup_s3 inventory.s3_helper.stub_client_responses!
SiteSetting.enable_s3_inventory = true
inventory.stubs(:cleanup!) inventory.stubs(:cleanup!)
end end
it "should raise error if an inventory file is not found" do it "should raise error if an inventory file is not found" do
client.stub_responses(:list_objects, contents: []) inventory.s3_client.stub_responses(:list_objects, contents: [])
output = capture_stdout { inventory.backfill_etags_and_list_missing } output = capture_stdout { inventory.backfill_etags_and_list_missing }
expect(output).to eq("Failed to list inventory from S3\n") expect(output).to eq("Failed to list inventory from S3\n")
end end
@ -141,7 +135,7 @@ RSpec.describe S3Inventory do
end end
it "should run if inventory files are at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do it "should run if inventory files are at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do
client.stub_responses( inventory.s3_client.stub_responses(
:list_objects_v2, :list_objects_v2,
{ {
contents: [ contents: [
@ -155,16 +149,13 @@ RSpec.describe S3Inventory do
}, },
) )
client.expects(:get_object).once inventory.s3_client.expects(:get_object).once
capture_stdout do capture_stdout { inventory.backfill_etags_and_list_missing }
inventory = described_class.new(helper, :upload)
inventory.backfill_etags_and_list_missing
end
end end
it "should not run if inventory files are not at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do it "should not run if inventory files are not at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do
client.stub_responses( inventory.s3_client.stub_responses(
:list_objects_v2, :list_objects_v2,
{ {
contents: [ contents: [
@ -177,12 +168,9 @@ RSpec.describe S3Inventory do
}, },
) )
client.expects(:get_object).never inventory.s3_client.expects(:get_object).never
capture_stdout do capture_stdout { inventory.backfill_etags_and_list_missing }
inventory = described_class.new(helper, :upload)
inventory.backfill_etags_and_list_missing
end
end end
end end
@ -203,11 +191,12 @@ RSpec.describe S3Inventory do
File.open(csv_filename) do |f| File.open(csv_filename) do |f|
preloaded_inventory = preloaded_inventory =
S3Inventory.new( S3Inventory.new(
helper,
:upload, :upload,
s3_inventory_bucket: "some-inventory-bucket",
preloaded_inventory_file: f, preloaded_inventory_file: f,
preloaded_inventory_date: Time.now, preloaded_inventory_date: Time.now,
) )
preloaded_inventory.backfill_etags_and_list_missing preloaded_inventory.backfill_etags_and_list_missing
end end
end end
@ -215,27 +204,4 @@ RSpec.describe S3Inventory do
expect(output).to eq("#{upload.url}\n#{no_etag.url}\n2 of 5 uploads are missing\n") expect(output).to eq("#{upload.url}\n#{no_etag.url}\n2 of 5 uploads are missing\n")
expect(Discourse.stats.get("missing_s3_uploads")).to eq(2) expect(Discourse.stats.get("missing_s3_uploads")).to eq(2)
end end
describe "s3 inventory configuration" do
let(:bucket_name) { "s3-upload-bucket" }
let(:subfolder_path) { "subfolder" }
before { SiteSetting.s3_upload_bucket = "#{bucket_name}/#{subfolder_path}" }
it "is formatted correctly for subfolders" do
s3_helper = S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client)
config = S3Inventory.new(s3_helper, :upload).send(:inventory_configuration)
expect(config[:destination][:s3_bucket_destination][:bucket]).to eq(
"arn:aws:s3:::#{bucket_name}",
)
expect(config[:destination][:s3_bucket_destination][:prefix]).to eq(
"#{subfolder_path}/inventory/1",
)
expect(config[:id]).to eq("#{subfolder_path}-original")
expect(config[:schedule][:frequency]).to eq("Daily")
expect(config[:included_object_versions]).to eq("Current")
expect(config[:optional_fields]).to eq(["ETag"])
expect(config[:filter][:prefix]).to eq(subfolder_path)
end
end
end end