mirror of
https://github.com/discourse/discourse.git
synced 2024-11-25 09:42:07 +08:00
FIX: S3Inventory
to ignore files older than last backup restore date (#27166)
This commit updates `S3Inventory#files` to ignore S3 inventory files which have a `last_modified` timestamp which are not at least 2 days older than `BackupMetadata.last_restore_date` timestamp. This check was previously only in `Jobs::EnsureS3UploadsExistence` but `S3Inventory` can also be used via Rake tasks so this protection needs to be in `S3Inventory` and not in the scheduled job.
This commit is contained in:
parent
a4c5f85b10
commit
df16ab0758
|
@ -4,8 +4,6 @@ module Jobs
|
||||||
class EnsureS3UploadsExistence < ::Jobs::Scheduled
|
class EnsureS3UploadsExistence < ::Jobs::Scheduled
|
||||||
every 1.day
|
every 1.day
|
||||||
|
|
||||||
WAIT_AFTER_RESTORE_HOURS = 48
|
|
||||||
|
|
||||||
def perform(*args)
|
def perform(*args)
|
||||||
super
|
super
|
||||||
ensure
|
ensure
|
||||||
|
@ -28,7 +26,7 @@ module Jobs
|
||||||
end
|
end
|
||||||
|
|
||||||
def execute(args)
|
def execute(args)
|
||||||
return if !executable?
|
return if !SiteSetting.enable_s3_inventory
|
||||||
require "s3_inventory"
|
require "s3_inventory"
|
||||||
|
|
||||||
if !@db_inventories && Rails.configuration.multisite && GlobalSetting.use_s3?
|
if !@db_inventories && Rails.configuration.multisite && GlobalSetting.use_s3?
|
||||||
|
@ -48,15 +46,5 @@ module Jobs
|
||||||
S3Inventory.new(s3_helper, :upload).backfill_etags_and_list_missing
|
S3Inventory.new(s3_helper, :upload).backfill_etags_and_list_missing
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def executable?
|
|
||||||
return false if !SiteSetting.enable_s3_inventory
|
|
||||||
|
|
||||||
if last_restore_date = BackupMetadata.last_restore_date
|
|
||||||
return false if last_restore_date > WAIT_AFTER_RESTORE_HOURS.hours.ago
|
|
||||||
end
|
|
||||||
|
|
||||||
true
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -27,6 +27,7 @@ class S3Helper
|
||||||
|
|
||||||
def initialize(s3_bucket_name, tombstone_prefix = "", options = {})
|
def initialize(s3_bucket_name, tombstone_prefix = "", options = {})
|
||||||
@s3_client = options.delete(:client)
|
@s3_client = options.delete(:client)
|
||||||
|
@s3_bucket = options.delete(:bucket)
|
||||||
@s3_options = default_s3_options.merge(options)
|
@s3_options = default_s3_options.merge(options)
|
||||||
|
|
||||||
@s3_bucket_name, @s3_bucket_folder_path =
|
@s3_bucket_name, @s3_bucket_folder_path =
|
||||||
|
|
|
@ -6,11 +6,12 @@ require "csv"
|
||||||
class S3Inventory
|
class S3Inventory
|
||||||
attr_reader :type, :model, :inventory_date
|
attr_reader :type, :model, :inventory_date
|
||||||
|
|
||||||
CSV_KEY_INDEX ||= 1
|
CSV_KEY_INDEX = 1
|
||||||
CSV_ETAG_INDEX ||= 2
|
CSV_ETAG_INDEX = 2
|
||||||
INVENTORY_PREFIX ||= "inventory"
|
INVENTORY_PREFIX = "inventory"
|
||||||
INVENTORY_VERSION ||= "1"
|
INVENTORY_VERSION = "1"
|
||||||
INVENTORY_LAG ||= 2.days
|
INVENTORY_LAG = 2.days
|
||||||
|
WAIT_AFTER_RESTORE_DAYS = 2
|
||||||
|
|
||||||
def initialize(s3_helper, type, preloaded_inventory_file: nil, preloaded_inventory_date: nil)
|
def initialize(s3_helper, type, preloaded_inventory_file: nil, preloaded_inventory_date: nil)
|
||||||
@s3_helper = s3_helper
|
@s3_helper = s3_helper
|
||||||
|
@ -41,11 +42,13 @@ class S3Inventory
|
||||||
download_and_decompress_files if !@preloaded_inventory_file
|
download_and_decompress_files if !@preloaded_inventory_file
|
||||||
|
|
||||||
multisite_prefix = Discourse.store.upload_path
|
multisite_prefix = Discourse.store.upload_path
|
||||||
|
|
||||||
ActiveRecord::Base.transaction do
|
ActiveRecord::Base.transaction do
|
||||||
begin
|
begin
|
||||||
connection.exec(
|
connection.exec(
|
||||||
"CREATE TEMP TABLE #{table_name}(url text UNIQUE, etag text, PRIMARY KEY(etag, url))",
|
"CREATE TEMP TABLE #{table_name}(url text UNIQUE, etag text, PRIMARY KEY(etag, url))",
|
||||||
)
|
)
|
||||||
|
|
||||||
connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
|
connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
|
||||||
for_each_inventory_row do |row|
|
for_each_inventory_row do |row|
|
||||||
key = row[CSV_KEY_INDEX]
|
key = row[CSV_KEY_INDEX]
|
||||||
|
@ -258,17 +261,28 @@ class S3Inventory
|
||||||
|
|
||||||
def files
|
def files
|
||||||
return if @preloaded_inventory_file
|
return if @preloaded_inventory_file
|
||||||
|
|
||||||
@files ||=
|
@files ||=
|
||||||
begin
|
begin
|
||||||
symlink_file = unsorted_files.sort_by { |file| -file.last_modified.to_i }.first
|
symlink_file = unsorted_files.sort_by { |file| -file.last_modified.to_i }.first
|
||||||
|
|
||||||
return [] if symlink_file.blank?
|
return [] if symlink_file.blank?
|
||||||
|
|
||||||
|
if BackupMetadata.last_restore_date.present? &&
|
||||||
|
(symlink_file.last_modified - WAIT_AFTER_RESTORE_DAYS.days) <
|
||||||
|
BackupMetadata.last_restore_date
|
||||||
|
return []
|
||||||
|
end
|
||||||
|
|
||||||
@inventory_date = symlink_file.last_modified - INVENTORY_LAG
|
@inventory_date = symlink_file.last_modified - INVENTORY_LAG
|
||||||
log "Downloading symlink file to tmp directory..."
|
log "Downloading symlink file to tmp directory..."
|
||||||
failure_message = "Failed to download symlink file to tmp directory."
|
failure_message = "Failed to download symlink file to tmp directory."
|
||||||
filename = File.join(tmp_directory, File.basename(symlink_file.key))
|
filename = File.join(tmp_directory, File.basename(symlink_file.key))
|
||||||
|
|
||||||
@s3_helper.download_file(symlink_file.key, filename, failure_message)
|
@s3_helper.download_file(symlink_file.key, filename, failure_message)
|
||||||
|
|
||||||
|
return [] if !File.exist?(filename)
|
||||||
|
|
||||||
File
|
File
|
||||||
.readlines(filename)
|
.readlines(filename)
|
||||||
.map do |key|
|
.map do |key|
|
||||||
|
|
|
@ -13,20 +13,6 @@ RSpec.describe Jobs::EnsureS3UploadsExistence do
|
||||||
S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).once
|
S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).once
|
||||||
job.execute({})
|
job.execute({})
|
||||||
end
|
end
|
||||||
|
|
||||||
it "doesn't execute when the site was restored within the last 48 hours" do
|
|
||||||
S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).never
|
|
||||||
BackupMetadata.update_last_restore_date(47.hours.ago)
|
|
||||||
|
|
||||||
job.execute({})
|
|
||||||
end
|
|
||||||
|
|
||||||
it "executes when the site was restored more than 48 hours ago" do
|
|
||||||
S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).once
|
|
||||||
BackupMetadata.update_last_restore_date(49.hours.ago)
|
|
||||||
|
|
||||||
job.execute({})
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
context "with S3 inventory disabled" do
|
context "with S3 inventory disabled" do
|
||||||
|
|
|
@ -4,53 +4,17 @@ require "s3_helper"
|
||||||
require "s3_inventory"
|
require "s3_inventory"
|
||||||
require "file_store/s3_store"
|
require "file_store/s3_store"
|
||||||
|
|
||||||
RSpec.describe "S3Inventory" do
|
RSpec.describe S3Inventory do
|
||||||
let(:client) { Aws::S3::Client.new(stub_responses: true) }
|
let(:client) { Aws::S3::Client.new(stub_responses: true) }
|
||||||
let(:helper) { S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client) }
|
let(:resource) { Aws::S3::Resource.new(client: client) }
|
||||||
|
let(:bucket) { resource.bucket(SiteSetting.Upload.s3_upload_bucket.downcase) }
|
||||||
|
let(:helper) { S3Helper.new(bucket.name, "", client: client, bucket: bucket) }
|
||||||
let(:inventory) { S3Inventory.new(helper, :upload) }
|
let(:inventory) { S3Inventory.new(helper, :upload) }
|
||||||
let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" }
|
let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" }
|
||||||
|
|
||||||
before do
|
before do
|
||||||
setup_s3
|
setup_s3
|
||||||
SiteSetting.enable_s3_inventory = true
|
SiteSetting.enable_s3_inventory = true
|
||||||
|
|
||||||
client.stub_responses(
|
|
||||||
:list_objects,
|
|
||||||
->(context) do
|
|
||||||
expect(context.params[:prefix]).to eq(
|
|
||||||
"#{S3Inventory::INVENTORY_PREFIX}/#{S3Inventory::INVENTORY_VERSION}/bucket/original/hive",
|
|
||||||
)
|
|
||||||
|
|
||||||
{
|
|
||||||
contents: [
|
|
||||||
{
|
|
||||||
etag: "\"70ee1738b6b21e2c8a43f3a5ab0eee71\"",
|
|
||||||
key: "example1.csv.gz",
|
|
||||||
last_modified: Time.parse("2014-11-21T19:40:05.000Z"),
|
|
||||||
owner: {
|
|
||||||
display_name: "myname",
|
|
||||||
id: "12345example25102679df27bb0ae12b3f85be6f290b936c4393484be31bebcc",
|
|
||||||
},
|
|
||||||
size: 11,
|
|
||||||
storage_class: "STANDARD",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
etag: "\"9c8af9a76df052144598c115ef33e511\"",
|
|
||||||
key: "example2.csv.gz",
|
|
||||||
last_modified: Time.parse("2013-11-15T01:10:49.000Z"),
|
|
||||||
owner: {
|
|
||||||
display_name: "myname",
|
|
||||||
id: "12345example25102679df27bb0ae12b3f85be6f290b936c4393484be31bebcc",
|
|
||||||
},
|
|
||||||
size: 713_193,
|
|
||||||
storage_class: "STANDARD",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
next_marker: "eyJNYXJrZXIiOiBudWxsLCAiYm90b190cnVuY2F0ZV9hbW91bnQiOiAyfQ==",
|
|
||||||
}
|
|
||||||
end,
|
|
||||||
)
|
|
||||||
|
|
||||||
inventory.stubs(:cleanup!)
|
inventory.stubs(:cleanup!)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -162,6 +126,58 @@ RSpec.describe "S3Inventory" do
|
||||||
expect(Upload.by_users.order(:url).pluck(:url, :etag)).to eq(files)
|
expect(Upload.by_users.order(:url).pluck(:url, :etag)).to eq(files)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "when site was restored from a backup" do
|
||||||
|
before do
|
||||||
|
freeze_time
|
||||||
|
BackupMetadata.update_last_restore_date(Time.now)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "should run if inventory files are at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do
|
||||||
|
client.stub_responses(
|
||||||
|
:list_objects_v2,
|
||||||
|
{
|
||||||
|
contents: [
|
||||||
|
{
|
||||||
|
key: "symlink.txt",
|
||||||
|
last_modified:
|
||||||
|
BackupMetadata.last_restore_date + described_class::WAIT_AFTER_RESTORE_DAYS.days,
|
||||||
|
size: 1,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
client.expects(:get_object).once
|
||||||
|
|
||||||
|
capture_stdout do
|
||||||
|
inventory = described_class.new(helper, :upload)
|
||||||
|
inventory.backfill_etags_and_list_missing
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
it "should not run if inventory files are not at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do
|
||||||
|
client.stub_responses(
|
||||||
|
:list_objects_v2,
|
||||||
|
{
|
||||||
|
contents: [
|
||||||
|
{
|
||||||
|
key: "symlink.txt",
|
||||||
|
last_modified: BackupMetadata.last_restore_date + 1.day,
|
||||||
|
size: 1,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
client.expects(:get_object).never
|
||||||
|
|
||||||
|
capture_stdout do
|
||||||
|
inventory = described_class.new(helper, :upload)
|
||||||
|
inventory.backfill_etags_and_list_missing
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it "should work when passed preloaded data" do
|
it "should work when passed preloaded data" do
|
||||||
freeze_time
|
freeze_time
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user