discourse/lib/s3_inventory.rb

# frozen_string_literal: true

require "aws-sdk-s3"
require "csv"

class S3Inventory
  attr_reader :type, :model, :inventory_date

  CSV_KEY_INDEX ||= 1
  CSV_ETAG_INDEX ||= 2
  INVENTORY_PREFIX ||= "inventory"
  INVENTORY_VERSION ||= "1"
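
  # Note: the CSV index constants above assume the standard S3 inventory report
  # layout of (bucket, key, ...optional fields) with only the "ETag" optional
  # field enabled (see #inventory_configuration below), so column 1 holds the
  # object key and column 2 its ETag.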

  def initialize(s3_helper, type)
    @s3_helper = s3_helper

    if type == :upload
      @type = "original"
      @model = Upload
    elsif type == :optimized
      @type = "optimized"
      @model = OptimizedImage
    end
  end
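
  # A minimal usage sketch (an assumption for illustration, not part of this
  # file): callers construct the inventory with an S3Helper and one of the two
  # supported symbols; any other type leaves @type and @model unset.
  #
  #   inventory = S3Inventory.new(s3_helper, :upload)   # original uploads
  #   inventory.backfill_etags_and_list_missing
  #
  #   S3Inventory.new(s3_helper, :optimized)            # optimized images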

  def backfill_etags_and_list_missing
    if files.blank?
      error("Failed to list inventory from S3")
      return
    end

    DistributedMutex.synchronize("s3_inventory_list_missing_#{type}", validity: 30.minutes) do
      begin
        files.each do |file|
          next if File.exists?(file[:filename][0...-3])
          download_inventory_file_to_tmp_directory(file)
          decompress_inventory_file(file)
        end

        multisite_prefix = Discourse.store.upload_path

        ActiveRecord::Base.transaction do
          begin
            connection.exec(
              "CREATE TEMP TABLE #{table_name}(url text UNIQUE, etag text, PRIMARY KEY(etag, url))"
            )

            connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
              files.each do |file|
                CSV.foreach(file[:filename][0...-3], headers: false) do |row|
                  key = row[CSV_KEY_INDEX]
                  next if Rails.configuration.multisite && key.exclude?(multisite_prefix)

                  url = File.join(Discourse.store.absolute_base_url, key)
                  connection.put_copy_data("#{url},#{row[CSV_ETAG_INDEX]}\n")
                end
              end
            end

            # backfilling etags
            connection.async_exec("UPDATE #{model.table_name}
              SET etag = #{table_name}.etag
              FROM #{table_name}
              WHERE #{model.table_name}.etag IS NULL
                AND #{model.table_name}.url = #{table_name}.url")

            list_missing_post_uploads if type == "original"

            uploads = (model == Upload) ? model.by_users.where("updated_at < ?", inventory_date) : model
            missing_uploads = uploads
              .joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag")
              .where("#{table_name}.etag IS NULL AND #{model.table_name}.etag IS NOT NULL")

            if (missing_count = missing_uploads.count) > 0
              missing_uploads.select(:id, :url).find_each do |upload|
                log upload.url
              end

              log "#{missing_count} of #{uploads.count} #{model.name.underscore.pluralize} are missing"
            end

            Discourse.stats.set("missing_s3_#{model.table_name}", missing_count)
          ensure
            connection.exec("DROP TABLE #{table_name}") unless connection.nil?
          end
        end
      ensure
        cleanup!
      end
    end
  end
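
  # For illustration only — a sketch of the SQL the method above effectively
  # runs for the :upload case (assuming the conventional "uploads" table name
  # for the Upload model; the temp table name comes from #table_name):
  #
  #   CREATE TEMP TABLE original_inventory(url text UNIQUE, etag text, PRIMARY KEY(etag, url));
  #   COPY original_inventory FROM STDIN CSV;            -- one "url,etag" row per inventory entry
  #   UPDATE uploads SET etag = original_inventory.etag
  #     FROM original_inventory
  #    WHERE uploads.etag IS NULL AND uploads.url = original_inventory.url;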

  def list_missing_post_uploads
    log "Listing missing post uploads..."

    missing = Post.find_missing_uploads(include_local_upload: false) do |post, _, _, sha1|
      next if sha1.blank?

      upload_id = nil
      result = connection.exec("SELECT * FROM #{table_name} WHERE url LIKE '%original/%/#{sha1}%'")

      if result.count >= 1
        begin
          url = result[0]["url"]
          key = url.sub(/^#{Discourse.store.absolute_base_url}\//, "")
          data = @s3_helper.object(key).data
          filename = (data.content_disposition&.match(/filename=\"(.*)\"/) || [])[1]

          upload = Upload.new(
            user_id: Discourse.system_user.id,
            original_filename: filename || File.basename(key),
            filesize: data.content_length,
            url: url,
            sha1: sha1,
            etag: result[0]["etag"]
          )
          upload.save!(validate: false)
          upload_id = upload.id

          post.link_post_uploads
        rescue Aws::S3::Errors::NotFound
          next
        end
      end

      upload_id
    end

    Discourse.stats.set("missing_post_uploads", missing[:count])
    log "#{missing[:count]} post uploads are missing."
  end

  def download_inventory_file_to_tmp_directory(file)
    return if File.exists?(file[:filename])

    log "Downloading inventory file '#{file[:key]}' to tmp directory..."
    failure_message = "Failed to download inventory file '#{file[:key]}' to tmp directory."
    @s3_helper.download_file(file[:key], file[:filename], failure_message)
  end

  def decompress_inventory_file(file)
    log "Decompressing inventory file '#{file[:filename]}', this may take a while..."

    Discourse::Utils.execute_command(
      'gzip',
      '--decompress',
      file[:filename],
      failure_message: "Failed to decompress inventory file '#{file[:filename]}'.",
      chdir: tmp_directory
    )
  end
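
  # `gzip --decompress` replaces "<name>.csv.gz" with "<name>.csv" on disk, which
  # is why the rest of the class refers to the decompressed file as
  # file[:filename][0...-3] (the filename with its trailing ".gz" stripped).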

  def update_bucket_policy
    @s3_helper.s3_client.put_bucket_policy(
      bucket: bucket_name,
      policy: {
        "Version": "2012-10-17",
        "Statement": [
          {
            "Sid": "InventoryAndAnalyticsPolicy",
            "Effect": "Allow",
            "Principal": { "Service": "s3.amazonaws.com" },
            "Action": ["s3:PutObject"],
            "Resource": ["#{inventory_path_arn}/*"],
            "Condition": {
              "ArnLike": {
                "aws:SourceArn": bucket_arn
              },
              "StringEquals": {
                "s3:x-amz-acl": "bucket-owner-full-control"
              }
            }
          }
        ]
      }.to_json
    )
  end

  def update_bucket_inventory_configuration
    @s3_helper.s3_client.put_bucket_inventory_configuration(
      bucket: bucket_name,
      id: inventory_id,
      inventory_configuration: inventory_configuration,
      use_accelerate_endpoint: false
    )
  end
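
  # A hedged setup sketch (assumption: some admin task or settings hook calls
  # these once per bucket, policy first so S3 is allowed to write into the
  # inventory prefix, then the daily inventory configuration itself):
  #
  #   inventory = S3Inventory.new(s3_helper, :upload)
  #   inventory.update_bucket_policy
  #   inventory.update_bucket_inventory_configuration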

  private

  def cleanup!
    files.each do |file|
      File.delete(file[:filename]) if File.exists?(file[:filename])
      File.delete(file[:filename][0...-3]) if File.exists?(file[:filename][0...-3])
    end
  end

  def connection
    @connection ||= ActiveRecord::Base.connection.raw_connection
  end

  def table_name
    "#{type}_inventory"
  end

  def files
    @files ||= begin
      symlink_file = unsorted_files.sort_by { |file| -file.last_modified.to_i }.first
      return [] if symlink_file.blank?

      @inventory_date = symlink_file.last_modified - 1.day

      log "Downloading symlink file to tmp directory..."
      failure_message = "Failed to download symlink file to tmp directory."
      filename = File.join(tmp_directory, File.basename(symlink_file.key))
      @s3_helper.download_file(symlink_file.key, filename, failure_message)

      File.readlines(filename).map do |key|
        key = key.sub("s3://#{bucket_name}/", "").sub("\n", "")
        { key: key, filename: File.join(tmp_directory, File.basename(key)) }
      end
    end
  end

  def tmp_directory
    @tmp_directory ||= begin
      current_db = RailsMultisite::ConnectionManagement.current_db
      directory = File.join(Rails.root, "tmp", INVENTORY_PREFIX, current_db)
      FileUtils.mkdir_p(directory)
      directory
    end
  end
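
  # Downloaded manifests and inventory CSVs land under
  # "#{Rails.root}/tmp/inventory/<current database>/", e.g. tmp/inventory/default/
  # on a single-site install (assuming the conventional "default" database name).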

  def inventory_configuration
    filter_prefix = type
    filter_prefix = File.join(bucket_folder_path, filter_prefix) if bucket_folder_path.present?

    {
      destination: {
        s3_bucket_destination: {
          bucket: bucket_arn,
          prefix: inventory_path,
          format: "CSV"
        }
      },
      filter: {
        prefix: filter_prefix
      },
      is_enabled: SiteSetting.enable_s3_inventory,
      id: inventory_id,
      included_object_versions: "Current",
      optional_fields: ["ETag"],
      schedule: {
        frequency: "Daily"
      }
    }
  end

  def bucket_name
    @s3_helper.s3_bucket_name
  end

  def bucket_folder_path
    @s3_helper.s3_bucket_folder_path
  end

  def unsorted_files
    objects = []

    hive_path = File.join(inventory_path, bucket_name, inventory_id, "hive")
    @s3_helper.list(hive_path).each do |obj|
      objects << obj if obj.key.match?(/symlink\.txt$/i)
    end

    objects
  rescue Aws::Errors::ServiceError => e
    log("Failed to list inventory from S3", e)
    []
  end
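
  # S3 publishes each inventory run's manifest as a hive-style symlink file under
  # "<destination prefix>/<source bucket>/<configuration id>/hive/.../symlink.txt",
  # which is the layout #unsorted_files scans for above.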

  def inventory_id
    @inventory_id ||= begin
      id = Rails.configuration.multisite ? "original" : type # TODO: rename multisite path to "uploads"
      bucket_folder_path.present? ? "#{bucket_folder_path}-#{id}" : id
    end
  end

  def inventory_path_arn
    File.join(bucket_arn, inventory_path)
  end

  def inventory_path
    path = File.join(INVENTORY_PREFIX, INVENTORY_VERSION)
    path = File.join(bucket_folder_path, path) if bucket_folder_path.present?
    path
  end

  def bucket_arn
    "arn:aws:s3:::#{bucket_name}"
  end

  def log(message, ex = nil)
    puts(message)
    Rails.logger.error("#{ex}\n" + (ex.backtrace || []).join("\n")) if ex
  end

  def error(message)
    log(message, StandardError.new(message))
  end
end