# frozen_string_literal: true
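
# Attempts to re-create Upload records for media that is still referenced in
# posts but whose upload has gone missing, pulling the files back from the
# local tombstone directory or from S3.
#
# Example (hypothetical invocation, e.g. from a Rails console):
#   UploadRecovery.new(dry_run: true).recover  # print what would be recovered
#   UploadRecovery.new.recover                 # actually recover and rebake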
class UploadRecovery
  def initialize(dry_run: false)
    @dry_run = dry_run
  end
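
  # Walk every post that references uploads, re-cook its raw markup, and try
  # to recover the uploads it references: images via their short-url
  # `data-orig-src` attribute, and links/media via `href`/`src` URLs whose
  # Upload record is missing. In dry-run mode the candidates are only printed.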
  def recover(posts = Post)
    posts.have_uploads.find_each do |post|
      begin
        analyzer = PostAnalyzer.new(post.raw, post.topic_id)

        analyzer.cooked_stripped.css("img", "a").each do |media|
          if media.name == "img" && orig_src = media["data-orig-src"]
            if dom_class = media["class"]
              if (Post.white_listed_image_classes & dom_class.split).count > 0
                next
              end
            end

            if @dry_run
              puts "#{post.full_url} #{orig_src}"
            else
              recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
            end
          elsif url = (media["href"] || media["src"])
            data = Upload.extract_url(url)
            next unless data

            sha1 = data[2]

            unless upload = Upload.get_from_url(url)
              if @dry_run
                puts "#{post.full_url} #{url}"
              else
                recover_post_upload(post, sha1)
              end
            end
          end
        end
      rescue => e
        raise e unless @dry_run
        puts "#{post.full_url} #{e.class}: #{e.message}"
      end
    end
  end

  private
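
  # Validate the sha1, then dispatch to the recovery strategy that matches the
  # site's upload store (S3 for external stores, disk otherwise).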
  def recover_post_upload(post, sha1)
    return unless valid_sha1?(sha1)

    attributes = {
      post: post,
      sha1: sha1
    }

    if Discourse.store.external?
      recover_post_upload_from_s3(attributes)
    else
      recover_post_upload_from_local(attributes)
    end
  end
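
  # Each wrapper rebakes the post once the upload has been persisted, so the
  # cooked HTML points at the recovered file again.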
  def recover_post_upload_from_local(post:, sha1:)
    recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
      post.rebake! if upload.persisted?
    end
  end

  def recover_post_upload_from_s3(post:, sha1:)
    recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
      post.rebake! if upload.persisted?
    end
  end
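
  # Search the local upload directories (tombstone first, then live uploads)
  # for a file whose path contains the sha1, copy it into a Tempfile, and
  # re-create the upload from it. The glob results are memoized across calls.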
  def recover_from_local(sha1:, user_id:)
    public_path = Rails.root.join("public")

    @paths ||= begin
      Dir.glob(File.join(
        public_path,
        'uploads',
        'tombstone',
        RailsMultisite::ConnectionManagement.current_db,
        'original',
        '**',
        '*.*'
      )).concat(Dir.glob(File.join(
        public_path,
        'uploads',
        RailsMultisite::ConnectionManagement.current_db,
        'original',
        '**',
        '*.*'
      )))
    end

    @paths.each do |path|
      if path =~ /#{sha1}/
        begin
          tmp = Tempfile.new
          tmp.write(File.read(path))
          tmp.rewind

          upload = create_upload(tmp, File.basename(path), user_id)
          yield upload if block_given?
        ensure
          tmp&.close
        end
      end
    end
  end
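
  # Search the S3 bucket (both the live "original" prefix and the tombstone
  # prefix) for a key containing the sha1. Tombstoned objects are copied back
  # to their live key first, then the file is downloaded and the upload
  # re-created. The key listing is memoized across calls.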
  def recover_from_s3(sha1:, user_id:)
    @object_keys ||= begin
      s3_helper = Discourse.store.s3_helper

      s3_helper.list("original").map(&:key).concat(
        s3_helper.list("#{FileStore::S3Store::TOMBSTONE_PREFIX}original").map(&:key)
      )
    end

    @object_keys.each do |key|
      if key =~ /#{sha1}/
        tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

        if key.include?(tombstone_prefix)
          old_key = key
          key = key.sub(tombstone_prefix, "")

          Discourse.store.s3_helper.copy(
            old_key,
            key,
            options: { acl: "public-read" }
          )
        end

        url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"

        begin
          tmp = FileHelper.download(
            url,
            max_file_size: SiteSetting.max_image_size_kb.kilobytes,
            tmp_file_name: "recover_from_s3"
          )

          if tmp
            upload = create_upload(tmp, File.basename(key), user_id)
            yield upload if block_given?
          end
        ensure
          tmp&.close
        end
      end
    end
  end
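
  # Both recovery paths funnel through UploadCreator, so a recovered file goes
  # through the same validation and processing as a fresh upload.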
  def create_upload(file, filename, user_id)
    UploadCreator.new(file, filename).create_for(user_id)
  end

  def valid_sha1?(sha1)
    sha1.present? && sha1.length == Upload::SHA1_LENGTH
  end
end