diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d3ad7394285..ce246dc12e8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,5 @@ name: CI - + on: push: branches: @@ -7,7 +7,7 @@ on: pull_request: branches-ignore: - 'tests-passed' - + jobs: build: name: "${{ matrix.target }}-${{ matrix.build_types }}" @@ -38,7 +38,7 @@ jobs: services: postgres: image: postgres:${{ matrix.postgres }} - ports: + ports: - 5432:5432 env: POSTGRES_USER: discourse @@ -88,14 +88,14 @@ jobs: key: ${{ runner.os }}-gem-${{ hashFiles('**/Gemfile.lock') }} restore-keys: | ${{ runner.os }}-gem- - + - name: Setup gems run: bundle install --without development --deployment --jobs 4 --retry 3 - name: Get yarn cache directory id: yarn-cache-dir run: echo "::set-output name=dir::$(yarn cache dir)" - + - name: Yarn cache uses: actions/cache@v1 id: yarn-cache @@ -113,7 +113,7 @@ jobs: run: bin/rake plugin:install_all_official - name: Create database - if: env.BUILD_TYPE != 'LINT' + if: env.BUILD_TYPE != 'LINT' run: bin/rake db:create && bin/rake db:migrate - name: Create parallel databases @@ -123,7 +123,7 @@ jobs: - name: Rubocop if: env.BUILD_TYPE == 'LINT' run: bundle exec rubocop . - + - name: ESLint if: env.BUILD_TYPE == 'LINT' run: yarn eslint app/assets/javascripts test/javascripts && yarn eslint --ext .es6 app/assets/javascripts test/javascripts plugins @@ -133,7 +133,7 @@ jobs: run: | yarn prettier -v yarn prettier --list-different "app/assets/stylesheets/**/*.scss" "app/assets/javascripts/**/*.es6" "test/javascripts/**/*.es6" "plugins/**/*.scss" "plugins/**/*.es6" - + - name: Core RSpec if: env.BUILD_TYPE == 'BACKEND' && env.TARGET == 'CORE' run: bin/turbo_rspec && bin/rake plugin:spec @@ -146,12 +146,12 @@ jobs: if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE' run: bundle exec rake qunit:test['1200000'] timeout-minutes: 30 - + - name: Wizard QUnit if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE' run: bundle exec rake qunit:test['1200000','/wizard/qunit'] timeout-minutes: 30 - + - name: Plugin QUnit # Tests core plugins in TARGET=CORE, and all plugins in TARGET=PLUGINS if: env.BUILD_TYPE == 'FRONTEND' run: bundle exec rake plugin:qunit diff --git a/.gitignore b/.gitignore index 5bc058976e7..59ad3c0b6f3 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ config/discourse.conf # Ignore the default SQLite database and db dumps *.sql *.sql.gz +!/spec/fixtures/**/*.sql /db/*.sqlite3 /db/structure.sql /db/schema.rb diff --git a/lib/backup_restore.rb b/lib/backup_restore.rb index 3b7e189d077..6259044dcb2 100644 --- a/lib/backup_restore.rb +++ b/lib/backup_restore.rb @@ -4,10 +4,8 @@ module BackupRestore class OperationRunningError < RuntimeError; end - VERSION_PREFIX = "v".freeze - DUMP_FILE = "dump.sql.gz".freeze - OLD_DUMP_FILE = "dump.sql".freeze - METADATA_FILE = "meta.json" + VERSION_PREFIX = "v" + DUMP_FILE = "dump.sql.gz" LOGS_CHANNEL = "/admin/backups/logs" def self.backup!(user_id, opts = {}) @@ -19,7 +17,16 @@ module BackupRestore end def self.restore!(user_id, opts = {}) - start! BackupRestore::Restorer.new(user_id, opts) + restorer = BackupRestore::Restorer.new( + user_id: user_id, + filename: opts[:filename], + factory: BackupRestore::Factory.new( + user_id: user_id, + client_id: opts[:client_id] + ) + ) + + start! restorer end def self.rollback! 
@@ -75,16 +82,18 @@ module BackupRestore end def self.move_tables_between_schemas(source, destination) - DB.exec(move_tables_between_schemas_sql(source, destination)) + ActiveRecord::Base.transaction do + DB.exec(move_tables_between_schemas_sql(source, destination)) + end end def self.move_tables_between_schemas_sql(source, destination) - <<-SQL + <<~SQL DO $$DECLARE row record; BEGIN -- create schema if it does not exists already -- NOTE: DROP & CREATE SCHEMA is easier, but we don't want to drop the public schema - -- ortherwise extensions (like hstore & pg_trgm) won't work anymore... + -- otherwise extensions (like hstore & pg_trgm) won't work anymore... CREATE SCHEMA IF NOT EXISTS #{destination}; -- move all tables to schema FOR row IN SELECT tablename FROM pg_tables WHERE schemaname = '#{source}' @@ -108,11 +117,17 @@ module BackupRestore config = ActiveRecord::Base.connection_pool.spec.config config = config.with_indifferent_access + # credentials for PostgreSQL in CI environment + if Rails.env.test? + username = ENV["PGUSER"] + password = ENV["PGPASSWORD"] + end + DatabaseConfiguration.new( config["backup_host"] || config["host"], config["backup_port"] || config["port"], - config["username"] || ENV["USER"] || "postgres", - config["password"], + config["username"] || username || ENV["USER"] || "postgres", + config["password"] || password, config["database"] ) end diff --git a/lib/backup_restore/backup_file_handler.rb b/lib/backup_restore/backup_file_handler.rb new file mode 100644 index 00000000000..c5e37024880 --- /dev/null +++ b/lib/backup_restore/backup_file_handler.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +module BackupRestore + class BackupFileHandler + OLD_DUMP_FILENAME = "dump.sql" + + delegate :log, to: :@logger, private: true + + def initialize(logger, filename, current_db, root_tmp_directory = Rails.root) + @logger = logger + @filename = filename + @current_db = current_db + @root_tmp_directory = root_tmp_directory + @is_archive = !(@filename =~ /\.sql\.gz$/) + end + + def decompress + create_tmp_directory + @archive_path = File.join(@tmp_directory, @filename) + + copy_archive_to_tmp_directory + decompress_archive + extract_db_dump + + [@tmp_directory, @db_dump_path] + end + + def clean_up + return if @tmp_directory.blank? + + log "Removing tmp '#{@tmp_directory}' directory..." + FileUtils.rm_rf(@tmp_directory) if Dir[@tmp_directory].present? + rescue => ex + log "Something went wrong while removing the following tmp directory: #{@tmp_directory}", ex + end + + protected + + def create_tmp_directory + timestamp = Time.zone.now.strftime("%Y-%m-%d-%H%M%S") + @tmp_directory = File.join(@root_tmp_directory, "tmp", "restores", @current_db, timestamp) + ensure_directory_exists(@tmp_directory) + end + + def ensure_directory_exists(directory) + log "Making sure #{directory} exists..." + FileUtils.mkdir_p(directory) + end + + def copy_archive_to_tmp_directory + store = BackupRestore::BackupStore.create + + if store.remote? + log "Downloading archive to tmp directory..." + failure_message = "Failed to download archive to tmp directory." + else + log "Copying archive to tmp directory..." + failure_message = "Failed to copy archive to tmp directory." + end + + store.download_file(@filename, @archive_path, failure_message) + end + + def decompress_archive + return if !@is_archive + + log "Unzipping archive, this may take a while..." 
+ pipeline = Compression::Pipeline.new([Compression::Tar.new, Compression::Gzip.new]) + unzipped_path = pipeline.decompress(@tmp_directory, @archive_path, available_size) + pipeline.strip_directory(unzipped_path, @tmp_directory) + end + + def extract_db_dump + @db_dump_path = + if @is_archive + # for compatibility with backups from Discourse v1.5 and below + old_dump_path = File.join(@tmp_directory, OLD_DUMP_FILENAME) + File.exists?(old_dump_path) ? old_dump_path : File.join(@tmp_directory, BackupRestore::DUMP_FILE) + else + File.join(@tmp_directory, @filename) + end + + if File.extname(@db_dump_path) == '.gz' + log "Extracting dump file..." + Compression::Gzip.new.decompress(@tmp_directory, @db_dump_path, available_size) + @db_dump_path.delete_suffix!('.gz') + end + + @db_dump_path + end + + def available_size + SiteSetting.decompressed_backup_max_file_size_mb + end + end +end diff --git a/lib/backup_restore/backup_store.rb b/lib/backup_restore/backup_store.rb index 45da21d643d..2f95551dd55 100644 --- a/lib/backup_restore/backup_store.rb +++ b/lib/backup_restore/backup_store.rb @@ -3,8 +3,8 @@ module BackupRestore # @abstract class BackupStore - class BackupFileExists < RuntimeError; end - class StorageError < RuntimeError; end + BackupFileExists = Class.new(RuntimeError) + StorageError = Class.new(RuntimeError) # @return [BackupStore] def self.create(opts = {}) diff --git a/lib/backup_restore/database_restorer.rb b/lib/backup_restore/database_restorer.rb new file mode 100644 index 00000000000..033f8758d13 --- /dev/null +++ b/lib/backup_restore/database_restorer.rb @@ -0,0 +1,182 @@ +# frozen_string_literal: true + +module BackupRestore + DatabaseRestoreError = Class.new(RuntimeError) + + class DatabaseRestorer + delegate :log, to: :@logger, private: true + + MAIN_SCHEMA = "public" + BACKUP_SCHEMA = "backup" + + def initialize(logger, current_db) + @logger = logger + @db_was_changed = false + @current_db = current_db + end + + def restore(db_dump_path) + BackupRestore.move_tables_between_schemas(MAIN_SCHEMA, BACKUP_SCHEMA) + + @db_dump_path = db_dump_path + @db_was_changed = true + + create_missing_discourse_functions + restore_dump + migrate_database + reconnect_database + end + + def rollback + log "Trying to rollback..." + + if @db_was_changed && BackupRestore.can_rollback? + log "Rolling back..." + BackupRestore.move_tables_between_schemas(BACKUP_SCHEMA, MAIN_SCHEMA) + else + log "There was no need to rollback" + end + end + + def clean_up + drop_created_discourse_functions + end + + protected + + def restore_dump + log "Restoring dump file... (this may take a while)" + + logs = Queue.new + last_line = nil + psql_running = true + + log_thread = Thread.new do + RailsMultisite::ConnectionManagement::establish_connection(db: @current_db) + while psql_running + message = logs.pop.strip + log(message) if message.present? + end + end + + IO.popen(restore_dump_command) do |pipe| + begin + while line = pipe.readline + logs << line + last_line = line + end + rescue EOFError + # finished reading... + ensure + psql_running = false + end + end + + logs << "" + log_thread.join + + raise DatabaseRestoreError.new("psql failed: #{last_line}") if Process.last_status&.exitstatus != 0 + end + + # Removes unwanted SQL added by certain versions of pg_dump. 
+ def sed_command + unwanted_sql = [ + "DROP SCHEMA", # Discourse <= v1.5 + "CREATE SCHEMA", # PostgreSQL 11+ + "COMMENT ON SCHEMA", # PostgreSQL 11+ + "SET default_table_access_method" # PostgreSQL 12 + ].join("|") + + "sed -E '/^(#{unwanted_sql})/d'" + end + + def restore_dump_command + "#{sed_command} #{@db_dump_path} | #{psql_command} 2>&1" + end + + def psql_command + db_conf = BackupRestore.database_configuration + + password_argument = "PGPASSWORD='#{db_conf.password}'" if db_conf.password.present? + host_argument = "--host=#{db_conf.host}" if db_conf.host.present? + port_argument = "--port=#{db_conf.port}" if db_conf.port.present? + username_argument = "--username=#{db_conf.username}" if db_conf.username.present? + + [ password_argument, # pass the password to psql (if any) + "psql", # the psql command + "--dbname='#{db_conf.database}'", # connect to database *dbname* + "--single-transaction", # all or nothing (also runs COPY commands faster) + "--variable=ON_ERROR_STOP=1", # stop on first error + host_argument, # the hostname to connect to (if any) + port_argument, # the port to connect to (if any) + username_argument # the username to connect as (if any) + ].compact.join(" ") + end + + def migrate_database + log "Migrating the database..." + + log Discourse::Utils.execute_command( + { "SKIP_POST_DEPLOYMENT_MIGRATIONS" => "0" }, + "rake db:migrate", + failure_message: "Failed to migrate database.", + chdir: Rails.root + ) + end + + def reconnect_database + log "Reconnecting to the database..." + RailsMultisite::ConnectionManagement::reload if RailsMultisite::ConnectionManagement::instance + RailsMultisite::ConnectionManagement::establish_connection(db: @current_db) + end + + def create_missing_discourse_functions + log "Creating missing functions in the discourse_functions schema..." + + @created_functions_for_table_columns = [] + all_readonly_table_columns = [] + + Dir[Rails.root.join(Migration::SafeMigrate.post_migration_path, "**/*.rb")].each do |path| + require path + class_name = File.basename(path, ".rb").sub(/^\d+_/, "").camelize + migration_class = class_name.constantize + + if migration_class.const_defined?(:DROPPED_TABLES) + migration_class::DROPPED_TABLES.each do |table_name| + all_readonly_table_columns << [table_name] + end + end + + if migration_class.const_defined?(:DROPPED_COLUMNS) + migration_class::DROPPED_COLUMNS.each do |table_name, column_names| + column_names.each do |column_name| + all_readonly_table_columns << [table_name, column_name] + end + end + end + end + + existing_function_names = Migration::BaseDropper.existing_discourse_function_names.map { |name| "#{name}()" } + + all_readonly_table_columns.each do |table_name, column_name| + function_name = Migration::BaseDropper.readonly_function_name(table_name, column_name, with_schema: false) + + if !existing_function_names.include?(function_name) + Migration::BaseDropper.create_readonly_function(table_name, column_name) + @created_functions_for_table_columns << [table_name, column_name] + end + end + end + + def drop_created_discourse_functions + return if @created_functions_for_table_columns.blank? + + log "Dropping functions from the discourse_functions schema..." 
+ @created_functions_for_table_columns.each do |table_name, column_name| + Migration::BaseDropper.drop_readonly_function(table_name, column_name) + end + rescue => ex + log "Something went wrong while dropping functions from the discourse_functions schema", ex + end + end +end diff --git a/lib/backup_restore/factory.rb b/lib/backup_restore/factory.rb new file mode 100644 index 00000000000..5bc044b9242 --- /dev/null +++ b/lib/backup_restore/factory.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module BackupRestore + class Factory + def initialize(user_id: nil, client_id: nil) + @user_id = user_id + @client_id = client_id + end + + def logger + @logger ||= Logger.new(user_id: @user_id, client_id: @client_id) + end + + def create_system_interface + SystemInterface.new(logger) + end + + def create_uploads_restorer + UploadsRestorer.new(logger) + end + + def create_database_restorer(current_db) + DatabaseRestorer.new(logger, current_db) + end + + def create_meta_data_handler(filename, tmp_directory) + MetaDataHandler.new(logger, filename, tmp_directory) + end + + def create_backup_file_handler(filename, current_db) + BackupFileHandler.new(logger, filename, current_db) + end + end +end diff --git a/lib/backup_restore/logger.rb b/lib/backup_restore/logger.rb new file mode 100644 index 00000000000..f33acbfd46d --- /dev/null +++ b/lib/backup_restore/logger.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module BackupRestore + class Logger + attr_reader :logs + + def initialize(user_id: nil, client_id: nil) + @user_id = user_id + @client_id = client_id + @publish_to_message_bus = @user_id.present? && @client_id.present? + + @logs = [] + end + + def log(message, ex = nil) + return if Rails.env.test? + + timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S") + puts(message) + publish_log(message, timestamp) + save_log(message, timestamp) + Rails.logger.error("#{ex}\n" + ex.backtrace.join("\n")) if ex + end + + protected + + def publish_log(message, timestamp) + return unless @publish_to_message_bus + data = { timestamp: timestamp, operation: "restore", message: message } + MessageBus.publish(BackupRestore::LOGS_CHANNEL, data, user_ids: [@user_id], client_ids: [@client_id]) + end + + def save_log(message, timestamp) + @logs << "[#{timestamp}] #{message}" + end + end +end diff --git a/lib/backup_restore/meta_data_handler.rb b/lib/backup_restore/meta_data_handler.rb new file mode 100644 index 00000000000..e61809a5642 --- /dev/null +++ b/lib/backup_restore/meta_data_handler.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module BackupRestore + MetaDataError = Class.new(RuntimeError) + MigrationRequiredError = Class.new(RuntimeError) + + class MetaDataHandler + METADATA_FILE = "meta.json" + + delegate :log, to: :@logger, private: true + + def initialize(logger, filename, tmp_directory) + @logger = logger + @current_version = BackupRestore.current_version + @filename = filename + @tmp_directory = tmp_directory + end + + def validate + metadata = extract_metadata + + log "Validating metadata..." + log " Current version: #{@current_version}" + log " Restored version: #{metadata[:version]}" + + if metadata[:version] > @current_version + raise MigrationRequiredError.new("You're trying to restore a more recent version of the schema. " \ + "You should migrate first!") + end + + metadata + end + + protected + + # Tries to extract the backup version from an existing + # metadata file (used in Discourse < v1.6) or from the filename. 
+ def extract_metadata + metadata_path = File.join(@tmp_directory, METADATA_FILE) if @tmp_directory.present? + + if metadata_path.present? && File.exists?(metadata_path) + metadata = load_metadata_file(metadata_path) + elsif @filename =~ /-#{BackupRestore::VERSION_PREFIX}(\d{14})/ + metadata = { version: Regexp.last_match[1].to_i } + else + raise MetaDataError.new("Migration version is missing from the filename.") + end + + metadata + end + + def load_metadata_file(path) + metadata = Oj.load_file(path, symbol_keys: true) + raise MetaDataError.new("Failed to load metadata file.") if metadata.blank? + metadata + rescue Oj::ParseError + raise MetaDataError.new("Failed to parse metadata file.") + end + end +end diff --git a/lib/backup_restore/restorer.rb b/lib/backup_restore/restorer.rb index 6d1e1f0c179..8068e0aab7c 100644 --- a/lib/backup_restore/restorer.rb +++ b/lib/backup_restore/restorer.rb @@ -1,107 +1,74 @@ # frozen_string_literal: true -require_dependency "db_helper" - module BackupRestore - - class RestoreDisabledError < RuntimeError; end - class FilenameMissingError < RuntimeError; end + RestoreDisabledError = Class.new(RuntimeError) + FilenameMissingError = Class.new(RuntimeError) class Restorer + delegate :log, to: :@logger, private: true + attr_reader :success - def self.pg_produces_portable_dump?(version) - # anything pg 11 or above will produce a non-portable dump - return false if version.to_i >= 11 - - # below 11, the behaviour was changed in multiple different minor - # versions depending on major release line - we list those versions below - gem_version = Gem::Version.new(version) - - %w{ - 10.3 - 9.6.8 - 9.5.12 - 9.4.17 - 9.3.22 - }.each do |unportable_version| - return false if Gem::Dependency.new("", "~> #{unportable_version}").match?("", gem_version) - end - - true - end - - def initialize(user_id, opts = {}) + def initialize(user_id:, filename:, factory:, disable_emails: true) @user_id = user_id - @client_id = opts[:client_id] - @filename = opts[:filename] - @publish_to_message_bus = opts[:publish_to_message_bus] || false - @disable_emails = opts.fetch(:disable_emails, true) + @filename = filename + @factory = factory + @logger = factory.logger + @disable_emails = disable_emails ensure_restore_is_enabled - ensure_no_operation_is_running ensure_we_have_a_user ensure_we_have_a_filename - initialize_state + @success = false + @current_db = RailsMultisite::ConnectionManagement.current_db + + @system = factory.create_system_interface + @backup_file_handler = factory.create_backup_file_handler(@filename, @current_db) + @database_restorer = factory.create_database_restorer(@current_db) + @uploads_restorer = factory.create_uploads_restorer end def run log "[STARTED]" log "'#{@user_info[:username]}' has started the restore!" - mark_restore_as_running + # FIXME not atomic! + ensure_no_operation_is_running + @system.mark_restore_as_running - listen_for_shutdown_signal + @system.listen_for_shutdown_signal - ensure_directory_exists(@tmp_directory) + @tmp_directory, db_dump_path = @backup_file_handler.decompress + validate_backup_metadata - copy_archive_to_tmp_directory - decompress_archive + @system.enable_readonly_mode + @system.pause_sidekiq + @system.wait_for_sidekiq - extract_metadata - validate_metadata + @database_restorer.restore(db_dump_path) - extract_dump - create_missing_discourse_functions - - if !can_restore_into_different_schema? 
- log "Cannot restore into different schema, restoring in-place" - enable_readonly_mode - pause_sidekiq - wait_for_sidekiq - BackupRestore.move_tables_between_schemas("public", "backup") - @db_was_changed = true - restore_dump - else - log "Restoring into 'backup' schema" - restore_dump - enable_readonly_mode - pause_sidekiq - wait_for_sidekiq - switch_schema! - end - - migrate_database - reconnect_database reload_site_settings + + @system.disable_readonly_mode + clear_emoji_cache - disable_readonly_mode clear_theme_cache - extract_uploads + @uploads_restorer.restore(@tmp_directory) after_restore_hook rescue Compression::Strategy::ExtractFailed - log "The uncompressed file is too big. Consider increasing the decompressed_theme_max_file_size_mb hidden setting." - rollback + log 'ERROR: The uncompressed file is too big. Consider increasing the hidden ' \ + '"decompressed_backup_max_file_size_mb" setting.' + @database_restorer.rollback rescue SystemExit log "Restore process was cancelled!" - rollback + @database_restorer.rollback rescue => ex log "EXCEPTION: " + ex.message log ex.backtrace.join("\n") - rollback + @database_restorer.rollback else @success = true ensure @@ -112,78 +79,11 @@ module BackupRestore @success ? log("[SUCCESS]") : log("[FAILED]") end - ### The methods listed below are public just for testing purposes. - ### This is not a good practice, but we need to be sure that our new compression API will work. - - attr_reader :tmp_directory - - def ensure_directory_exists(directory) - log "Making sure #{directory} exists..." - FileUtils.mkdir_p(directory) - end - - def copy_archive_to_tmp_directory - if @store.remote? - log "Downloading archive to tmp directory..." - failure_message = "Failed to download archive to tmp directory." - else - log "Copying archive to tmp directory..." - failure_message = "Failed to copy archive to tmp directory." - end - - @store.download_file(@filename, @archive_filename, failure_message) - end - - def decompress_archive - return unless @is_archive - - log "Unzipping archive, this may take a while..." - - pipeline = Compression::Pipeline.new([Compression::Tar.new, Compression::Gzip.new]) - - unzipped_path = pipeline.decompress(@tmp_directory, @archive_filename, available_size) - pipeline.strip_directory(unzipped_path, @tmp_directory) - end - - def extract_metadata - metadata_path = File.join(@tmp_directory, BackupRestore::METADATA_FILE) - @metadata = if File.exists?(metadata_path) - data = Oj.load_file(@meta_filename) - raise "Failed to load metadata file." if !data - data - else - log "No metadata file to extract." - if @filename =~ /-#{BackupRestore::VERSION_PREFIX}(\d{14})/ - { "version" => Regexp.last_match[1].to_i } - else - raise "Migration version is missing from the filename." - end - end - end - - def extract_dump - @dump_filename = - if @is_archive - # For backwards compatibility - old_dump_path = File.join(@tmp_directory, BackupRestore::OLD_DUMP_FILE) - File.exists?(old_dump_path) ? old_dump_path : File.join(@tmp_directory, BackupRestore::DUMP_FILE) - else - File.join(@tmp_directory, @filename) - end - - log "Extracting dump file..." - - Compression::Gzip.new.decompress(@tmp_directory, @dump_filename, available_size) - end - protected - def available_size - SiteSetting.decompressed_backup_max_file_size_mb - end - def ensure_restore_is_enabled - raise BackupRestore::RestoreDisabledError unless Rails.env.development? || SiteSetting.allow_restore? + return if Rails.env.development? || SiteSetting.allow_restore? 
+ raise BackupRestore::RestoreDisabledError end def ensure_no_operation_is_running @@ -192,7 +92,8 @@ module BackupRestore def ensure_we_have_a_user user = User.find_by(id: @user_id) - raise Discourse::InvalidParameters.new(:user_id) unless user + raise Discourse::InvalidParameters.new(:user_id) if user.blank? + # keep some user data around to check them against the newly restored database @user_info = { id: user.id, username: user.username, email: user.email } end @@ -201,211 +102,8 @@ module BackupRestore raise BackupRestore::FilenameMissingError if @filename.nil? end - def initialize_state - @success = false - @store = BackupRestore::BackupStore.create - @db_was_changed = false - @current_db = RailsMultisite::ConnectionManagement.current_db - @current_version = BackupRestore.current_version - @timestamp = Time.now.strftime("%Y-%m-%d-%H%M%S") - @tmp_directory = File.join(Rails.root, "tmp", "restores", @current_db, @timestamp) - @archive_filename = File.join(@tmp_directory, @filename) - @tar_filename = @archive_filename[0...-3] - @is_archive = !(@filename =~ /.sql.gz$/) - - @logs = [] - @readonly_mode_was_enabled = Discourse.readonly_mode? - @created_functions_for_table_columns = [] - end - - def listen_for_shutdown_signal - Thread.new do - while BackupRestore.is_operation_running? - exit if BackupRestore.should_shutdown? - sleep 0.1 - end - end - end - - def mark_restore_as_running - log "Marking restore as running..." - BackupRestore.mark_as_running! - end - - def enable_readonly_mode - return if @readonly_mode_was_enabled - log "Enabling readonly mode..." - Discourse.enable_readonly_mode - end - - def pause_sidekiq - log "Pausing sidekiq..." - Sidekiq.pause! - end - - def wait_for_sidekiq - log "Waiting for sidekiq to finish running jobs..." - iterations = 1 - while sidekiq_has_running_jobs? - log "Waiting for sidekiq to finish running jobs... ##{iterations}" - sleep 5 - iterations += 1 - raise "Sidekiq did not finish running all the jobs in the allowed time!" if iterations > 6 - end - end - - def sidekiq_has_running_jobs? - Sidekiq::Workers.new.each do |_, _, worker| - payload = worker.try(:payload) - return true if payload.try(:all_sites) - return true if payload.try(:current_site_id) == @current_db - end - - false - end - - def validate_metadata - log "Validating metadata..." - log " Current version: #{@current_version}" - - raise "Metadata has not been extracted correctly." if !@metadata - - log " Restored version: #{@metadata["version"]}" - - error = "You're trying to restore a more recent version of the schema. You should migrate first!" - raise error if @metadata["version"] > @current_version - end - - def get_dumped_by_version - output = Discourse::Utils.execute_command( - File.extname(@dump_filename) == '.gz' ? 'zgrep' : 'grep', - '-m1', @dump_filename, '-e', "-- Dumped by pg_dump version", - failure_message: "Failed to check version of pg_dump used to generate the dump file" - ) - - output.match(/version (\d+(\.\d+)+)/)[1] - end - - def can_restore_into_different_schema? - self.class.pg_produces_portable_dump?(get_dumped_by_version) - end - - def restore_dump_command - if File.extname(@dump_filename) == '.gz' - "#{sed_command} #{@dump_filename.gsub('.gz', '')} | #{psql_command} 2>&1" - else - "#{psql_command} 2>&1 < #{@dump_filename}" - end - end - - def restore_dump - log "Restoring dump file... 
(can be quite long)" - - logs = Queue.new - psql_running = true - has_error = false - - Thread.new do - RailsMultisite::ConnectionManagement::establish_connection(db: @current_db) - while psql_running - message = logs.pop.strip - has_error ||= (message =~ /ERROR:/) - log(message) unless message.blank? - end - end - - IO.popen(restore_dump_command) do |pipe| - begin - while line = pipe.readline - logs << line - end - rescue EOFError - # finished reading... - ensure - psql_running = false - logs << "" - end - end - - # psql does not return a valid exit code when an error happens - raise "psql failed" if has_error - end - - def psql_command - db_conf = BackupRestore.database_configuration - - password_argument = "PGPASSWORD='#{db_conf.password}'" if db_conf.password.present? - host_argument = "--host=#{db_conf.host}" if db_conf.host.present? - port_argument = "--port=#{db_conf.port}" if db_conf.port.present? - username_argument = "--username=#{db_conf.username}" if db_conf.username.present? - - [ password_argument, # pass the password to psql (if any) - "psql", # the psql command - "--dbname='#{db_conf.database}'", # connect to database *dbname* - "--single-transaction", # all or nothing (also runs COPY commands faster) - host_argument, # the hostname to connect to (if any) - port_argument, # the port to connect to (if any) - username_argument # the username to connect as (if any) - ].join(" ") - end - - def sed_command - # in order to limit the downtime when restoring as much as possible - # we force the restoration to happen in the "restore" schema - - # during the restoration, this make sure we - # - drop the "restore" schema if it exists - # - create the "restore" schema - # - prepend the "restore" schema into the search_path - - regexp = "SET search_path = public, pg_catalog;" - - replacement = [ "DROP SCHEMA IF EXISTS restore CASCADE;", - "CREATE SCHEMA restore;", - "SET search_path = restore, public, pg_catalog;", - ].join(" ") - - # we only want to replace the VERY first occurence of the search_path command - expression = "1,/^#{regexp}$/s/#{regexp}/#{replacement}/" - - "sed -e '#{expression}'" - end - - def switch_schema! - log "Switching schemas... try reloading the site in 5 minutes, if successful, then reboot and restore is complete." - - sql = [ - "BEGIN;", - BackupRestore.move_tables_between_schemas_sql("public", "backup"), - BackupRestore.move_tables_between_schemas_sql("restore", "public"), - "COMMIT;" - ].join("\n") - - @db_was_changed = true - - DB.exec(sql) - end - - def migrate_database - log "Migrating the database..." - - if Discourse.skip_post_deployment_migrations? - ENV["SKIP_POST_DEPLOYMENT_MIGRATIONS"] = "0" - Rails.application.config.paths['db/migrate'] << Rails.root.join( - Discourse::DB_POST_MIGRATE_PATH - ).to_s - end - - Discourse::Application.load_tasks - ENV["VERSION"] = @current_version.to_s - DB.exec("SET search_path = public, pg_catalog;") - Rake::Task["db:migrate"].invoke - end - - def reconnect_database - log "Reconnecting to the database..." - RailsMultisite::ConnectionManagement::reload if RailsMultisite::ConnectionManagement::instance - RailsMultisite::ConnectionManagement::establish_connection(db: @current_db) + def validate_backup_metadata + @factory.create_meta_data_handler(@filename, @tmp_directory).validate end def reload_site_settings @@ -426,201 +124,30 @@ module BackupRestore Emoji.clear_cache end - def extract_uploads - return unless File.exists?(File.join(@tmp_directory, 'uploads')) - log "Extracting uploads..." 
- - public_uploads_path = File.join(Rails.root, "public") - upload_path = Discourse.store.upload_path - - FileUtils.mkdir_p(File.join(public_uploads_path, "uploads")) - - tmp_uploads_path = Dir.glob(File.join(@tmp_directory, "uploads", "*")).first - return if tmp_uploads_path.blank? - previous_db_name = BackupMetadata.value_for("db_name") || File.basename(tmp_uploads_path) - optimized_images_exist = File.exist?(File.join(tmp_uploads_path, 'optimized')) - - Discourse::Utils.execute_command( - 'rsync', '-avp', '--safe-links', "#{tmp_uploads_path}/", "#{upload_path}/", - failure_message: "Failed to restore uploads.", - chdir: public_uploads_path - ) - - remap_uploads(previous_db_name, upload_path) - - if SiteSetting.Upload.enable_s3_uploads - migrate_to_s3 - remove_local_uploads(File.join(public_uploads_path, upload_path)) - end - - generate_optimized_images unless optimized_images_exist - end - - def remap_uploads(previous_db_name, upload_path) - log "Remapping uploads..." - - was_multisite = BackupMetadata.value_for("multisite") == "t" - uploads_folder = was_multisite ? "/" : "/#{upload_path}/" - - if (old_base_url = BackupMetadata.value_for("base_url")) && old_base_url != Discourse.base_url - remap(old_base_url, Discourse.base_url) - end - - current_s3_base_url = SiteSetting.Upload.enable_s3_uploads ? SiteSetting.Upload.s3_base_url : nil - if (old_s3_base_url = BackupMetadata.value_for("s3_base_url")) && old_base_url != current_s3_base_url - remap("#{old_s3_base_url}/", uploads_folder) - end - - current_s3_cdn_url = SiteSetting.Upload.enable_s3_uploads ? SiteSetting.Upload.s3_cdn_url : nil - if (old_s3_cdn_url = BackupMetadata.value_for("s3_cdn_url")) && old_s3_cdn_url != current_s3_cdn_url - base_url = SiteSetting.Upload.enable_s3_uploads ? SiteSetting.Upload.s3_cdn_url : Discourse.base_url - remap("#{old_s3_cdn_url}/", UrlHelper.schemaless("#{base_url}#{uploads_folder}")) - - old_host = URI.parse(old_s3_cdn_url).host - new_host = URI.parse(base_url).host - remap(old_host, new_host) - end - - if (old_cdn_url = BackupMetadata.value_for("cdn_url")) && old_cdn_url != Discourse.asset_host - base_url = Discourse.asset_host || Discourse.base_url - remap("#{old_cdn_url}/", UrlHelper.schemaless("#{base_url}/")) - - old_host = URI.parse(old_cdn_url).host - new_host = URI.parse(base_url).host - remap(old_host, new_host) - end - - current_db_name = RailsMultisite::ConnectionManagement.current_db - if previous_db_name != current_db_name - remap("uploads/#{previous_db_name}", upload_path) - end - - rescue => ex - log "Something went wrong while remapping uploads.", ex - end - - def remap(from, to) - puts "Remapping '#{from}' to '#{to}'" - DbHelper.remap(from, to, verbose: true, excluded_tables: ["backup_metadata"]) - end - - def migrate_to_s3 - log "Migrating uploads to S3..." - ENV["SKIP_FAILED"] = "1" - ENV["MIGRATE_TO_MULTISITE"] = "1" if Rails.configuration.multisite - Rake::Task["uploads:migrate_to_s3"].invoke - Jobs.run_later! - end - - def remove_local_uploads(directory) - log "Removing local uploads directory..." - FileUtils.rm_rf(directory) if Dir[directory].present? - rescue => ex - log "Something went wrong while removing the following uploads directory: #{directory}", ex - end - - def generate_optimized_images - log 'Optimizing site icons...' - DB.exec("TRUNCATE TABLE optimized_images") - SiteIconManager.ensure_optimized! - - log 'Posts will be rebaked by a background job in sidekiq. You will see missing images until that has completed.' 
- log 'You can expedite the process by manually running "rake posts:rebake_uncooked_posts"' - - DB.exec(<<~SQL) - UPDATE posts - SET baked_version = NULL - WHERE id IN (SELECT post_id FROM post_uploads) - SQL - - User.where("uploaded_avatar_id IS NOT NULL").find_each do |user| - Jobs.enqueue(:create_avatar_thumbnails, upload_id: user.uploaded_avatar_id) - end - end - - def rollback - log "Trying to rollback..." - if @db_was_changed && BackupRestore.can_rollback? - log "Rolling back..." - BackupRestore.move_tables_between_schemas("backup", "public") - else - log "There was no need to rollback" - end - end - def notify_user if user = User.find_by_email(@user_info[:email]) log "Notifying '#{user.username}' of the end of the restore..." status = @success ? :restore_succeeded : :restore_failed - SystemMessage.create_from_system_user(user, status, + SystemMessage.create_from_system_user( + user, status, logs: Discourse::Utils.pretty_logs(@logs) ) else - log "Could not send notification to '#{@user_info[:username]}' (#{@user_info[:email]}), because the user does not exists..." + log "Could not send notification to '#{@user_info[:username]}' " \ + "(#{@user_info[:email]}), because the user does not exist." end rescue => ex log "Something went wrong while notifying user.", ex end - def create_missing_discourse_functions - log "Creating missing functions in the discourse_functions schema" - - all_readonly_table_columns = [] - - Dir[Rails.root.join(Discourse::DB_POST_MIGRATE_PATH, "*.rb")].each do |path| - require path - class_name = File.basename(path, ".rb").sub(/^\d+_/, "").camelize - migration_class = class_name.constantize - - if migration_class.const_defined?(:DROPPED_TABLES) - migration_class::DROPPED_TABLES.each do |table_name| - all_readonly_table_columns << [table_name] - end - end - - if migration_class.const_defined?(:DROPPED_COLUMNS) - migration_class::DROPPED_COLUMNS.each do |table_name, column_names| - column_names.each do |column_name| - all_readonly_table_columns << [table_name, column_name] - end - end - end - end - - existing_function_names = Migration::BaseDropper.existing_discourse_function_names.map { |name| "#{name}()" } - - all_readonly_table_columns.each do |table_name, column_name| - function_name = Migration::BaseDropper.readonly_function_name(table_name, column_name, with_schema: false) - - if !existing_function_names.include?(function_name) - Migration::BaseDropper.create_readonly_function(table_name, column_name) - @created_functions_for_table_columns << [table_name, column_name] - end - end - end - def clean_up log "Cleaning stuff up..." - drop_created_discourse_functions - remove_tmp_directory - unpause_sidekiq - disable_readonly_mode if Discourse.readonly_mode? - mark_restore_as_not_running - end - - def remove_tmp_directory - log "Removing tmp '#{@tmp_directory}' directory..." - FileUtils.rm_rf(@tmp_directory) if Dir[@tmp_directory].present? - rescue => ex - log "Something went wrong while removing the following tmp directory: #{@tmp_directory}", ex - end - - def unpause_sidekiq - log "Unpausing sidekiq..." - Sidekiq.unpause! - rescue => ex - log "Something went wrong while unpausing Sidekiq.", ex + @database_restorer.clean_up + @backup_file_handler.clean_up + @system.unpause_sidekiq + @system.disable_readonly_mode if Discourse.readonly_mode? 
+ @system.mark_restore_as_not_running end def clear_theme_cache @@ -630,54 +157,9 @@ module BackupRestore Stylesheet::Manager.cache.clear end - def drop_created_discourse_functions - log "Dropping function from the discourse_functions schema" - @created_functions_for_table_columns.each do |table_name, column_name| - Migration::BaseDropper.drop_readonly_function(table_name, column_name) - end - rescue => ex - log "Something went wrong while dropping functions from the discourse_functions schema", ex - end - - def disable_readonly_mode - return if @readonly_mode_was_enabled - log "Disabling readonly mode..." - Discourse.disable_readonly_mode - rescue => ex - log "Something went wrong while disabling readonly mode.", ex - end - - def mark_restore_as_not_running - log "Marking restore as finished..." - BackupRestore.mark_as_not_running! - rescue => ex - log "Something went wrong while marking restore as finished.", ex - end - def after_restore_hook log "Executing the after_restore_hook..." DiscourseEvent.trigger(:restore_complete) end - - def log(message, ex = nil) - return if Rails.env.test? - - timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S") - puts(message) - publish_log(message, timestamp) - save_log(message, timestamp) - Rails.logger.error("#{ex}\n" + ex.backtrace.join("\n")) if ex - end - - def publish_log(message, timestamp) - return unless @publish_to_message_bus - data = { timestamp: timestamp, operation: "restore", message: message } - MessageBus.publish(BackupRestore::LOGS_CHANNEL, data, user_ids: [@user_id], client_ids: [@client_id]) - end - - def save_log(message, timestamp) - @logs << "[#{timestamp}] #{message}" - end - end end diff --git a/lib/backup_restore/system_interface.rb b/lib/backup_restore/system_interface.rb new file mode 100644 index 00000000000..2ed95137bc8 --- /dev/null +++ b/lib/backup_restore/system_interface.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +module BackupRestore + class RunningSidekiqJobsError < RuntimeError + def initialize + super("Sidekiq did not finish running all the jobs in the allowed time!") + end + end + + class SystemInterface + delegate :log, to: :@logger, private: true + + def initialize(logger) + @logger = logger + + @current_db = RailsMultisite::ConnectionManagement.current_db + @readonly_mode_was_enabled = Discourse.readonly_mode? + end + + def enable_readonly_mode + return if @readonly_mode_was_enabled + log "Enabling readonly mode..." + Discourse.enable_readonly_mode + end + + def disable_readonly_mode + return if @readonly_mode_was_enabled + log "Disabling readonly mode..." + Discourse.disable_readonly_mode + rescue => ex + log "Something went wrong while disabling readonly mode.", ex + end + + def mark_restore_as_running + log "Marking restore as running..." + BackupRestore.mark_as_running! + end + + def mark_restore_as_not_running + log "Marking restore as finished..." + BackupRestore.mark_as_not_running! + rescue => ex + log "Something went wrong while marking restore as finished.", ex + end + + def listen_for_shutdown_signal + Thread.new do + while BackupRestore.is_operation_running? + exit if BackupRestore.should_shutdown? + sleep 0.1 + end + end + end + + def pause_sidekiq + log "Pausing sidekiq..." + Sidekiq.pause! + end + + def unpause_sidekiq + log "Unpausing sidekiq..." + Sidekiq.unpause! 
+ rescue => ex + log "Something went wrong while unpausing Sidekiq.", ex + end + + def wait_for_sidekiq + # Wait at least 6 seconds because the data about workers is updated every 5 seconds + # https://github.com/mperham/sidekiq/wiki/API#workers + max_wait_seconds = 60 + wait_seconds = 6.0 + + log "Waiting up to #{max_wait_seconds} seconds for Sidekiq to finish running jobs..." + + max_iterations = (max_wait_seconds / wait_seconds).ceil + iterations = 1 + + loop do + sleep wait_seconds + break if !sidekiq_has_running_jobs? + + iterations += 1 + raise RunningSidekiqJobsError.new if iterations > max_iterations + + log "Waiting for sidekiq to finish running jobs... ##{iterations}" + end + end + + protected + + def sidekiq_has_running_jobs? + Sidekiq::Workers.new.each do |_, _, work| + args = work&.dig("payload", "args")&.first + current_site_id = args["current_site_id"] if args.present? + + return true if current_site_id.blank? || current_site_id == @current_db + end + + false + end + end +end diff --git a/lib/backup_restore/uploads_restorer.rb b/lib/backup_restore/uploads_restorer.rb new file mode 100644 index 00000000000..ae986bc1648 --- /dev/null +++ b/lib/backup_restore/uploads_restorer.rb @@ -0,0 +1,136 @@ +# frozen_string_literal: true + +module BackupRestore + UploadsRestoreError = Class.new(RuntimeError) + + class UploadsRestorer + delegate :log, to: :@logger, private: true + + def initialize(logger) + @logger = logger + end + + def restore(tmp_directory) + upload_directories = Dir.glob(File.join(tmp_directory, "uploads", "*")) + .reject { |path| File.basename(path).start_with?("PaxHeaders") } + + if upload_directories.count > 1 + raise UploadsRestoreError.new("Could not find uploads, because the uploads " \ + "directory contains multiple folders.") + end + + @tmp_uploads_path = upload_directories.first + return if @tmp_uploads_path.blank? + + @previous_db_name = BackupMetadata.value_for("db_name") || File.basename(@tmp_uploads_path) + @current_db_name = RailsMultisite::ConnectionManagement.current_db + backup_contains_optimized_images = File.exist?(File.join(@tmp_uploads_path, "optimized")) + + remap_uploads + restore_uploads + + generate_optimized_images unless backup_contains_optimized_images + rebake_posts_with_uploads + end + + protected + + def restore_uploads + store = Discourse.store + + if !store.respond_to?(:copy_from) + # a FileStore implementation from a plugin might not support this method, so raise a helpful error + store_name = Discourse.store.class.name + raise UploadsRestoreError.new("The current file store (#{store_name}) does not support restoring uploads.") + end + + log "Restoring uploads, this may take a while..." + store.copy_from(@tmp_uploads_path) + end + + # Remaps upload URLs depending on old and new configuration. + # URLs of uploads differ a little bit between local uploads and uploads stored on S3. + # Multisites are another reason why URLs can be different. + # + # Examples: + # * regular site, local storage + # /uploads/default/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg + # + # * regular site, S3 + # //bucket-name.s3.dualstack.us-west-2.amazonaws.com/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg + # + # * multisite, local storage + # /uploads//original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg + # + # * multisite, S3 + # //bucket-name.s3.dualstack.us-west-2.amazonaws.com/uploads//original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg + def remap_uploads + log "Remapping uploads..." 
+ + was_multisite = BackupMetadata.value_for("multisite") == "t" + upload_path = "/#{Discourse.store.upload_path}/" + uploads_folder = was_multisite ? "/" : upload_path + + if (old_base_url = BackupMetadata.value_for("base_url")) && old_base_url != Discourse.base_url + remap(old_base_url, Discourse.base_url) + end + + current_s3_base_url = SiteSetting::Upload.enable_s3_uploads ? SiteSetting::Upload.s3_base_url : nil + if (old_s3_base_url = BackupMetadata.value_for("s3_base_url")) && old_s3_base_url != current_s3_base_url + remap("#{old_s3_base_url}/", uploads_folder) + end + + current_s3_cdn_url = SiteSetting::Upload.enable_s3_uploads ? SiteSetting::Upload.s3_cdn_url : nil + if (old_s3_cdn_url = BackupMetadata.value_for("s3_cdn_url")) && old_s3_cdn_url != current_s3_cdn_url + base_url = current_s3_cdn_url || Discourse.base_url + remap("#{old_s3_cdn_url}/", UrlHelper.schemaless("#{base_url}#{uploads_folder}")) + + old_host = URI.parse(old_s3_cdn_url).host + new_host = URI.parse(base_url).host + remap(old_host, new_host) if old_host != new_host + end + + if (old_cdn_url = BackupMetadata.value_for("cdn_url")) && old_cdn_url != Discourse.asset_host + base_url = Discourse.asset_host || Discourse.base_url + remap("#{old_cdn_url}/", UrlHelper.schemaless("#{base_url}/")) + + old_host = URI.parse(old_cdn_url).host + new_host = URI.parse(base_url).host + remap(old_host, new_host) if old_host != new_host + end + + if @previous_db_name != @current_db_name + remap("/uploads/#{@previous_db_name}/", upload_path) + end + + rescue => ex + log "Something went wrong while remapping uploads.", ex + end + + def remap(from, to) + log "Remapping '#{from}' to '#{to}'" + DbHelper.remap(from, to, verbose: true, excluded_tables: ["backup_metadata"]) + end + + def generate_optimized_images + log "Optimizing site icons..." + DB.exec("TRUNCATE TABLE optimized_images") + SiteIconManager.ensure_optimized! + + User.where("uploaded_avatar_id IS NOT NULL").find_each do |user| + Jobs.enqueue(:create_avatar_thumbnails, upload_id: user.uploaded_avatar_id) + end + end + + def rebake_posts_with_uploads + log 'Posts will be rebaked by a background job in sidekiq. You will see missing images until that has completed.' 
+ log 'You can expedite the process by manually running "rake posts:rebake_uncooked_posts"' + + DB.exec(<<~SQL) + UPDATE posts + SET baked_version = NULL + WHERE id IN (SELECT post_id FROM post_uploads) + SQL + end + end +end diff --git a/lib/file_store/local_store.rb b/lib/file_store/local_store.rb index 6c3d3ca99cd..526b82f41b8 100644 --- a/lib/file_store/local_store.rb +++ b/lib/file_store/local_store.rb @@ -100,6 +100,16 @@ module FileStore list_missing(OptimizedImage) unless skip_optimized end + def copy_from(source_path) + FileUtils.mkdir_p(File.join(public_dir, upload_path)) + + Discourse::Utils.execute_command( + 'rsync', '-a', '--safe-links', "#{source_path}/", "#{upload_path}/", + failure_message: "Failed to copy uploads.", + chdir: public_dir + ) + end + private def list_missing(model) diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb index bde900f5c18..04895003aff 100644 --- a/lib/file_store/s3_store.rb +++ b/lib/file_store/s3_store.rb @@ -174,6 +174,32 @@ module FileStore @s3_helper.download_file(get_upload_key(upload), destination_path) end + def copy_from(source_path) + local_store = FileStore::LocalStore.new + public_upload_path = File.join(local_store.public_dir, local_store.upload_path) + + # The migration to S3 and lots of other code expects files to exist in public/uploads, + # so lets move them there before executing the migration. + if public_upload_path != source_path + if Dir.exist?(public_upload_path) + old_upload_path = "#{public_upload_path}_#{SecureRandom.hex}" + FileUtils.mv(public_upload_path, old_upload_path) + end + end + + FileUtils.mkdir_p(File.expand_path("..", public_upload_path)) + FileUtils.symlink(source_path, public_upload_path) + + FileStore::ToS3Migration.new( + s3_options: FileStore::ToS3Migration.s3_options_from_env, + migrate_to_multisite: Rails.configuration.multisite, + ).migrate + + ensure + FileUtils.rm(public_upload_path) if File.symlink?(public_upload_path) + FileUtils.mv(old_upload_path, public_upload_path) if old_upload_path + end + private def presigned_url(url, force_download: false, filename: false) diff --git a/lib/file_store/to_s3_migration.rb b/lib/file_store/to_s3_migration.rb new file mode 100644 index 00000000000..460a0112098 --- /dev/null +++ b/lib/file_store/to_s3_migration.rb @@ -0,0 +1,346 @@ +# frozen_string_literal: true + +module FileStore + ToS3MigrationError = Class.new(RuntimeError) + + class ToS3Migration + def initialize(s3_options:, dry_run: false, migrate_to_multisite: false, skip_etag_verify: false) + + @s3_bucket = s3_options[:bucket] + @s3_client_options = s3_options[:client_options] + @dry_run = dry_run + @migrate_to_multisite = migrate_to_multisite + @skip_etag_verify = skip_etag_verify + @current_db = RailsMultisite::ConnectionManagement.current_db + end + + def self.s3_options_from_site_settings + { + client_options: S3Helper.s3_options(SiteSetting), + bucket: SiteSetting.s3_upload_bucket + } + end + + def self.s3_options_from_env + unless ENV["DISCOURSE_S3_BUCKET"].present? && + ENV["DISCOURSE_S3_REGION"].present? && + ( + ( + ENV["DISCOURSE_S3_ACCESS_KEY_ID"].present? && + ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"].present? + ) || ENV["DISCOURSE_S3_USE_IAM_PROFILE"].present? 
+ ) + + raise ToS3MigrationError.new(<<~TEXT) + Please provide the following environment variables: + - DISCOURSE_S3_BUCKET + - DISCOURSE_S3_REGION + and either + - DISCOURSE_S3_ACCESS_KEY_ID + - DISCOURSE_S3_SECRET_ACCESS_KEY + or + - DISCOURSE_S3_USE_IAM_PROFILE + TEXT + end + + opts = { region: ENV["DISCOURSE_S3_REGION"] } + opts[:endpoint] = ENV["DISCOURSE_S3_ENDPOINT"] if ENV["DISCOURSE_S3_ENDPOINT"].present? + + if ENV["DISCOURSE_S3_USE_IAM_PROFILE"].blank? + opts[:access_key_id] = ENV["DISCOURSE_S3_ACCESS_KEY_ID"] + opts[:secret_access_key] = ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"] + end + + { + client_options: opts, + bucket: ENV["DISCOURSE_S3_BUCKET"] + } + end + + def migrate + migrate_to_s3 + end + + def migration_successful?(should_raise: false) + success = true + + failure_message = "S3 migration failed for db '#{@current_db}'." + prefix = @migrate_to_multisite ? "uploads/#{@current_db}/original/" : "original/" + + base_url = File.join(SiteSetting.Upload.s3_base_url, prefix) + count = Upload.by_users.where("url NOT LIKE '#{base_url}%'").count + if count > 0 + error_message = "#{count} of #{Upload.count} uploads are not migrated to S3. #{failure_message}" + raise_or_log(error_message, should_raise) + success = false + end + + cdn_path = SiteSetting.cdn_path("/uploads/#{@current_db}/original").sub(/https?:/, "") + count = Post.where("cooked LIKE '%#{cdn_path}%'").count + if count > 0 + error_message = "#{count} posts are not remapped to new S3 upload URL. #{failure_message}" + raise_or_log(error_message, should_raise) + success = false + end + + Discourse::Application.load_tasks + Rake::Task['posts:missing_uploads'].invoke('single_site') + count = PostCustomField.where(name: Post::MISSING_UPLOADS).count + if count > 0 + error_message = "rake posts:missing_uploads identified #{count} issues. #{failure_message}" + raise_or_log(error_message, should_raise) + success = false + end + + count = Post.where('baked_version <> ? OR baked_version IS NULL', Post::BAKED_VERSION).count + if count > 0 + log("#{count} posts still require rebaking and will be rebaked during regular job") + log("To speed up migrations of posts we recommend you run 'rake posts:rebake_uncooked_posts'") if count > 100 + success = false + else + log("No posts require rebaking") + end + + success + end + + protected + + def log(message) + puts message + end + + def raise_or_log(message, should_raise) + if should_raise + raise ToS3MigrationError.new(message) + else + log(message) + end + end + + def uploads_migrated_to_new_scheme? + seeded_image_url = "#{GlobalSetting.relative_url_root}/uploads/#{@current_db}/original/_X/" + !Upload.by_users.where("url NOT LIKE '//%' AND url NOT LIKE '#{seeded_image_url}%'").exists? + end + + def migrate_to_s3 + # we don't want have migrated state, ensure we run all jobs here + Jobs.run_immediately! + + log "Checking if #{@current_db} already migrated..." + return log "Already migrated #{@current_db}!" if migration_successful? + + log "*" * 30 + " DRY RUN " + "*" * 30 if @dry_run + log "Migrating uploads to S3 for '#{@current_db}'..." + + if !uploads_migrated_to_new_scheme? + log "Some uploads were not migrated to the new scheme. Running the migration, this may take a while..." + SiteSetting.migrate_to_new_scheme = true + Upload.migrate_to_new_scheme + + if !uploads_migrated_to_new_scheme? + raise ToS3MigrationError.new("Some uploads could not be migrated to the new scheme. " \ + "You need to fix this manually.") + end + end + + bucket_has_folder_path = true if @s3_bucket.include? 
"/" + public_directory = Rails.root.join("public").to_s + + s3 = Aws::S3::Client.new(@s3_client_options) + + if bucket_has_folder_path + bucket, folder = S3Helper.get_bucket_and_folder_path(@s3_bucket) + folder = File.join(folder, "/") + else + bucket, folder = @s3_bucket, "" + end + + log "Uploading files to S3..." + log " - Listing local files" + + local_files = [] + IO.popen("cd #{public_directory} && find uploads/#{@current_db}/original -type f").each do |file| + local_files << file.chomp + putc "." if local_files.size % 1000 == 0 + end + + log " => #{local_files.size} files" + log " - Listing S3 files" + + s3_objects = [] + prefix = @migrate_to_multisite ? "uploads/#{@current_db}/original/" : "original/" + + options = { bucket: bucket, prefix: folder + prefix } + + loop do + response = s3.list_objects_v2(options) + s3_objects.concat(response.contents) + putc "." + break if response.next_continuation_token.blank? + options[:continuation_token] = response.next_continuation_token + end + + log " => #{s3_objects.size} files" + log " - Syncing files to S3" + + synced = 0 + failed = [] + + local_files.each do |file| + path = File.join(public_directory, file) + name = File.basename(path) + etag = Digest::MD5.file(path).hexdigest unless @skip_etag_verify + key = file[file.index(prefix)..-1] + key.prepend(folder) if bucket_has_folder_path + original_path = file.sub("uploads/#{@current_db}", "") + + if s3_object = s3_objects.find { |obj| obj.key.ends_with?(original_path) } + next if File.size(path) == s3_object.size && (@skip_etag_verify || s3_object.etag[etag]) + end + + options = { + acl: "public-read", + body: File.open(path, "rb"), + bucket: bucket, + content_type: MiniMime.lookup_by_filename(name)&.content_type, + key: key, + } + + if !FileHelper.is_supported_image?(name) + upload = Upload.find_by(url: "/#{file}") + + if upload&.original_filename + options[:content_disposition] = + %Q{attachment; filename="#{upload.original_filename}"} + end + + if upload&.secure + options[:acl] = "private" + end + end + + etag ||= Digest::MD5.file(path).hexdigest + + if @dry_run + log "#{file} => #{options[:key]}" + synced += 1 + elsif s3.put_object(options).etag[etag] + putc "." + synced += 1 + else + putc "X" + failed << path + end + end + + puts + + failure_message = "S3 migration failed for db '#{@current_db}'." + + if failed.size > 0 + log "Failed to upload #{failed.size} files" + log failed.join("\n") + raise failure_message + elsif s3_objects.size + synced >= local_files.size + log "Updating the URLs in the database..." 
+ + from = "/uploads/#{@current_db}/original/" + to = "#{SiteSetting.Upload.s3_base_url}/#{prefix}" + + if @dry_run + log "REPLACING '#{from}' WITH '#{to}'" + else + DbHelper.remap(from, to, anchor_left: true) + end + + [ + [ + "src=\"/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)", + "src=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1" + ], + [ + "src='/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)", + "src='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1" + ], + [ + "href=\"/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)", + "href=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1" + ], + [ + "href='/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)", + "href='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1" + ], + [ + "\\[img\\]/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)\\[/img\\]", + "[img]#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1[/img]" + ] + ].each do |from_url, to_url| + + if @dry_run + log "REPLACING '#{from_url}' WITH '#{to_url}'" + else + DbHelper.regexp_replace(from_url, to_url) + end + end + + unless @dry_run + # Legacy inline image format + Post.where("raw LIKE '%![](/uploads/default/original/%)%'").each do |post| + regexp = /!\[\](\/uploads\/#{@current_db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/ + + post.raw.scan(regexp).each do |upload_url, _| + upload = Upload.get_from_url(upload_url) + post.raw = post.raw.gsub("![](#{upload_url})", "![](#{upload.short_url})") + end + + post.save!(validate: false) + end + end + + if Discourse.asset_host.present? + # Uploads that were on local CDN will now be on S3 CDN + from = "#{Discourse.asset_host}/uploads/#{@current_db}/original/" + to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}" + + if @dry_run + log "REMAPPING '#{from}' TO '#{to}'" + else + DbHelper.remap(from, to) + end + end + + # Uploads that were on base hostname will now be on S3 CDN + from = "#{Discourse.base_url}/uploads/#{@current_db}/original/" + to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}" + + if @dry_run + log "REMAPPING '#{from}' TO '#{to}'" + else + DbHelper.remap(from, to) + end + + unless @dry_run + log "Removing old optimized images..." + + OptimizedImage + .joins("LEFT JOIN uploads u ON optimized_images.upload_id = u.id") + .where("u.id IS NOT NULL AND u.url LIKE '//%' AND optimized_images.url NOT LIKE '//%'") + .delete_all + + log "Flagging all posts containing lightboxes for rebake..." + + count = Post.where("cooked LIKE '%class=\"lightbox\"%'").update_all(baked_version: nil) + log "#{count} posts were flagged for a rebake" + end + end + + migration_successful?(should_raise: true) + + log "Done!" + + ensure + Jobs.run_later! + end + end +end diff --git a/lib/migration/safe_migrate.rb b/lib/migration/safe_migrate.rb index 417b11d7123..d9adb518f1b 100644 --- a/lib/migration/safe_migrate.rb +++ b/lib/migration/safe_migrate.rb @@ -67,6 +67,10 @@ class Migration::SafeMigrate end end + def self.post_migration_path + Discourse::DB_POST_MIGRATE_PATH + end + def self.enable! return if PG::Connection.method_defined?(:exec_migrator_unpatched) diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake index 1bc6a61eddb..913bb61c08f 100644 --- a/lib/tasks/uploads.rake +++ b/lib/tasks/uploads.rake @@ -224,59 +224,19 @@ def migrate_to_s3_all_sites end end -def migration_successful?(db, should_raise = false) - success = true - - failure_message = "S3 migration failed for db '#{db}'." 
- prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/" - - base_url = File.join(SiteSetting.Upload.s3_base_url, prefix) - count = Upload.by_users.where("url NOT LIKE '#{base_url}%'").count - - error_message = "#{count} of #{Upload.count} uploads are not migrated to S3. #{failure_message}" - - raise error_message if count > 0 && should_raise - success &&= count == 0 - - puts error_message if count > 0 - - cdn_path = SiteSetting.cdn_path("/uploads/#{db}/original").sub(/https?:/, "") - count = Post.where("cooked LIKE '%#{cdn_path}%'").count - error_message = "#{count} posts are not remapped to new S3 upload URL. #{failure_message}" - - raise error_message if count > 0 && should_raise - success &&= count == 0 - - puts error_message if count > 0 - - Rake::Task['posts:missing_uploads'].invoke('single_site') - count = PostCustomField.where(name: Post::MISSING_UPLOADS).count - error_message = "rake posts:missing_uploads identified #{count} issues. #{failure_message}" - raise error_message if count > 0 && should_raise - - success &&= count == 0 - - puts error_message if count > 0 - - count = Post.where('baked_version <> ? OR baked_version IS NULL', Post::BAKED_VERSION).count - if count > 0 - puts "#{count} posts still require rebaking and will be rebaked during regular job" - if count > 100 - puts "To speed up migrations of posts we recommend you run 'rake posts:rebake_uncooked_posts'" - end - success = false - else - puts "No posts require rebaking" - end - - success +def migrate_to_s3 + FileStore::ToS3Migration.new( + s3_options: FileStore::ToS3Migration.s3_options_from_env, + dry_run: !!ENV["DRY_RUN"], + migrate_to_multisite: !!ENV["MIGRATE_TO_MULTISITE"], + skip_etag_verify: !!ENV["SKIP_ETAG_VERIFY"] + ).migrate end task "uploads:s3_migration_status" => :environment do success = true RailsMultisite::ConnectionManagement.each_connection do - db = RailsMultisite::ConnectionManagement.current_db - success &&= migration_successful?(db) + success &&= FileStore::ToS3Migration.new.migration_successful? end queued_jobs = Sidekiq::Stats.new.queues.sum { |_ , x| x } @@ -293,266 +253,6 @@ task "uploads:s3_migration_status" => :environment do puts "All sites appear to have uploads in order!" end -def migrate_to_s3 - - # we don't want have migrated state, ensure we run all jobs here - Jobs.run_immediately! - - db = RailsMultisite::ConnectionManagement.current_db - - dry_run = !!ENV["DRY_RUN"] - - puts "Checking if #{db} already migrated..." - return puts "Already migrated #{db}!" if migration_successful?(db) - - puts "*" * 30 + " DRY RUN " + "*" * 30 if dry_run - puts "Migrating uploads to S3 for '#{db}'..." - - if Upload.by_users.where("url NOT LIKE '//%' AND url NOT LIKE '#{GlobalSetting.relative_url_root}/uploads/#{db}/original/_X/%'").exists? - puts <<~TEXT - Some uploads were not migrated to the new scheme. Please run these commands in the rails console - - SiteSetting.migrate_to_new_scheme = true - Jobs::MigrateUploadScheme.new.execute(nil) - TEXT - exit 1 - end - - unless ENV["DISCOURSE_S3_BUCKET"].present? && - ENV["DISCOURSE_S3_REGION"].present? && - ( - ( - ENV["DISCOURSE_S3_ACCESS_KEY_ID"].present? && - ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"].present? - ) || - ENV["DISCOURSE_S3_USE_IAM_PROFILE"].present? 
- ) - - puts <<~TEXT - Please provide the following environment variables - - DISCOURSE_S3_BUCKET - - DISCOURSE_S3_REGION - and either - - DISCOURSE_S3_ACCESS_KEY_ID - - DISCOURSE_S3_SECRET_ACCESS_KEY - or - - DISCOURSE_S3_USE_IAM_PROFILE - TEXT - exit 2 - end - - if SiteSetting.Upload.s3_cdn_url.blank? - puts "Please provide the 'DISCOURSE_S3_CDN_URL' environment variable" - exit 3 - end - - bucket_has_folder_path = true if ENV["DISCOURSE_S3_BUCKET"].include? "/" - public_directory = Rails.root.join("public").to_s - - opts = { - region: ENV["DISCOURSE_S3_REGION"], - access_key_id: ENV["DISCOURSE_S3_ACCESS_KEY_ID"], - secret_access_key: ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"] - } - - # S3::Client ignores the `region` option when an `endpoint` is provided. - # Without `region`, non-default region bucket creation will break for S3, so we can only - # define endpoint when not using S3 i.e. when SiteSetting.s3_endpoint is provided. - opts[:endpoint] = SiteSetting.s3_endpoint if SiteSetting.s3_endpoint.present? - s3 = Aws::S3::Client.new(opts) - - if bucket_has_folder_path - bucket, folder = S3Helper.get_bucket_and_folder_path(ENV["DISCOURSE_S3_BUCKET"]) - folder = File.join(folder, "/") - else - bucket, folder = ENV["DISCOURSE_S3_BUCKET"], "" - end - - puts "Uploading files to S3..." - print " - Listing local files" - - local_files = [] - IO.popen("cd #{public_directory} && find uploads/#{db}/original -type f").each do |file| - local_files << file.chomp - putc "." if local_files.size % 1000 == 0 - end - - puts " => #{local_files.size} files" - print " - Listing S3 files" - - s3_objects = [] - prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/" - - options = { bucket: bucket, prefix: folder + prefix } - - loop do - response = s3.list_objects_v2(options) - s3_objects.concat(response.contents) - putc "." - break if response.next_continuation_token.blank? - options[:continuation_token] = response.next_continuation_token - end - - puts " => #{s3_objects.size} files" - puts " - Syncing files to S3" - - synced = 0 - failed = [] - - skip_etag_verify = ENV["SKIP_ETAG_VERIFY"].present? - local_files.each do |file| - path = File.join(public_directory, file) - name = File.basename(path) - etag = Digest::MD5.file(path).hexdigest unless skip_etag_verify - key = file[file.index(prefix)..-1] - key.prepend(folder) if bucket_has_folder_path - original_path = file.sub("uploads/#{db}", "") - - if s3_object = s3_objects.find { |obj| obj.key.ends_with?(original_path) } - next if File.size(path) == s3_object.size && (skip_etag_verify || s3_object.etag[etag]) - end - - options = { - acl: "public-read", - body: File.open(path, "rb"), - bucket: bucket, - content_type: MiniMime.lookup_by_filename(name)&.content_type, - key: key, - } - - if !FileHelper.is_supported_image?(name) - upload = Upload.find_by(url: "/#{file}") - - if upload&.original_filename - options[:content_disposition] = - %Q{attachment; filename="#{upload.original_filename}"} - end - - if upload&.secure - options[:acl] = "private" - end - end - - etag ||= Digest::MD5.file(path).hexdigest - - if dry_run - puts "#{file} => #{options[:key]}" - synced += 1 - elsif s3.put_object(options).etag[etag] - putc "." - synced += 1 - else - putc "X" - failed << path - end - end - - puts - - failure_message = "S3 migration failed for db '#{db}'." 
- - if failed.size > 0 - puts "Failed to upload #{failed.size} files" - puts failed.join("\n") - raise failure_message - elsif s3_objects.size + synced >= local_files.size - puts "Updating the URLs in the database..." - - from = "/uploads/#{db}/original/" - to = "#{SiteSetting.Upload.s3_base_url}/#{prefix}" - - if dry_run - puts "REPLACING '#{from}' WITH '#{to}'" - else - DbHelper.remap(from, to, anchor_left: true) - end - - [ - [ - "src=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)", - "src=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1" - ], - [ - "src='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)", - "src='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1" - ], - [ - "href=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)", - "href=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1" - ], - [ - "href='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)", - "href='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1" - ], - [ - "\\[img\\]/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)\\[/img\\]", - "[img]#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1[/img]" - ] - ].each do |from_url, to_url| - - if dry_run - puts "REPLACING '#{from_url}' WITH '#{to_url}'" - else - DbHelper.regexp_replace(from_url, to_url) - end - end - - unless dry_run - # Legacy inline image format - Post.where("raw LIKE '%![](/uploads/default/original/%)%'").each do |post| - regexp = /!\[\](\/uploads\/#{db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/ - - post.raw.scan(regexp).each do |upload_url, _| - upload = Upload.get_from_url(upload_url) - post.raw = post.raw.gsub("![](#{upload_url})", "![](#{upload.short_url})") - end - - post.save!(validate: false) - end - end - - if Discourse.asset_host.present? - # Uploads that were on local CDN will now be on S3 CDN - from = "#{Discourse.asset_host}/uploads/#{db}/original/" - to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}" - - if dry_run - puts "REMAPPING '#{from}' TO '#{to}'" - else - DbHelper.remap(from, to) - end - end - - # Uploads that were on base hostname will now be on S3 CDN - from = "#{Discourse.base_url}/uploads/#{db}/original/" - to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}" - - if dry_run - puts "REMAPPING '#{from}' TO '#{to}'" - else - DbHelper.remap(from, to) - end - - unless dry_run - puts "Removing old optimized images..." - - OptimizedImage - .joins("LEFT JOIN uploads u ON optimized_images.upload_id = u.id") - .where("u.id IS NOT NULL AND u.url LIKE '//%' AND optimized_images.url NOT LIKE '//%'") - .delete_all - - puts "Flagging all posts containing lightboxes for rebake..." - - count = Post.where("cooked LIKE '%class=\"lightbox\"%'").update_all(baked_version: nil) - puts "#{count} posts were flagged for a rebake" - end - end - - migration_successful?(db, true) - - puts "Done!" 
-end
-
 ################################################################################
 #                                   clean_up                                   #
 ################################################################################
diff --git a/script/discourse b/script/discourse
index e1d7b13cfcf..7b4990215bb 100755
--- a/script/discourse
+++ b/script/discourse
@@ -135,9 +135,10 @@ class DiscourseCLI < Thor
     begin
       puts "Starting restore: #{filename}"
       restorer = BackupRestore::Restorer.new(
-        Discourse.system_user.id,
+        user_id: Discourse.system_user.id,
         filename: filename,
-        disable_emails: options[:disable_emails]
+        disable_emails: options[:disable_emails],
+        factory: BackupRestore::Factory.new(user_id: Discourse.system_user.id)
       )
       restorer.run
       puts 'Restore done.'
diff --git a/spec/fixtures/backups/backup_since_v1.6.tar.gz b/spec/fixtures/backups/backup_since_v1.6.tar.gz
new file mode 100644
index 00000000000..1120259fc2b
Binary files /dev/null and b/spec/fixtures/backups/backup_since_v1.6.tar.gz differ
diff --git a/spec/fixtures/backups/backup_till_v1.5.tar.gz b/spec/fixtures/backups/backup_till_v1.5.tar.gz
new file mode 100644
index 00000000000..ddd593932ef
Binary files /dev/null and b/spec/fixtures/backups/backup_till_v1.5.tar.gz differ
diff --git a/spec/fixtures/backups/sql_only_backup.sql.gz b/spec/fixtures/backups/sql_only_backup.sql.gz
new file mode 100644
index 00000000000..ee81e0be506
Binary files /dev/null and b/spec/fixtures/backups/sql_only_backup.sql.gz differ
diff --git a/spec/fixtures/db/post_migrate/drop_column/20990309014014_drop_post_columns.rb b/spec/fixtures/db/post_migrate/drop_column/20990309014014_drop_post_columns.rb
new file mode 100644
index 00000000000..8390f83207d
--- /dev/null
+++ b/spec/fixtures/db/post_migrate/drop_column/20990309014014_drop_post_columns.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+class DropPostColumns < ActiveRecord::Migration[5.2]
+  DROPPED_COLUMNS ||= {
+    posts: %i{via_email raw_email}
+  }
+
+  def up
+    remove_column :posts, :via_email
+    remove_column :posts, :raw_email
+    raise ActiveRecord::Rollback
+  end
+
+  def down
+    raise "not tested"
+  end
+end
diff --git a/spec/fixtures/db/post_migrate/drop_table/20990309014013_drop_email_logs_table.rb b/spec/fixtures/db/post_migrate/drop_table/20990309014013_drop_email_logs_table.rb
index 1be76626a4e..5d07960ea7a 100644
--- a/spec/fixtures/db/post_migrate/drop_table/20990309014013_drop_email_logs_table.rb
+++ b/spec/fixtures/db/post_migrate/drop_table/20990309014013_drop_email_logs_table.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class DropEmailLogsTable < ActiveRecord::Migration[5.2]
+  DROPPED_TABLES ||= %i{email_logs}
+
   def up
     drop_table :email_logs
     raise ActiveRecord::Rollback
diff --git a/spec/fixtures/db/restore/error.sql b/spec/fixtures/db/restore/error.sql
new file mode 100644
index 00000000000..39765df269a
--- /dev/null
+++ b/spec/fixtures/db/restore/error.sql
@@ -0,0 +1,10 @@
+--
+-- PostgreSQL database dump
+--
+
+-- Dumped from database version 10.11 (Debian 10.11-1.pgdg100+1)
+-- Dumped by pg_dump version 10.11 (Debian 10.11-1.pgdg100+1)
+
+-- Started on 2019-12-28 00:24:29 UTC
+
+SET foo = 0;
diff --git a/spec/fixtures/db/restore/postgresql_10.11.sql b/spec/fixtures/db/restore/postgresql_10.11.sql
new file mode 100644
index 00000000000..22bb9e9172b
--- /dev/null
+++ b/spec/fixtures/db/restore/postgresql_10.11.sql
@@ -0,0 +1,31 @@
+--
+-- PostgreSQL database dump
+--
+
+-- Dumped from database version 10.11 (Debian 10.11-1.pgdg100+1)
+-- Dumped by pg_dump version 10.11 (Debian 10.11-1.pgdg100+1)
+
+-- Started on 2019-12-28 00:24:29 UTC
+
+SET statement_timeout = 0;
+SET lock_timeout = 0;
+SET idle_in_transaction_session_timeout = 0;
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = on;
+SELECT pg_catalog.set_config('search_path', '', false);
+SET check_function_bodies = false;
+SET xmloption = content;
+SET client_min_messages = warning;
+SET row_security = off;
+
+SET default_tablespace = '';
+
+SET default_with_oids = false;
+
+--
+-- TOC entry 198 (class 1259 OID 16573)
+-- Name: foo; Type: TABLE; Schema: public; Owner: -
+
+CREATE TABLE public.foo (
+    id integer NOT NULL
+);
diff --git a/spec/fixtures/db/restore/postgresql_11.6.sql b/spec/fixtures/db/restore/postgresql_11.6.sql
new file mode 100644
index 00000000000..656dbee0a91
--- /dev/null
+++ b/spec/fixtures/db/restore/postgresql_11.6.sql
@@ -0,0 +1,49 @@
+--
+-- PostgreSQL database dump
+--
+
+-- Dumped from database version 11.6 (Debian 11.6-1.pgdg90+1)
+-- Dumped by pg_dump version 11.6 (Debian 11.6-1.pgdg90+1)
+
+-- Started on 2019-12-28 00:38:51 UTC
+
+SET statement_timeout = 0;
+SET lock_timeout = 0;
+SET idle_in_transaction_session_timeout = 0;
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = on;
+SELECT pg_catalog.set_config('search_path', '', false);
+SET check_function_bodies = false;
+SET xmloption = content;
+SET client_min_messages = warning;
+SET row_security = off;
+
+--
+-- TOC entry 5 (class 2615 OID 2200)
+-- Name: public; Type: SCHEMA; Schema: -; Owner: -
+--
+
+CREATE SCHEMA public;
+
+
+--
+-- TOC entry 4782 (class 0 OID 0)
+-- Dependencies: 5
+-- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: -
+--
+
+COMMENT ON SCHEMA public IS 'standard public schema';
+
+
+SET default_tablespace = '';
+
+SET default_with_oids = false;
+
+--
+-- TOC entry 198 (class 1259 OID 16585)
+-- Name: foo; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.foo (
+    id integer NOT NULL
+);
diff --git a/spec/fixtures/db/restore/postgresql_12.1.sql b/spec/fixtures/db/restore/postgresql_12.1.sql
new file mode 100644
index 00000000000..9ee7404ea66
--- /dev/null
+++ b/spec/fixtures/db/restore/postgresql_12.1.sql
@@ -0,0 +1,49 @@
+--
+-- PostgreSQL database dump
+--
+
+-- Dumped from database version 12.1 (Debian 12.1-1.pgdg100+1)
+-- Dumped by pg_dump version 12.1 (Debian 12.1-1.pgdg100+1)
+
+-- Started on 2019-12-28 00:35:48 UTC
+
+SET statement_timeout = 0;
+SET lock_timeout = 0;
+SET idle_in_transaction_session_timeout = 0;
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = on;
+SELECT pg_catalog.set_config('search_path', '', false);
+SET check_function_bodies = false;
+SET xmloption = content;
+SET client_min_messages = warning;
+SET row_security = off;
+
+--
+-- TOC entry 5 (class 2615 OID 2200)
+-- Name: public; Type: SCHEMA; Schema: -; Owner: -
+--
+
+CREATE SCHEMA public;
+
+
+--
+-- TOC entry 4825 (class 0 OID 0)
+-- Dependencies: 5
+-- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: -
+--
+
+COMMENT ON SCHEMA public IS 'standard public schema';
+
+
+SET default_tablespace = '';
+
+SET default_table_access_method = heap;
+
+--
+-- TOC entry 204 (class 1259 OID 16587)
+-- Name: foo; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.foo (
+    id integer NOT NULL
+);
diff --git a/spec/fixtures/db/restore/postgresql_9.3.11.sql b/spec/fixtures/db/restore/postgresql_9.3.11.sql
new file mode 100644
index 00000000000..cffcc11ef25
--- /dev/null
+++ b/spec/fixtures/db/restore/postgresql_9.3.11.sql
@@ -0,0 +1,29 @@
+--
+-- PostgreSQL database dump
+--
+
+-- Dumped from database version 9.3.11
+-- Dumped by pg_dump version 9.3.11
+-- Started on 2019-12-27 20:54:40 UTC
+
+SET statement_timeout = 0;
+SET lock_timeout = 0;
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = on;
+SET check_function_bodies = false;
+SET client_min_messages = warning;
+
+DROP SCHEMA IF EXISTS restore CASCADE; CREATE SCHEMA restore; SET search_path = restore, public, pg_catalog;
+
+SET default_tablespace = '';
+
+SET default_with_oids = false;
+
+--
+-- TOC entry 274 (class 1259 OID 18691)
+-- Name: foo; Type: TABLE; Schema: public; Owner: -; Tablespace:
+--
+
+CREATE TABLE foo (
+    id integer NOT NULL
+);
diff --git a/spec/fixtures/db/restore/postgresql_9.5.10.sql b/spec/fixtures/db/restore/postgresql_9.5.10.sql
new file mode 100644
index 00000000000..be0e40fe863
--- /dev/null
+++ b/spec/fixtures/db/restore/postgresql_9.5.10.sql
@@ -0,0 +1,31 @@
+--
+-- PostgreSQL database dump
+--
+
+-- Dumped from database version 9.5.10
+-- Dumped by pg_dump version 9.5.19
+
+-- Started on 2019-12-27 16:08:01 UTC
+
+SET statement_timeout = 0;
+SET lock_timeout = 0;
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = on;
+SELECT pg_catalog.set_config('search_path', '', false);
+SET check_function_bodies = false;
+SET xmloption = content;
+SET client_min_messages = warning;
+SET row_security = off;
+
+SET default_tablespace = '';
+
+SET default_with_oids = false;
+
+--
+-- TOC entry 285 (class 1259 OID 18706)
+-- Name: foo; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.foo (
+    id integer NOT NULL
+);
diff --git a/spec/fixtures/db/restore/postgresql_9.5.5.sql b/spec/fixtures/db/restore/postgresql_9.5.5.sql
new file mode 100644
index 00000000000..7672aee578a
--- /dev/null
+++ b/spec/fixtures/db/restore/postgresql_9.5.5.sql
@@ -0,0 +1,31 @@
+--
+-- PostgreSQL database dump
+--
+
+-- Dumped from database version 9.5.5
+-- Dumped by pg_dump version 9.5.5
+
+-- Started on 2019-11-07 16:41:33 UTC
+
+SET statement_timeout = 0;
+SET lock_timeout = 0;
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = on;
+SET check_function_bodies = false;
+SET client_min_messages = warning;
+SET row_security = off;
+
+SET search_path = public, pg_catalog;
+
+SET default_tablespace = '';
+
+SET default_with_oids = false;
+
+--
+-- TOC entry 284 (class 1259 OID 18697)
+-- Name: foo; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE foo (
+    id integer NOT NULL
+);
diff --git a/spec/lib/backup_restore/backup_file_handler_spec.rb b/spec/lib/backup_restore/backup_file_handler_spec.rb
new file mode 100644
index 00000000000..ba4cf2951df
--- /dev/null
+++ b/spec/lib/backup_restore/backup_file_handler_spec.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+require_relative 'shared_context_for_backup_restore'
+
+describe BackupRestore::BackupFileHandler do
+  include_context "shared stuff"
+
+  def expect_decompress_and_clean_up_to_work(backup_filename:, expected_dump_filename: "dump.sql",
+                                             require_metadata_file:, require_uploads:)
+
+    freeze_time(DateTime.parse('2019-12-24 14:31:48'))
+
+    source_file = File.join(Rails.root, "spec/fixtures/backups", backup_filename)
+    target_directory = BackupRestore::LocalBackupStore.base_directory
+    target_file = File.join(target_directory, backup_filename)
+    FileUtils.copy_file(source_file, target_file)
+
+    Dir.mktmpdir do |root_directory|
+      current_db = RailsMultisite::ConnectionManagement.current_db
+      file_handler = BackupRestore::BackupFileHandler.new(logger, backup_filename, current_db,
root_directory) + tmp_directory, db_dump_path = file_handler.decompress + + expected_tmp_path = File.join(root_directory, "tmp/restores", current_db, "2019-12-24-143148") + expect(tmp_directory).to eq(expected_tmp_path) + expect(db_dump_path).to eq(File.join(expected_tmp_path, expected_dump_filename)) + + expect(Dir.exist?(tmp_directory)).to eq(true) + expect(File.exist?(db_dump_path)).to eq(true) + + expect(File.exist?(File.join(tmp_directory, "meta.json"))).to eq(require_metadata_file) + + if require_uploads + upload_filename = "uploads/default/original/3X/b/d/bd269860bb508aebcb6f08fe7289d5f117830383.png" + expect(File.exist?(File.join(tmp_directory, upload_filename))).to eq(true) + else + expect(Dir.exist?(File.join(tmp_directory, "uploads"))).to eq(false) + end + + file_handler.clean_up + expect(Dir.exist?(tmp_directory)).to eq(false) + end + ensure + FileUtils.rm(target_file) + + # We don't want to delete the directory unless it is empty, otherwise this could be annoying + # when tests run for the "default" database in a development environment. + FileUtils.rmdir(target_directory) rescue nil + end + + it "works with old backup file format", type: :multisite do + test_multisite_connection("second") do + expect_decompress_and_clean_up_to_work( + backup_filename: "backup_till_v1.5.tar.gz", + require_metadata_file: true, + require_uploads: true + ) + end + end + + it "works with current backup file format" do + expect_decompress_and_clean_up_to_work( + backup_filename: "backup_since_v1.6.tar.gz", + require_metadata_file: false, + require_uploads: true + ) + end + + it "works with SQL only backup file" do + expect_decompress_and_clean_up_to_work( + backup_filename: "sql_only_backup.sql.gz", + expected_dump_filename: "sql_only_backup.sql", + require_metadata_file: false, + require_uploads: false + ) + end +end diff --git a/spec/lib/backup_restore/database_restorer_spec.rb b/spec/lib/backup_restore/database_restorer_spec.rb new file mode 100644 index 00000000000..e4c4d6cb13c --- /dev/null +++ b/spec/lib/backup_restore/database_restorer_spec.rb @@ -0,0 +1,188 @@ +# frozen_string_literal: true + +require 'rails_helper' +require_relative 'shared_context_for_backup_restore' + +describe BackupRestore::DatabaseRestorer do + include_context "shared stuff" + + let(:current_db) { RailsMultisite::ConnectionManagement.current_db } + subject { BackupRestore::DatabaseRestorer.new(logger, current_db) } + + def expect_create_readonly_functions + Migration::BaseDropper.expects(:create_readonly_function).at_least_once + end + + def expect_table_move + BackupRestore.expects(:move_tables_between_schemas).with("public", "backup").once + end + + def expect_psql(output_lines: ["output from psql"], exit_status: 0, stub_thread: false) + status = mock("psql status") + status.expects(:exitstatus).returns(exit_status).once + Process.expects(:last_status).returns(status).once + + if stub_thread + thread = mock("thread") + thread.stubs(:join) + Thread.stubs(:new).returns(thread) + end + + output_lines << nil + psql_io = mock("psql") + psql_io.expects(:readline).returns(*output_lines).times(output_lines.size) + IO.expects(:popen).yields(psql_io).once + end + + def expect_db_migrate + Discourse::Utils.expects(:execute_command).with do |env, command, options| + env["SKIP_POST_DEPLOYMENT_MIGRATIONS"] == "0" && + command == "rake db:migrate" && + options[:chdir] == Rails.root + end.once + end + + def expect_db_reconnect + RailsMultisite::ConnectionManagement.expects(:establish_connection).once + end + + def 
execute_stubbed_restore(stub_readonly_functions: true, stub_psql: true, stub_migrate: true, + dump_file_path: "foo.sql") + expect_table_move + expect_create_readonly_functions if stub_readonly_functions + expect_psql if stub_psql + expect_db_migrate if stub_migrate + subject.restore(dump_file_path) + end + + describe "#restore" do + it "executes everything in the correct order" do + restore = sequence("restore") + expect_table_move.in_sequence(restore) + expect_create_readonly_functions.in_sequence(restore) + expect_psql(stub_thread: true).in_sequence(restore) + expect_db_migrate.in_sequence(restore) + expect_db_reconnect.in_sequence(restore) + + subject.restore("foo.sql") + end + + context "with real psql" do + after do + DB.exec <<~SQL + -- Drop table and execute a commit to make the drop stick, + -- otherwise rspec will rollback the drop at the end of each test. + -- The tests in this context do not change the DB, so this should be safe. + DROP TABLE IF EXISTS foo; + COMMIT; + + -- Start a new transaction in order to suppress the + -- "there is no transaction in progress" warnings from rspec. + BEGIN TRANSACTION; + SQL + end + + def restore(filename, stub_migrate: true) + path = File.join(Rails.root, "spec/fixtures/db/restore", filename) + execute_stubbed_restore(stub_psql: false, stub_migrate: stub_migrate, dump_file_path: path) + end + + def expect_restore_to_work(filename) + restore(filename, stub_migrate: true) + expect(ActiveRecord::Base.connection.table_exists?("foo")).to eq(true) + end + + it "restores from PostgreSQL 9.3" do + # this covers the defaults of Discourse v1.0 up to v1.5 + expect_restore_to_work("postgresql_9.3.11.sql") + end + + it "restores from PostgreSQL 9.5.5" do + # it uses a slightly different header than later 9.5.x versions + expect_restore_to_work("postgresql_9.5.5.sql") + end + + it "restores from PostgreSQL 9.5" do + # this covers the defaults of Discourse v1.6 up to v1.9 + expect_restore_to_work("postgresql_9.5.10.sql") + end + + it "restores from PostgreSQL 10" do + # this covers the defaults of Discourse v1.7 up to v2.4 + expect_restore_to_work("postgresql_10.11.sql") + end + + it "restores from PostgreSQL 11" do + expect_restore_to_work("postgresql_11.6.sql") + end + + it "restores from PostgreSQL 12" do + expect_restore_to_work("postgresql_12.1.sql") + end + + it "detects error during restore" do + expect { restore("error.sql", stub_migrate: false) } + .to raise_error(BackupRestore::DatabaseRestoreError) + end + end + + context "database connection" do + it 'reconnects to the correct database', type: :multisite do + RailsMultisite::ConnectionManagement.establish_connection(db: 'second') + execute_stubbed_restore + expect(RailsMultisite::ConnectionManagement.current_db).to eq('second') + end + + it 'it is not erroring for non-multisite' do + expect { execute_stubbed_restore }.not_to raise_error + end + end + end + + describe "#rollback" do + it "moves tables back when tables were moved" do + BackupRestore.stubs(:can_rollback?).returns(true) + BackupRestore.expects(:move_tables_between_schemas).with("backup", "public").never + subject.rollback + + execute_stubbed_restore + + BackupRestore.expects(:move_tables_between_schemas).with("backup", "public").once + subject.rollback + end + end + + context "readonly functions" do + before do + Migration::SafeMigrate.stubs(:post_migration_path).returns("spec/fixtures/db/post_migrate") + end + + it "doesn't try to drop function when no functions have been created" do + 
Migration::BaseDropper.expects(:drop_readonly_function).never + subject.clean_up + end + + it "creates and drops all functions when none exist" do + Migration::BaseDropper.expects(:create_readonly_function).with(:email_logs, nil) + Migration::BaseDropper.expects(:create_readonly_function).with(:posts, :via_email) + Migration::BaseDropper.expects(:create_readonly_function).with(:posts, :raw_email) + execute_stubbed_restore(stub_readonly_functions: false) + + Migration::BaseDropper.expects(:drop_readonly_function).with(:email_logs, nil) + Migration::BaseDropper.expects(:drop_readonly_function).with(:posts, :via_email) + Migration::BaseDropper.expects(:drop_readonly_function).with(:posts, :raw_email) + subject.clean_up + end + + it "creates and drops only missing functions during restore" do + Migration::BaseDropper.stubs(:existing_discourse_function_names) + .returns(%w(raise_email_logs_readonly raise_posts_raw_email_readonly)) + + Migration::BaseDropper.expects(:create_readonly_function).with(:posts, :via_email) + execute_stubbed_restore(stub_readonly_functions: false) + + Migration::BaseDropper.expects(:drop_readonly_function).with(:posts, :via_email) + subject.clean_up + end + end +end diff --git a/spec/lib/backup_restore/meta_data_handler_spec.rb b/spec/lib/backup_restore/meta_data_handler_spec.rb new file mode 100644 index 00000000000..42b082326ce --- /dev/null +++ b/spec/lib/backup_restore/meta_data_handler_spec.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +require 'rails_helper' +require_relative 'shared_context_for_backup_restore' + +describe BackupRestore::MetaDataHandler do + include_context "shared stuff" + + let!(:backup_filename) { 'discourse-2019-11-18-143242-v20191108000414.tar.gz' } + + def with_metadata_file(content) + Dir.mktmpdir do |directory| + if !content.nil? 
+ path = File.join(directory, BackupRestore::MetaDataHandler::METADATA_FILE) + File.write(path, content) + end + + yield(directory) + end + end + + def validate_metadata(filename, tmp_directory) + BackupRestore::MetaDataHandler.new(logger, filename, tmp_directory).validate + end + + it "extracts metadata from file when metadata file exists" do + metadata = '{"source":"discourse","version":20160329101122}' + + with_metadata_file(metadata) do |dir| + expect(validate_metadata(backup_filename, dir)) + .to include(version: 20160329101122) + end + end + + it "extracts metadata from filename when metadata file does not exist" do + with_metadata_file(nil) do |dir| + expect(validate_metadata(backup_filename, dir)) + .to include(version: 20191108000414) + end + end + + it "raises an exception when the metadata file contains invalid JSON" do + currupt_metadata = '{"version":20160329101122' + + with_metadata_file(currupt_metadata) do |dir| + expect { validate_metadata(backup_filename, dir) } + .to raise_error(BackupRestore::MetaDataError) + end + end + + it "raises an exception when the metadata file is empty" do + with_metadata_file('') do |dir| + expect { validate_metadata(backup_filename, dir) } + .to raise_error(BackupRestore::MetaDataError) + end + end + + it "raises an exception when the filename contains no version number" do + filename = 'discourse-2019-11-18-143242.tar.gz' + + expect { validate_metadata(filename, nil) } + .to raise_error(BackupRestore::MetaDataError) + end + + it "raises an exception when the filename contains an invalid version number" do + filename = 'discourse-2019-11-18-143242-v123456789.tar.gz' + + expect { validate_metadata(filename, nil) } + .to raise_error(BackupRestore::MetaDataError) + end + + it "raises an exception when the backup's version is newer than the current version" do + new_backup_filename = 'discourse-2019-11-18-143242-v20191113193141.sql.gz' + + BackupRestore.expects(:current_version) + .returns(20191025005204).once + + expect { validate_metadata(new_backup_filename, nil) } + .to raise_error(BackupRestore::MigrationRequiredError) + end +end diff --git a/spec/lib/backup_restore/restorer_spec.rb b/spec/lib/backup_restore/restorer_spec.rb index 2990be90521..47dea3e32d8 100644 --- a/spec/lib/backup_restore/restorer_spec.rb +++ b/spec/lib/backup_restore/restorer_spec.rb @@ -2,135 +2,6 @@ require 'rails_helper' -# Causes flakiness describe BackupRestore::Restorer do - it 'detects which pg_dump output is restorable to different schemas' do - { - "9.6.7" => true, - "9.6.8" => false, - "9.6.9" => false, - "10.2" => true, - "10.3" => false, - "10.3.1" => false, - "10.4" => false, - "11" => false, - "11.4" => false, - "21" => false, - }.each do |key, value| - expect(described_class.pg_produces_portable_dump?(key)).to eq(value) - end - end - describe 'Decompressing a backup' do - let!(:admin) { Fabricate(:admin) } - - before do - SiteSetting.allow_restore = true - @restore_path = File.join(Rails.root, "public", "backups", RailsMultisite::ConnectionManagement.current_db) - end - - after do - FileUtils.rm_rf @restore_path - FileUtils.rm_rf @restorer.tmp_directory - end - - context 'When there are uploads' do - before do - @restore_folder = "backup-#{SecureRandom.hex}" - @temp_folder = "#{@restore_path}/#{@restore_folder}" - FileUtils.mkdir_p("#{@temp_folder}/uploads") - - Dir.chdir(@restore_path) do - File.write("#{@restore_folder}/dump.sql", 'This is a dump') - Compression::Gzip.new.compress(@restore_folder, 'dump.sql') - 
FileUtils.rm_rf("#{@restore_folder}/dump.sql") - File.write("#{@restore_folder}/uploads/upload.txt", 'This is an upload') - - Compression::Tar.new.compress(@restore_path, @restore_folder) - end - - Compression::Gzip.new.compress(@restore_path, "#{@restore_folder}.tar") - FileUtils.rm_rf @temp_folder - - build_restorer("#{@restore_folder}.tar.gz") - end - - it '#decompress_archive works correctly' do - @restorer.decompress_archive - - expect(exists?("dump.sql.gz")).to eq(true) - expect(exists?("uploads", directory: true)).to eq(true) - end - - it '#extract_dump works correctly' do - @restorer.decompress_archive - @restorer.extract_dump - - expect(exists?('dump.sql')).to eq(true) - end - end - - context 'When restoring a single file' do - before do - FileUtils.mkdir_p(@restore_path) - - Dir.chdir(@restore_path) do - File.write('dump.sql', 'This is a dump') - Compression::Gzip.new.compress(@restore_path, 'dump.sql') - FileUtils.rm_rf('dump.sql') - end - - build_restorer('dump.sql.gz') - end - - it '#extract_dump works correctly with a single file' do - @restorer.extract_dump - - expect(exists?("dump.sql")).to eq(true) - end - end - - def exists?(relative_path, directory: false) - full_path = "#{@restorer.tmp_directory}/#{relative_path}" - directory ? File.directory?(full_path) : File.exists?(full_path) - end - - def build_restorer(filename) - @restorer = described_class.new(admin.id, filename: filename) - @restorer.ensure_directory_exists(@restorer.tmp_directory) - @restorer.copy_archive_to_tmp_directory - end - end - - context 'Database connection' do - let!(:admin) { Fabricate(:admin) } - before do - SiteSetting.allow_restore = true - described_class.any_instance.stubs(ensure_we_have_a_filename: true) - described_class.any_instance.stubs(initialize_state: true) - end - - after do - SiteSetting.allow_restore = false - described_class.any_instance.unstub(:ensure_we_have_a_filename) - described_class.any_instance.unstub(:initialize_state) - end - - let(:conn) { RailsMultisite::ConnectionManagement } - let(:restorer) { described_class.new(admin.id) } - - it 'correctly reconnects to database', type: :multisite do - restorer.instance_variable_set(:@current_db, 'second') - conn.establish_connection(db: 'second') - expect(RailsMultisite::ConnectionManagement.current_db).to eq('second') - ActiveRecord::Base.connection_pool.spec.config[:db_key] = "incorrect_db" - restorer.send(:reconnect_database) - expect(RailsMultisite::ConnectionManagement.current_db).to eq('second') - end - - it 'it is not erroring for non multisite', type: :multisite do - RailsMultisite::ConnectionManagement::clear_settings! 
- expect { restorer.send(:reconnect_database) }.not_to raise_error - end - end end diff --git a/spec/lib/backup_restore/shared_context_for_backup_restore.rb b/spec/lib/backup_restore/shared_context_for_backup_restore.rb new file mode 100644 index 00000000000..bc9990469fa --- /dev/null +++ b/spec/lib/backup_restore/shared_context_for_backup_restore.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true +# +shared_context "shared stuff" do + let!(:logger) do + Class.new do + def log(message, ex = nil); end + end.new + end +end diff --git a/spec/lib/backup_restore/system_interface_spec.rb b/spec/lib/backup_restore/system_interface_spec.rb new file mode 100644 index 00000000000..a2a2c4314ee --- /dev/null +++ b/spec/lib/backup_restore/system_interface_spec.rb @@ -0,0 +1,152 @@ +# frozen_string_literal: true + +require 'rails_helper' +require_relative 'shared_context_for_backup_restore' + +describe BackupRestore::SystemInterface do + include_context "shared stuff" + + subject { BackupRestore::SystemInterface.new(logger) } + + context "readonly mode" do + after do + Discourse::READONLY_KEYS.each { |key| $redis.del(key) } + end + + describe "#enable_readonly_mode" do + it "enables readonly mode" do + Discourse.expects(:enable_readonly_mode).once + subject.enable_readonly_mode + end + + it "does not enable readonly mode when it is already in readonly mode" do + Discourse.enable_readonly_mode + Discourse.expects(:enable_readonly_mode).never + subject.enable_readonly_mode + end + end + + describe "#disable_readonly_mode" do + it "disables readonly mode" do + Discourse.expects(:disable_readonly_mode).once + subject.disable_readonly_mode + end + + it "does not disable readonly mode when readonly mode was explicitly enabled" do + Discourse.enable_readonly_mode + Discourse.expects(:disable_readonly_mode).never + subject.disable_readonly_mode + end + end + end + + describe "#mark_restore_as_running" do + it "calls mark_restore_as_running" do + BackupRestore.expects(:mark_as_running!).once + subject.mark_restore_as_running + end + end + + describe "#mark_restore_as_not_running" do + it "calls mark_restore_as_not_running" do + BackupRestore.expects(:mark_as_not_running!).once + subject.mark_restore_as_not_running + end + end + + describe "#listen_for_shutdown_signal" do + before { BackupRestore.mark_as_running! } + + after do + BackupRestore.clear_shutdown_signal! + BackupRestore.mark_as_not_running! + end + + it "exits the process when shutdown signal is set" do + expect do + thread = subject.listen_for_shutdown_signal + BackupRestore.set_shutdown_signal! + thread.join + end.to raise_error(SystemExit) + end + end + + describe "#pause_sidekiq" do + it "calls pause!" do + Sidekiq.expects(:pause!).once + subject.pause_sidekiq + end + end + + describe "#unpause_sidekiq" do + it "calls unpause!" do + Sidekiq.expects(:unpause!).once + subject.unpause_sidekiq + end + end + + describe "#wait_for_sidekiq" do + it "waits 6 seconds even when there are no running Sidekiq jobs" do + subject.expects(:sleep).with(6).once + subject.wait_for_sidekiq + end + + context "with Sidekiq workers" do + before { $redis.flushall } + after { $redis.flushall } + + def create_workers(site_id: nil, all_sites: false) + $redis.flushall + + payload = Sidekiq::Testing.fake! 
do + data = { post_id: 1 } + + if all_sites + data[:all_sites] = true + else + data[:current_site_id] = site_id || RailsMultisite::ConnectionManagement.current_db + end + + Jobs.enqueue(:process_post, data) + Jobs::ProcessPost.jobs.last + end + + Sidekiq.redis do |conn| + hostname = "localhost" + pid = 7890 + key = "#{hostname}:#{pid}" + process = { pid: pid, hostname: hostname } + + conn.sadd('processes', key) + conn.hmset(key, 'info', Sidekiq.dump_json(process)) + + data = Sidekiq.dump_json( + queue: 'default', + run_at: Time.now.to_i, + payload: Sidekiq.dump_json(payload) + ) + conn.hmset("#{key}:workers", '444', data) + end + end + + it "waits up to 60 seconds for jobs running for the current site to finish" do + subject.expects(:sleep).with(6).times(10) + create_workers + expect { subject.wait_for_sidekiq }.to raise_error(BackupRestore::RunningSidekiqJobsError) + end + + it "waits up to 60 seconds for jobs running on all sites to finish" do + subject.expects(:sleep).with(6).times(10) + create_workers(all_sites: true) + expect { subject.wait_for_sidekiq }.to raise_error(BackupRestore::RunningSidekiqJobsError) + end + + it "ignores jobs of other sites" do + subject.expects(:sleep).with(6).once + create_workers(site_id: "another_site") + + subject.wait_for_sidekiq + end + end + end +end diff --git a/spec/lib/backup_restore/uploads_restorer_spec.rb b/spec/lib/backup_restore/uploads_restorer_spec.rb new file mode 100644 index 00000000000..15fbcd24ce3 --- /dev/null +++ b/spec/lib/backup_restore/uploads_restorer_spec.rb @@ -0,0 +1,566 @@ +# frozen_string_literal: true + +require 'rails_helper' +require_relative 'shared_context_for_backup_restore' + +describe BackupRestore::UploadsRestorer do + include_context "shared stuff" + + subject { BackupRestore::UploadsRestorer.new(logger) } + + def with_temp_uploads_directory(name: "default", with_optimized: false) + Dir.mktmpdir do |directory| + path = File.join(directory, "uploads", name) + FileUtils.mkdir_p(path) + FileUtils.mkdir(File.join(path, "optimized")) if with_optimized + yield(directory, path) + end + end + + def expect_no_remap(source_site_name: nil, target_site_name:, metadata: []) + expect_remaps( + source_site_name: source_site_name, + target_site_name: target_site_name, + metadata: metadata + ) + end + + def expect_remap(source_site_name: nil, target_site_name:, metadata: [], from:, to:, &block) + expect_remaps( + source_site_name: source_site_name, + target_site_name: target_site_name, + metadata: metadata, + remaps: [{ from: from, to: to }], + &block + ) + end + + def expect_remaps(source_site_name: nil, target_site_name:, metadata: [], remaps: [], &block) + source_site_name ||= metadata.find { |d| d[:name] == "db_name" }&.dig(:value) || "default" + + if source_site_name != target_site_name + site_rename = { from: "/uploads/#{source_site_name}/", to: uploads_path(target_site_name) } + remaps << site_rename unless remaps.last == site_rename + end + + with_temp_uploads_directory(name: source_site_name, with_optimized: true) do |directory, path| + yield(directory) if block_given? + + Discourse.store.class.any_instance.expects(:copy_from).with(path).once + + if remaps.blank? 
+ DbHelper.expects(:remap).never + else + DbHelper.expects(:remap).with do |from, to, args| + args[:excluded_tables]&.include?("backup_metadata") + remaps.shift == { from: from, to: to } + end.times(remaps.size) + end + + if target_site_name == "default" + setup_and_restore(directory, metadata) + else + test_multisite_connection(target_site_name) { setup_and_restore(directory, metadata) } + end + end + end + + def setup_and_restore(directory, metadata) + metadata.each { |d| BackupMetadata.create!(d) } + subject.restore(directory) + end + + def uploads_path(database) + path = File.join("uploads", database) + + if Discourse.is_parallel_test? + path = File.join(path, ENV['TEST_ENV_NUMBER'].presence || '1') + end + + "/#{path}/" + end + + context "uploads" do + let!(:multisite) { { name: "multisite", value: true } } + let!(:no_multisite) { { name: "multisite", value: false } } + let!(:source_db_name) { { name: "db_name", value: "foo" } } + let!(:base_url) { { name: "base_url", value: "https://www.example.com/forum" } } + let!(:no_cdn_url) { { name: "cdn_url", value: nil } } + let!(:cdn_url) { { name: "cdn_url", value: "https://some-cdn.example.com" } } + let(:target_site_name) { target_site_type == multisite ? "second" : "default" } + let(:target_hostname) { target_site_type == multisite ? "test2.localhost" : "test.localhost" } + + shared_context "no uploads" do + it "does nothing when temporary uploads directory is missing or empty" do + store_class.any_instance.expects(:copy_from).never + + Dir.mktmpdir do |directory| + subject.restore(directory) + + FileUtils.mkdir(File.join(directory, "uploads")) + subject.restore(directory) + end + end + end + + shared_examples "without metadata" do + it "correctly remaps uploads" do + expect_no_remap(target_site_name: "default") + end + + it "correctly remaps when site name is different" do + expect_remap( + source_site_name: "foo", + target_site_name: "default", + from: "/uploads/foo/", + to: uploads_path("default") + ) + end + end + + shared_context "restores uploads" do + before do + Upload.where("id > 0").destroy_all + Fabricate(:optimized_image) + + upload = Fabricate(:upload_s3) + post = Fabricate(:post, raw: "![#{upload.original_filename}](#{upload.short_url})") + post.link_post_uploads + + FileHelper.stubs(:download).returns(file_from_fixtures("logo.png")) + FileStore::S3Store.any_instance.stubs(:store_upload).returns do + File.join( + "//s3-upload-bucket.s3.dualstack.us-east-1.amazonaws.com", + target_site_type == multisite ? 
"/uploads/#{target_site_name}" : "", + "original/1X/bc975735dfc6409c1c2aa5ebf2239949bcbdbd65.png" + ) + end + UserAvatar.import_url_for_user("logo.png", Fabricate(:user)) + end + + it "successfully restores uploads" do + SiteIconManager.expects(:ensure_optimized!).once + + with_temp_uploads_directory do |directory, path| + store_class.any_instance.expects(:copy_from).with(path).once + + expect { subject.restore(directory) } + .to change { OptimizedImage.count }.by_at_most(-1) + .and change { Jobs::CreateAvatarThumbnails.jobs.size }.by(1) + .and change { Post.where(baked_version: nil).count }.by(1) + end + end + + it "doesn't generate optimized images when backup contains optimized images" do + SiteIconManager.expects(:ensure_optimized!).never + + with_temp_uploads_directory(with_optimized: true) do |directory, path| + store_class.any_instance.expects(:copy_from).with(path).once + + expect { subject.restore(directory) } + .to change { OptimizedImage.count }.by(0) + .and change { Jobs::CreateAvatarThumbnails.jobs.size }.by(0) + .and change { Post.where(baked_version: nil).count }.by(1) + end + end + end + + shared_examples "common remaps" do + it "remaps when `base_url` changes" do + Discourse.expects(:base_url).returns("http://localhost").at_least_once + + expect_remap( + target_site_name: target_site_name, + metadata: [source_site_type, base_url], + from: "https://www.example.com/forum", + to: "http://localhost" + ) + end + + it "doesn't remap when `cdn_url` in `backup_metadata` is empty" do + expect_no_remap( + target_site_name: target_site_name, + metadata: [source_site_type, no_cdn_url] + ) + end + + it "remaps to new `cdn_url` when `cdn_url` changes to a different value" do + Discourse.expects(:asset_host).returns("https://new-cdn.example.com").at_least_once + + expect_remaps( + target_site_name: target_site_name, + metadata: [source_site_type, cdn_url], + remaps: [ + { from: "https://some-cdn.example.com/", to: "https://new-cdn.example.com/" }, + { from: "some-cdn.example.com", to: "new-cdn.example.com" } + ] + ) + end + + it "remaps to `base_url` when `cdn_url` changes to an empty value" do + Discourse.expects(:base_url).returns("http://example.com/discourse").at_least_once + Discourse.expects(:asset_host).returns(nil).at_least_once + + expect_remaps( + target_site_name: target_site_name, + metadata: [source_site_type, cdn_url], + remaps: [ + { from: "https://some-cdn.example.com/", to: "//example.com/discourse/" }, + { from: "some-cdn.example.com", to: "example.com" } + ] + ) + end + end + + shared_examples "remaps from local storage" do + it "doesn't remap when `s3_base_url` in `backup_metadata` is empty" do + expect_no_remap( + target_site_name: target_site_name, + metadata: [source_site_type, s3_base_url] + ) + end + + it "doesn't remap when `s3_cdn_url` in `backup_metadata` is empty" do + expect_no_remap( + target_site_name: target_site_name, + metadata: [source_site_type, s3_cdn_url] + ) + end + end + + context "currently stored locally" do + before do + SiteSetting.enable_s3_uploads = false + end + + let!(:store_class) { FileStore::LocalStore } + + include_context "no uploads" + include_context "restores uploads" + + context "remaps" do + include_examples "without metadata" + + context "uploads previously stored locally" do + let!(:s3_base_url) { { name: "s3_base_url", value: nil } } + let!(:s3_cdn_url) { { name: "s3_cdn_url", value: nil } } + + context "from regular site" do + let!(:source_site_type) { no_multisite } + + context "to regular site" do + let!(:target_site_type) 
{ no_multisite } + + include_examples "common remaps" + include_examples "remaps from local storage" + end + + context "to multisite", type: :multisite do + let!(:target_site_type) { multisite } + + include_examples "common remaps" + include_examples "remaps from local storage" + end + end + + context "from multisite" do + let!(:source_site_type) { multisite } + + context "to regular site" do + let!(:target_site_type) { no_multisite } + + include_examples "common remaps" + include_examples "remaps from local storage" + end + + context "to multisite", type: :multisite do + let!(:target_site_type) { multisite } + + include_examples "common remaps" + include_examples "remaps from local storage" + end + end + end + + context "uploads previously stored on S3" do + let!(:s3_base_url) { { name: "s3_base_url", value: "//old-bucket.s3-us-east-1.amazonaws.com" } } + let!(:s3_cdn_url) { { name: "s3_cdn_url", value: "https://s3-cdn.example.com" } } + + shared_examples "regular site remaps from S3" do + it "remaps when `s3_base_url` changes" do + expect_remap( + target_site_name: target_site_name, + metadata: [no_multisite, s3_base_url], + from: "//old-bucket.s3-us-east-1.amazonaws.com/", + to: uploads_path(target_site_name) + ) + end + + it "remaps when `s3_cdn_url` changes" do + expect_remaps( + target_site_name: target_site_name, + metadata: [no_multisite, s3_cdn_url], + remaps: [ + { from: "https://s3-cdn.example.com/", to: "//#{target_hostname}#{uploads_path(target_site_name)}" }, + { from: "s3-cdn.example.com", to: target_hostname } + ] + ) + end + end + + shared_examples "multisite remaps from S3" do + it "remaps when `s3_base_url` changes" do + expect_remap( + target_site_name: target_site_name, + metadata: [source_db_name, multisite, s3_base_url], + from: "//old-bucket.s3-us-east-1.amazonaws.com/", + to: "/" + ) + end + + it "remaps when `s3_cdn_url` changes" do + expect_remaps( + target_site_name: target_site_name, + metadata: [source_db_name, multisite, s3_cdn_url], + remaps: [ + { from: "https://s3-cdn.example.com/", to: "//#{target_hostname}/" }, + { from: "s3-cdn.example.com", to: target_hostname } + ] + ) + end + end + + context "from regular site" do + let!(:source_site_type) { no_multisite } + + context "to regular site" do + let!(:target_site_type) { no_multisite } + + include_examples "common remaps" + include_examples "regular site remaps from S3" + end + + context "to multisite", type: :multisite do + let!(:target_site_type) { multisite } + + include_examples "common remaps" + include_examples "regular site remaps from S3" + end + end + + context "from multisite" do + let!(:source_site_type) { multisite } + + context "to regular site" do + let!(:target_site_type) { no_multisite } + + include_examples "common remaps" + include_examples "multisite remaps from S3" + end + + context "to multisite", type: :multisite do + let!(:target_site_type) { no_multisite } + + include_examples "common remaps" + include_examples "multisite remaps from S3" + end + end + end + end + end + + context "currently stored on S3" do + before do + SiteSetting.s3_upload_bucket = "s3-upload-bucket" + SiteSetting.s3_access_key_id = "s3-access-key-id" + SiteSetting.s3_secret_access_key = "s3-secret-access-key" + SiteSetting.enable_s3_uploads = true + end + + let!(:store_class) { FileStore::S3Store } + + include_context "no uploads" + include_context "restores uploads" + + context "remaps" do + include_examples "without metadata" + + context "uploads previously stored locally" do + let!(:s3_base_url) { { name: 
"s3_base_url", value: nil } } + let!(:s3_cdn_url) { { name: "s3_cdn_url", value: nil } } + + context "from regular site" do + let!(:source_site_type) { no_multisite } + + context "to regular site" do + let!(:target_site_type) { no_multisite } + + include_examples "common remaps" + include_examples "remaps from local storage" + end + + context "to multisite", type: :multisite do + let!(:target_site_type) { no_multisite } + + include_examples "common remaps" + include_examples "remaps from local storage" + end + end + + context "from multisite" do + let!(:source_site_type) { multisite } + + context "to regular site" do + let!(:target_site_type) { no_multisite } + + include_examples "common remaps" + include_examples "remaps from local storage" + end + + context "to multisite", type: :multisite do + let!(:target_site_type) { multisite } + + include_examples "common remaps" + include_examples "remaps from local storage" + end + end + end + + context "uploads previously stored on S3" do + let!(:s3_base_url) { { name: "s3_base_url", value: "//old-bucket.s3-us-east-1.amazonaws.com" } } + let!(:s3_cdn_url) { { name: "s3_cdn_url", value: "https://s3-cdn.example.com" } } + + shared_examples "regular site remaps from S3" do + it "remaps when `s3_base_url` changes" do + expect_remap( + target_site_name: target_site_name, + metadata: [no_multisite, s3_base_url], + from: "//old-bucket.s3-us-east-1.amazonaws.com/", + to: uploads_path(target_site_name) + ) + end + + it "remaps when `s3_cdn_url` changes" do + SiteSetting::Upload.expects(:s3_cdn_url).returns("https://new-s3-cdn.example.com").at_least_once + + expect_remaps( + target_site_name: target_site_name, + metadata: [no_multisite, s3_cdn_url], + remaps: [ + { from: "https://s3-cdn.example.com/", to: "https://new-s3-cdn.example.com#{uploads_path(target_site_name)}" }, + { from: "s3-cdn.example.com", to: "new-s3-cdn.example.com" } + ] + ) + end + end + + shared_examples "multisite remaps from S3" do + it "remaps when `s3_base_url` changes" do + expect_remap( + target_site_name: target_site_name, + metadata: [source_db_name, multisite, s3_base_url], + from: "//old-bucket.s3-us-east-1.amazonaws.com/", + to: "/" + ) + end + + context "when `s3_cdn_url` is configured" do + it "remaps when `s3_cdn_url` changes" do + SiteSetting::Upload.expects(:s3_cdn_url).returns("http://new-s3-cdn.example.com").at_least_once + + expect_remaps( + target_site_name: target_site_name, + metadata: [source_db_name, multisite, s3_cdn_url], + remaps: [ + { from: "https://s3-cdn.example.com/", to: "//new-s3-cdn.example.com/" }, + { from: "s3-cdn.example.com", to: "new-s3-cdn.example.com" } + ] + ) + end + end + + context "when `s3_cdn_url` is not configured" do + it "remaps to `base_url` when `s3_cdn_url` changes" do + SiteSetting::Upload.expects(:s3_cdn_url).returns(nil).at_least_once + + expect_remaps( + target_site_name: target_site_name, + metadata: [source_db_name, multisite, s3_cdn_url], + remaps: [ + { from: "https://s3-cdn.example.com/", to: "//#{target_hostname}/" }, + { from: "s3-cdn.example.com", to: target_hostname } + ] + ) + end + end + end + + context "from regular site" do + let!(:source_site_type) { no_multisite } + + context "to regular site" do + let!(:target_site_name) { "default" } + let!(:target_hostname) { "test.localhost" } + + include_examples "common remaps" + include_examples "regular site remaps from S3" + end + + context "to multisite", type: :multisite do + let!(:target_site_name) { "second" } + let!(:target_hostname) { "test2.localhost" } + + 
include_examples "common remaps" + include_examples "regular site remaps from S3" + end + end + + context "from multisite" do + let!(:source_site_type) { multisite } + + context "to regular site" do + let!(:target_site_type) { no_multisite } + + include_examples "common remaps" + include_examples "multisite remaps from S3" + end + + context "to multisite", type: :multisite do + let!(:target_site_type) { multisite } + + include_examples "common remaps" + include_examples "multisite remaps from S3" + end + end + end + end + end + end + + it "raises an exception when the store doesn't support the copy_from method" do + Discourse.stubs(:store).returns(Object.new) + + with_temp_uploads_directory do |directory| + expect { subject.restore(directory) }.to raise_error(BackupRestore::UploadsRestoreError) + end + end + + it "raises an exception when there are multiple folders in the uploads directory" do + with_temp_uploads_directory do |directory| + FileUtils.mkdir_p(File.join(directory, "uploads", "foo")) + expect { subject.restore(directory) }.to raise_error(BackupRestore::UploadsRestoreError) + end + end + + it "ignores 'PaxHeaders' and hidden directories within the uploads directory" do + expect_remap( + source_site_name: "xylan", + target_site_name: "default", + from: "/uploads/xylan/", + to: uploads_path("default") + ) do |directory| + FileUtils.mkdir_p(File.join(directory, "uploads", "PaxHeaders.27134")) + FileUtils.mkdir_p(File.join(directory, "uploads", ".hidden")) + end + end +end