# frozen_string_literal: true require_relative "../base" require_relative "support/database" require_relative "support/indexer" require_relative "support/settings" module ImportScripts::Mbox class Importer < ImportScripts::Base def initialize(settings_filename) @settings = Settings.load(settings_filename) super() @database = Database.new(@settings.data_dir, @settings.batch_size) end def get_site_settings_for_import settings = super settings[:enable_staged_users] = true settings[:incoming_email_prefer_html] = @settings.prefer_html settings end protected def execute index_messages if @settings.index_only @skip_updates = true else SiteSetting.tagging_enabled = true if @settings.tags.present? import_categories import_users import_posts end end def index_messages puts "", "creating index" indexer = Indexer.new(@database, @settings) indexer.execute end def import_categories puts "", "creating categories" rows = @database.fetch_categories create_categories(rows) do |row| { id: row["name"], name: row["name"], parent_category_id: row["parent_category_id"].presence, } end end def import_users puts "", "creating users" total_count = @database.count_users last_email = "" batches do |offset| rows, last_email = @database.fetch_users(last_email) break if rows.empty? next if all_records_exist?(:users, rows.map { |row| row["email"] }) create_users(rows, total: total_count, offset: offset) do |row| { id: row["email"], email: row["email"], name: row["name"], trust_level: @settings.trust_level, staged: @settings.staged, active: !@settings.staged, created_at: to_time(row["date_of_first_message"]), } end end end def batches super(@settings.batch_size) end def import_posts puts "", "creating topics and posts" total_count = @database.count_messages last_row_id = 0 batches do |offset| rows, last_row_id = @database.fetch_messages(last_row_id) break if rows.empty? next if all_records_exist?(:posts, rows.map { |row| row["msg_id"] }) create_posts(rows, total: total_count, offset: offset) do |row| begin if row["email_date"].blank? puts "Date is missing. Skipping #{row["msg_id"]}" nil elsif row["in_reply_to"].blank? map_first_post(row) else map_reply(row) end rescue => e puts "Failed to map post for #{row["msg_id"]}", e, e.backtrace.join("\n") end end end end def map_post(row) user_id = user_id_from_imported_user_id(row["from_email"]) || Discourse::SYSTEM_USER_ID { id: row["msg_id"], user_id: user_id, created_at: to_time(row["email_date"]), raw: format_raw(row, user_id), raw_email: row["raw_message"], via_email: true, post_create_action: proc { |post| create_incoming_email(post, row) }, } end def format_raw(row, user_id) body = row["body"] || "" elided = row["elided"] if row["attachment_count"].positive? receiver = Email::Receiver.new(row["raw_message"]) user = User.find(user_id) body = receiver.add_attachments(body, user) end if elided.present? && @settings.show_trimmed_content body = "#{body}#{Email::Receiver.elided_html(elided)}" end body end def map_first_post(row) subject = row["subject"].dup tags = remove_tags!(subject) mapped = map_post(row) mapped[:category] = category_id_from_imported_category_id(row["category"]) mapped[:title] = subject.strip[0...255] mapped[:tags] = tags if tags.present? mapped end def map_reply(row) parent = @lookup.topic_lookup_from_imported_post_id(row["in_reply_to"]) if parent.blank? puts "Parent message #{row["in_reply_to"]} doesn't exist. Skipping #{row["msg_id"]}: #{row["subject"][0..40]}" return nil end mapped = map_post(row) mapped[:topic_id] = parent[:topic_id] mapped end def remove_tags!(subject) tag_names = [] remove_prefixes!(subject) loop do old_length = subject.length @settings.tags.each do |tag| tag_names << tag[:name] if subject.sub!(tag[:regex], "") && tag[:name].present? end remove_prefixes!(subject) if subject.length != old_length break if subject.length == old_length end tag_names.uniq end def remove_prefixes!(subject) # There could be multiple prefixes... loop do if subject.sub!(@settings.subject_prefix_regex, "") subject.strip! else break end end end def create_incoming_email(post, row) IncomingEmail.create( message_id: row["msg_id"], raw: row["raw_message"], subject: row["subject"], from_address: row["from_email"], user_id: post.user_id, topic_id: post.topic_id, post_id: post.id, ) end def to_time(timestamp) Time.zone.at(timestamp) if timestamp end end end