From c41880ab193e85c727e19a23de05f21b4c8ebdd3 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Thu, 19 Oct 2017 14:27:40 +0200 Subject: [PATCH] Improvements to the experimental mbox importer * Disable journaling to improve performance in Docker * Use the email cooking method * Store IncomingEmail in order to find related posts by Message-ID * Escape HTML in imported messages --- script/import_scripts/mbox/importer.rb | 21 ++++++++++++++++--- .../import_scripts/mbox/support/database.rb | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/script/import_scripts/mbox/importer.rb b/script/import_scripts/mbox/importer.rb index 762c162e823..71328df3a71 100644 --- a/script/import_scripts/mbox/importer.rb +++ b/script/import_scripts/mbox/importer.rb @@ -97,7 +97,7 @@ module ImportScripts::Mbox def map_post(row) user_id = user_id_from_imported_user_id(row['from_email']) || Discourse::SYSTEM_USER_ID - body = row['body'] || '' + body = CGI.escapeHTML(row['body'] || '') body << map_attachments(row['raw_message'], user_id) if row['attachment_count'].positive? body << Email::Receiver.elided_html(row['elided']) if row['elided'].present? 
@@ -108,14 +108,17 @@ module ImportScripts::Mbox raw: body, raw_email: row['raw_message'], via_email: true, - # cook_method: Post.cook_methods[:email] # this is slowing down the import by factor 4 + cook_method: Post.cook_methods[:email], + post_create_action: proc do |post| + create_incoming_email(post, row) + end } end def map_first_post(row) mapped = map_post(row) mapped[:category] = category_id_from_imported_category_id(row['category']) - mapped[:title] = row['subject'].strip[0...255] + mapped[:title] = CGI.escapeHTML(row['subject'].strip)[0...255] mapped end @@ -154,6 +157,18 @@ module ImportScripts::Mbox attachment_markdown end + def create_incoming_email(post, row) + IncomingEmail.create( + message_id: row['msg_id'], + raw: row['raw_message'], + subject: row['subject'], + from_address: row['from_email'], + user_id: post.user_id, + topic_id: post.topic_id, + post_id: post.id + ) + end + def to_time(datetime) Time.zone.at(DateTime.iso8601(datetime)) if datetime end diff --git a/script/import_scripts/mbox/support/database.rb b/script/import_scripts/mbox/support/database.rb index 396d23e5b23..a3cb723046c 100644 --- a/script/import_scripts/mbox/support/database.rb +++ b/script/import_scripts/mbox/support/database.rb @@ -163,7 +163,7 @@ module ImportScripts::Mbox private def configure_database - @db.execute 'PRAGMA journal_mode = TRUNCATE' + @db.execute 'PRAGMA journal_mode = OFF' end def upgrade_schema_version