Improvements to the experimental mbox importer

* Disable journaling to improve performance in Docker
* Use the email cooking method
* Store IncomingEmail in order to find related posts by Message-ID
* Escape HTML in imported messages
This commit is contained in:
Gerhard Schlager 2017-10-19 14:27:40 +02:00
parent 22ba70fb01
commit c41880ab19
2 changed files with 19 additions and 4 deletions

View File

@ -97,7 +97,7 @@ module ImportScripts::Mbox
def map_post(row)
user_id = user_id_from_imported_user_id(row['from_email']) || Discourse::SYSTEM_USER_ID
body = row['body'] || ''
body = CGI.escapeHTML(row['body'] || '')
body << map_attachments(row['raw_message'], user_id) if row['attachment_count'].positive?
body << Email::Receiver.elided_html(row['elided']) if row['elided'].present?
@ -108,14 +108,17 @@ module ImportScripts::Mbox
raw: body,
raw_email: row['raw_message'],
via_email: true,
# cook_method: Post.cook_methods[:email] # this is slowing down the import by factor 4
cook_method: Post.cook_methods[:email],
post_create_action: proc do |post|
create_incoming_email(post, row)
end
}
end
# Builds the attribute hash for the first post of a topic: starts from the
# hash returned by map_post(row) and adds the :category and :title entries.
# Returns the augmented hash.
def map_first_post(row)
mapped = map_post(row)
mapped[:category] = category_id_from_imported_category_id(row['category'])
# NOTE(review): this listing is a diff rendered without +/- markers, so the
# next line is presumably the pre-change version and the line after it is its
# replacement — confirm against the real file.
mapped[:title] = row['subject'].strip[0...255]
# The stripped subject is HTML-escaped first and then cut to at most 255 characters.
# NOTE(review): slicing after escaping could cut an entity such as "&amp;" in
# half at the 255-character boundary — confirm whether that matters here.
mapped[:title] = CGI.escapeHTML(row['subject'].strip)[0...255]
mapped
end
@ -154,6 +157,18 @@ module ImportScripts::Mbox
attachment_markdown
end
# Creates an IncomingEmail record for an imported post.
#
# The message-level fields (Message-ID, raw message, subject, sender address)
# are read from the import row; the user, topic and post ids are read from the
# given post. Returns whatever IncomingEmail.create returns.
def create_incoming_email(post, row)
  message_fields = {
    message_id: row['msg_id'],
    raw: row['raw_message'],
    subject: row['subject'],
    from_address: row['from_email']
  }

  post_fields = {
    user_id: post.user_id,
    topic_id: post.topic_id,
    post_id: post.id
  }

  IncomingEmail.create(message_fields.merge(post_fields))
end
# Parses an ISO 8601 datetime string and converts it via Time.zone.at.
# Returns nil when datetime is nil (or false).
def to_time(datetime)
  return unless datetime

  parsed = DateTime.iso8601(datetime)
  Time.zone.at(parsed)
end

View File

@ -163,7 +163,7 @@ module ImportScripts::Mbox
private
# Issues the journal_mode PRAGMA statements on @db.
def configure_database
# NOTE(review): this listing is a diff rendered without +/- markers, so the
# next line is presumably the removed (pre-change) setting — confirm against
# the real file.
@db.execute 'PRAGMA journal_mode = TRUNCATE'
# Turns journaling off; the commit message gives better import performance in
# Docker as the reason.
@db.execute 'PRAGMA journal_mode = OFF'
end
def upgrade_schema_version