handle plaintext and HTML emails in mbox importer

This commit is contained in:
Gerhard Schlager 2017-11-15 17:48:36 +01:00
parent 9207dee69a
commit 06a6ddc3ba
3 changed files with 30 additions and 8 deletions

View File

@ -97,15 +97,13 @@ module ImportScripts::Mbox
# Builds the attribute hash for the post that represents one imported email row.
# The author is looked up by the row's `from_email`; Discourse::SYSTEM_USER_ID is
# used when no imported user matches.
# NOTE(review): this span is a rendered diff, so removed and added lines appear
# together (two `raw:` entries, a `body` string next to the `format_raw` call) and
# the hunk marker below hides a few lines of the hash — confirm against the real file.
def map_post(row)
user_id = user_id_from_imported_user_id(row['from_email']) || Discourse::SYSTEM_USER_ID
body = CGI.escapeHTML(row['body'] || '')
body << map_attachments(row['raw_message'], user_id) if row['attachment_count'].positive?
body << Email::Receiver.elided_html(row['elided']) if row['elided'].present?
# attachment_html stays nil when the row has no attachments.
attachment_html = map_attachments(row['raw_message'], user_id) if row['attachment_count'].positive?
{
id: row['msg_id'],
user_id: user_id,
created_at: to_time(row['email_date']),
raw: body,
raw: format_raw(row['body'], attachment_html, row['elided'], row['format']),
raw_email: row['raw_message'],
via_email: true,
cook_method: Post.cook_methods[:email],
@ -115,6 +113,28 @@ module ImportScripts::Mbox
}
end
# Builds the raw post text for an imported email.
#
# email_body      - body text of the email; nil is treated as an empty string
# attachment_html - markup for the email's attachments, or nil/blank for none
# elided          - elided part of the email, or nil/blank for none
# format          - compared against Email::Receiver.formats[:markdown] and
#                   Email::Receiver.formats[:plaintext]
#
# Returns the raw text, or nil when format matches neither known value.
def format_raw(email_body, attachment_html, elided, format)
  text = email_body || ''

  case format
  when Email::Receiver.formats[:markdown]
    # Work on a copy: appending to the argument itself would silently alter the
    # caller's row data (or a shared string literal when the body was nil).
    body = text.dup
    body << attachment_html if attachment_html.present?
    # Wrap the elided part with Email::Receiver.elided_html instead of gluing the
    # bare text onto the end of the body.
    body << Email::Receiver.elided_html(elided) if elided.present?
    body
  when Email::Receiver.formats[:plaintext]
    # Each section is fenced by its own tag; escape_tags keeps tag-like lines
    # inside the content from being read as fences.
    body = "[plaintext]\n#{escape_tags(text)}\n[/plaintext]"
    body << "\n[attachments]\n#{escape_tags(attachment_html)}\n[/attachments]" if attachment_html.present?
    body << "\n[elided]\n#{escape_tags(elided)}\n[/elided]" if elided.present?
    body
  end
end
# Returns a copy of text in which every line consisting solely of one of the
# section tags ([plaintext], [attachments], [elided], or the closing forms) is
# prefixed with a space, so it no longer sits at the start of the line.
#
# The argument is left untouched: the non-destructive gsub is used so the
# caller's string is not rewritten in place and frozen strings are accepted.
def escape_tags(text)
  text.gsub(/^(\[\/?(?:plaintext|attachments|elided)\])$/, ' \1')
end
def map_first_post(row)
mapped = map_post(row)
mapped[:category] = category_id_from_imported_category_id(row['category'])

View File

@ -34,10 +34,10 @@ module ImportScripts::Mbox
# Writes one email into the `email` table using INSERT OR REPLACE.
# `email` is passed to @db.execute as the bind values for the named placeholders
# (:msg_id, :from_email, ...) — presumably a hash keyed by those names; verify
# against the caller.
# NOTE(review): this span is a rendered diff, so the column list and the VALUES
# list each appear twice (without and with `format`) — confirm against the real file.
def insert_email(email)
@db.execute(<<-SQL, email)
INSERT OR REPLACE INTO email (msg_id, from_email, from_name, subject,
email_date, raw_message, body, elided, attachment_count, charset,
email_date, raw_message, body, elided, format, attachment_count, charset,
category, filename, first_line_number, last_line_number)
VALUES (:msg_id, :from_email, :from_name, :subject,
:email_date, :raw_message, :body, :elided, :attachment_count, :charset,
:email_date, :raw_message, :body, :elided, :format, :attachment_count, :charset,
:category, :filename, :first_line_number, :last_line_number)
SQL
end
@ -148,7 +148,7 @@ module ImportScripts::Mbox
def fetch_messages(last_row_id)
rows = @db.execute(<<-SQL, last_row_id)
SELECT o.ROWID, e.msg_id, from_email, subject, email_date, in_reply_to,
raw_message, body, elided, attachment_count, category
raw_message, body, elided, format, attachment_count, category
FROM email e
JOIN email_order o USING (msg_id)
WHERE email_date IS NOT NULL AND
@ -204,6 +204,7 @@ module ImportScripts::Mbox
raw_message TEXT,
body TEXT,
elided TEXT,
format INTEGER,
attachment_count INTEGER NOT NULL DEFAULT 0,
charset TEXT,
category TEXT NOT NULL,

View File

@ -58,7 +58,7 @@ module ImportScripts::Mbox
msg_id = receiver.message_id
parsed_email = receiver.mail
from_email, from_display_name = receiver.parse_from_field(parsed_email)
body, elided = receiver.select_body
body, elided, format = receiver.select_body
reply_message_ids = extract_reply_message_ids(parsed_email)
email = {
@ -70,6 +70,7 @@ module ImportScripts::Mbox
raw_message: receiver.raw_email,
body: body,
elided: elided,
format: format,
attachment_count: receiver.attachments.count,
charset: parsed_email.charset&.downcase,
category: category_name,