mirror of
https://github.com/discourse/discourse.git
synced 2024-11-22 07:28:00 +08:00
DEV: mbox importer improvements (#16557)
* FIX: support specifying parent_category_id in mbox import metadata * FIX: elide tabs from topic titles * FIX: optionally fix Mailman from: addresses * DEV: optionally elide anything up to the last = in email addresses * Fix Mailman broken from: detection
This commit is contained in:
parent
0f772bdf5b
commit
3e5faffb0d
|
@ -458,7 +458,16 @@ class ImportScripts::Base
|
|||
.where("LOWER(name) = ?", opts[:name].downcase.strip)
|
||||
.first
|
||||
|
||||
return existing if existing
|
||||
if existing
|
||||
if import_id && existing.custom_fields["import_id"] != import_id
|
||||
existing.custom_fields["import_id"] = import_id
|
||||
existing.save!
|
||||
|
||||
add_category(import_id, existing)
|
||||
end
|
||||
|
||||
return existing
|
||||
end
|
||||
|
||||
post_create_action = opts.delete(:post_create_action)
|
||||
|
||||
|
|
|
@ -50,7 +50,8 @@ module ImportScripts::Mbox
|
|||
create_categories(rows) do |row|
|
||||
{
|
||||
id: row['name'],
|
||||
name: row['name']
|
||||
name: row['name'],
|
||||
parent_category_id: row['parent_category_id'].presence,
|
||||
}
|
||||
end
|
||||
end
|
||||
|
|
|
@ -48,4 +48,18 @@ tags:
|
|||
# https://en.wikipedia.org/wiki/List_of_email_subject_abbreviations#Abbreviations_in_other_languages).
|
||||
remove_subject_prefixes:
|
||||
- "Re: "
|
||||
- "Fwd?: "
|
||||
- "Fwd?: "
|
||||
|
||||
# For emails processed by Mailman with this type of bogus header:
|
||||
# From: Jane Smith via ListName <ListName@lists.example.com>
|
||||
# attempt to get the actual sender's address from other available headers like Reply-To or the /^From / line
|
||||
fix_mailman_via_addresses: false
|
||||
|
||||
# Elide everything up to and including the last occurrence of = in addresses (BATV, etc.)
|
||||
# i.e. normalize these:
|
||||
# prvs=00377ab9a2=someone@somewhere.com
|
||||
# bounces+840901-dbd9-flang-dev=someone@somewhere.com
|
||||
# blah=blih=bluh=someone@somewhere.com
|
||||
# into:
|
||||
# someone@somewhere.com
|
||||
elide_equals_in_addresses: false
|
||||
|
|
|
@ -30,8 +30,8 @@ module ImportScripts::Mbox
|
|||
|
||||
def insert_category(category)
|
||||
@db.execute(<<-SQL, category)
|
||||
INSERT OR REPLACE INTO category (name, description)
|
||||
VALUES (:name, :description)
|
||||
INSERT OR REPLACE INTO category (name, description, parent_category_id)
|
||||
VALUES (:name, :description, :parent_category_id)
|
||||
SQL
|
||||
end
|
||||
|
||||
|
@ -150,7 +150,7 @@ module ImportScripts::Mbox
|
|||
|
||||
def fetch_categories
|
||||
@db.execute <<-SQL
|
||||
SELECT name, description
|
||||
SELECT name, description, parent_category_id
|
||||
FROM category
|
||||
ORDER BY name
|
||||
SQL
|
||||
|
@ -218,7 +218,8 @@ module ImportScripts::Mbox
|
|||
@db.execute <<-SQL
|
||||
CREATE TABLE IF NOT EXISTS category (
|
||||
name TEXT NOT NULL PRIMARY KEY,
|
||||
description TEXT
|
||||
description TEXT,
|
||||
parent_category_id INTEGER
|
||||
)
|
||||
SQL
|
||||
end
|
||||
|
|
|
@ -55,7 +55,8 @@ module ImportScripts::Mbox
|
|||
|
||||
category = {
|
||||
name: metadata['name'].presence || File.basename(directory),
|
||||
description: metadata['description']
|
||||
description: metadata['description'],
|
||||
parent_category_id: metadata['parent_category_id'].presence,
|
||||
}
|
||||
|
||||
@database.insert_category(category)
|
||||
|
@ -67,7 +68,28 @@ module ImportScripts::Mbox
|
|||
begin
|
||||
msg_id = receiver.message_id
|
||||
parsed_email = receiver.mail
|
||||
|
||||
from_email, from_display_name = receiver.parse_from_field(parsed_email)
|
||||
|
||||
if @settings.fix_mailman_via_addresses
|
||||
# Detect cases like this and attempt to get actual sender from other headers:
|
||||
# From: Jane Smith via ListName <ListName@lists.example.com>
|
||||
|
||||
if receiver.mail['X-Mailman-Version'] && from_display_name =~ /\bvia \S+$/i
|
||||
email_from_from_line = opts[:from_line].scan(/From (\S+)/).flatten.first
|
||||
a = Mail::Address.new(email_from_from_line)
|
||||
from_email = a.address
|
||||
from_display_name = a.display_name
|
||||
# if name is not available there, look for it in Reply-To
|
||||
if from_display_name.nil?
|
||||
reply_to = receiver.mail.to_s.scan(/[\n\r]Reply-To: ([^\r\n]+)/).flatten.first
|
||||
from_display_name = Mail::Address.new(reply_to).display_name
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
from_email = from_email.sub(/^(.*)=/, '') if @settings.elide_equals_in_addresses
|
||||
|
||||
body, elided, format = receiver.select_body
|
||||
reply_message_ids = extract_reply_message_ids(parsed_email)
|
||||
|
||||
|
@ -125,11 +147,12 @@ module ImportScripts::Mbox
|
|||
puts "indexing #{filename}"
|
||||
|
||||
if @split_regex.present?
|
||||
each_mail(filename) do |raw_message, first_line_number, last_line_number|
|
||||
each_mail(filename) do |raw_message, first_line_number, last_line_number, from_line|
|
||||
opts = {
|
||||
first_line_number: first_line_number,
|
||||
last_line_number: last_line_number,
|
||||
start_time: monotonic_time
|
||||
start_time: monotonic_time,
|
||||
from_line: from_line,
|
||||
}
|
||||
receiver = read_mail_from_string(raw_message)
|
||||
yield receiver, filename, opts if receiver.present?
|
||||
|
@ -159,13 +182,17 @@ module ImportScripts::Mbox
|
|||
first_line_number = 1
|
||||
last_line_number = 0
|
||||
|
||||
from_line = nil
|
||||
|
||||
each_line(filename) do |line|
|
||||
if line.scrub =~ @split_regex
|
||||
if last_line_number > 0
|
||||
yield raw_message, first_line_number, last_line_number
|
||||
yield raw_message, first_line_number, last_line_number, from_line
|
||||
raw_message = +''
|
||||
first_line_number = last_line_number + 1
|
||||
end
|
||||
|
||||
from_line = line
|
||||
else
|
||||
raw_message << line
|
||||
end
|
||||
|
@ -173,7 +200,7 @@ module ImportScripts::Mbox
|
|||
last_line_number += 1
|
||||
end
|
||||
|
||||
yield raw_message, first_line_number, last_line_number if raw_message.present?
|
||||
yield raw_message, first_line_number, last_line_number, from_line if raw_message.present?
|
||||
end
|
||||
|
||||
def each_line(filename)
|
||||
|
@ -202,7 +229,7 @@ module ImportScripts::Mbox
|
|||
|
||||
def extract_subject(receiver, list_name)
|
||||
subject = receiver.subject
|
||||
subject.blank? ? nil : subject.strip
|
||||
subject.blank? ? nil : subject.strip.gsub(/\t+/, ' ')
|
||||
end
|
||||
|
||||
def ignored_file?(path, checksums)
|
||||
|
|
|
@ -21,6 +21,8 @@ module ImportScripts::Mbox
|
|||
attr_reader :automatically_remove_list_name_prefix
|
||||
attr_reader :show_trimmed_content
|
||||
attr_reader :tags
|
||||
attr_reader :fix_mailman_via_addresses
|
||||
attr_reader :elide_equals_in_addresses
|
||||
|
||||
def initialize(yaml)
|
||||
@data_dir = yaml['data_dir']
|
||||
|
@ -39,6 +41,8 @@ module ImportScripts::Mbox
|
|||
|
||||
@automatically_remove_list_name_prefix = yaml['automatically_remove_list_name_prefix']
|
||||
@show_trimmed_content = yaml['show_trimmed_content']
|
||||
@fix_mailman_via_addresses = yaml['fix_mailman_via_addresses']
|
||||
@elide_equals_in_addresses = yaml['elide_equals_in_addresses']
|
||||
|
||||
@tags = []
|
||||
if yaml['tags'].present?
|
||||
|
|
Loading…
Reference in New Issue
Block a user