mirror of
https://github.com/discourse/discourse.git
synced 2024-11-22 14:38:17 +08:00
DEV: Improve mbox import script
* Better documentation of settings * Add option to exclude trimmed parts of emails (enabled by default) to not reveal email addresses
This commit is contained in:
parent
36062f43c8
commit
0a88232e87
|
@ -8,11 +8,7 @@ end
|
|||
|
||||
module ImportScripts
|
||||
module Mbox
|
||||
require_relative 'mbox/support/settings'
|
||||
|
||||
@settings = Settings.load(ARGV[0])
|
||||
|
||||
require_relative 'mbox/importer'
|
||||
Importer.new(@settings).perform
|
||||
Importer.new(ARGV[0]).perform
|
||||
end
|
||||
end
|
||||
|
|
|
@ -7,9 +7,8 @@ require_relative 'support/settings'
|
|||
|
||||
module ImportScripts::Mbox
|
||||
class Importer < ImportScripts::Base
|
||||
# @param settings [ImportScripts::Mbox::Settings]
|
||||
def initialize(settings)
|
||||
@settings = settings
|
||||
def initialize(settings_filename)
|
||||
@settings = Settings.load(settings_filename)
|
||||
super()
|
||||
|
||||
@database = Database.new(@settings.data_dir, @settings.batch_size)
|
||||
|
@ -139,7 +138,10 @@ module ImportScripts::Mbox
|
|||
body = receiver.add_attachments(body, user)
|
||||
end
|
||||
|
||||
body = "#{body}#{Email::Receiver.elided_html(elided)}" if elided.present?
|
||||
if elided.present? && @settings.show_trimmed_content
|
||||
body = "#{body}#{Email::Receiver.elided_html(elided)}"
|
||||
end
|
||||
|
||||
body
|
||||
end
|
||||
|
||||
|
|
|
@ -1,22 +1,38 @@
|
|||
# Directory where all emails and mbox files are stored.
|
||||
data_dir: /shared/import/data
|
||||
|
||||
# mbox files
|
||||
# Regular expression for splitting emails in mbox files.
|
||||
# Choose one of the following examples that works for you or add your own regular expression.
|
||||
split_regex: "^From .+@.+"
|
||||
#split_regex: "^From .+@example.com.+"
|
||||
|
||||
# individual emails
|
||||
#split_regex: ""
|
||||
|
||||
# Listserv files
|
||||
#split_regex: "^========================================================================="
|
||||
|
||||
default_trust_level: 1
|
||||
# When each file contains only one email
|
||||
#split_regex: ""
|
||||
|
||||
# Prefer the HTML part of emails instead of the plain text part.
|
||||
prefer_html: true
|
||||
|
||||
# The trust level of users created by the import script.
|
||||
default_trust_level: 1
|
||||
|
||||
# Create staged users instead of regular users.
|
||||
staged: true
|
||||
|
||||
# You can enable this option if the script should stop after indexing all emails
|
||||
# instead of executing the import right away. That's useful if you need to make some changes
|
||||
# to the indexed data in the `index.db` created during the indexing step.
|
||||
index_only: false
|
||||
|
||||
# Only enable this option when the import script fails to group messages with
|
||||
# the `In-Reply-To` and `References` headers.
|
||||
group_messages_by_subject: false
|
||||
|
||||
# Always show trimmed part of emails. WARNING: This might reveal email addresses.
|
||||
show_trimmed_content: false
|
||||
|
||||
# Remove prefixes like [FOO] or (BAR) from topic titles and replace them with tags.
|
||||
# You can map one or more case-insensitive prefixes to the same tag in Discourse.
|
||||
# "Tag name in Discourse": "foo"
|
||||
|
|
|
@ -19,6 +19,7 @@ module ImportScripts::Mbox
|
|||
attr_reader :group_messages_by_subject
|
||||
attr_reader :subject_prefix_regex
|
||||
attr_reader :automatically_remove_list_name_prefix
|
||||
attr_reader :show_trimmed_content
|
||||
attr_reader :tags
|
||||
|
||||
def initialize(yaml)
|
||||
|
@ -31,20 +32,23 @@ module ImportScripts::Mbox
|
|||
@index_only = yaml['index_only']
|
||||
@group_messages_by_subject = yaml['group_messages_by_subject']
|
||||
|
||||
unless yaml['remove_subject_prefixes'].empty?
|
||||
if yaml['remove_subject_prefixes'].present?
|
||||
prefix_regexes = yaml['remove_subject_prefixes'].map { |p| Regexp.new(p) }
|
||||
@subject_prefix_regex = /^#{Regexp.union(prefix_regexes).source}/i
|
||||
end
|
||||
|
||||
@automatically_remove_list_name_prefix = yaml['automatically_remove_list_name_prefix']
|
||||
@show_trimmed_content = yaml['show_trimmed_content']
|
||||
|
||||
@tags = []
|
||||
yaml['tags'].each do |tag_name, value|
|
||||
prefixes = Regexp.union(value).source
|
||||
@tags << {
|
||||
regex: /^(?:(?:\[(?:#{prefixes})\])|(?:\((?:#{prefixes})\)))\s*/i,
|
||||
name: tag_name
|
||||
}
|
||||
if yaml['tags'].present?
|
||||
@tags = []
|
||||
yaml['tags'].each do |tag_name, value|
|
||||
prefixes = Regexp.union(value).source
|
||||
@tags << {
|
||||
regex: /^(?:(?:\[(?:#{prefixes})\])|(?:\((?:#{prefixes})\)))\s*/i,
|
||||
name: tag_name
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue
Block a user