mirror of
https://github.com/discourse/discourse.git
synced 2024-11-22 13:41:31 +08:00
Improvements to importing a mailing list
This commit is contained in:
parent
0ea54e9255
commit
3b35972d25
|
@ -37,6 +37,7 @@ class ImportScripts::Mbox < ImportScripts::Base
|
|||
topics = []
|
||||
|
||||
topic_lookup = {}
|
||||
topic_titles = {}
|
||||
replies = []
|
||||
|
||||
all_messages do |mail, filename|
|
||||
|
@ -44,21 +45,55 @@ class ImportScripts::Mbox < ImportScripts::Base
|
|||
|
||||
msg_id = mail['Message-ID'].to_s
|
||||
reply_to = mail['In-Reply-To'].to_s
|
||||
title = clean_title(mail['Subject'].to_s)
|
||||
|
||||
if reply_to.present?
|
||||
topic = topic_lookup[reply_to] || reply_to
|
||||
topic_lookup[msg_id] = topic
|
||||
replies << {id: msg_id, topic: topic, file: filename}
|
||||
replies << {id: msg_id, topic: topic, file: filename, title: title}
|
||||
else
|
||||
topics << {id: msg_id, file: filename}
|
||||
topics << {id: msg_id, file: filename, title: title}
|
||||
topic_titles[title] ||= msg_id
|
||||
end
|
||||
end
|
||||
|
||||
# Replies without parents should be hoisted to topics
|
||||
to_hoist = []
|
||||
replies.each do |r|
|
||||
to_hoist << r if !topic_lookup[r[:topic]]
|
||||
end
|
||||
|
||||
to_hoist.each do |h|
|
||||
replies.delete(h)
|
||||
topics << {id: h[:id], file: h[:file], title: h[:title]}
|
||||
topic_titles[h[:title]] ||= h[:id]
|
||||
end
|
||||
|
||||
# Topics whose title duplicates that of the first topic seen with the same title should become replies to that topic
|
||||
to_group = []
|
||||
topics.each do |t|
|
||||
first = topic_titles[t[:title]]
|
||||
to_group << t if first && first != t[:id]
|
||||
end
|
||||
|
||||
to_group.each do |t|
|
||||
topics.delete(t)
|
||||
replies << {id: t[:id], topic: topic_titles[t[:title]], file: t[:file], title: t[:title]}
|
||||
end
|
||||
|
||||
File.write(USER_INDEX_PATH, {users: users}.to_json)
|
||||
File.write(TOPIC_INDEX_PATH, {topics: topics}.to_json)
|
||||
File.write(REPLY_INDEX_PATH, {replies: replies}.to_json)
|
||||
end
|
||||
|
||||
# Strips leading reply markers ("Re:", in any letter case) from a subject
# line so that a reply's subject compares equal to the subject of the
# message it answers.
#
# @param title [String] subject text
# @return [String] a new string with every leading "Re:" prefix removed;
#   occurrences of "Re:" elsewhere in the subject are left alone
def clean_title(title)
  # \A anchors at the very start of the string (^ would also match after any
  # embedded newline), and the repeated group collapses stacked prefixes
  # such as "Re: Re: subject".
  title.sub(/\A(?:Re: *)+/i, '')
end
|
||||
|
||||
# Removes the standard Google Groups subscription footer (the "-- " signature
# delimiter followed by the "You received this message because..." notice)
# from a message body.
#
# @param raw [String] message body text
# @return [String] a new string with every occurrence of the footer removed
def clean_raw(raw)
  # Literal dots are escaped so that only the exact footer wording matches;
  # %r{} avoids having to escape the slashes in the URL.
  raw.gsub(%r{-- \nYou received this message because you are subscribed to the Google Groups "[^"]*" group\.\nTo unsubscribe from this group and stop receiving emails from it, send an email to [^+@]+\+unsubscribe@googlegroups\.com\.\nFor more options, visit https://groups\.google\.com/groups/opt_out\.}, '')
end
|
||||
|
||||
def import_users
|
||||
puts "", "importing users"
|
||||
|
||||
|
@ -100,7 +135,7 @@ class ImportScripts::Mbox < ImportScripts::Base
|
|||
topics = all_topics[offset..offset+BATCH_SIZE-1]
|
||||
break if topics.nil?
|
||||
|
||||
next if all_records_exist? :posts, topics.map {|t| t['id'].to_i}
|
||||
next if all_records_exist? :posts, topics.map {|t| t['id']}
|
||||
|
||||
create_posts(topics, total: topic_count, offset: offset) do |t|
|
||||
raw_email = File.read(t['file'])
|
||||
|
@ -116,11 +151,11 @@ class ImportScripts::Mbox < ImportScripts::Base
|
|||
title = mail.subject.gsub(/\[[^\]]+\]+/, '').strip
|
||||
|
||||
{ id: t['id'],
|
||||
title: title,
|
||||
title: clean_title(title),
|
||||
user_id: user_id_from_imported_user_id(mail.from.first) || Discourse::SYSTEM_USER_ID,
|
||||
created_at: mail.date,
|
||||
category: CATEGORY_ID,
|
||||
raw: raw,
|
||||
raw: clean_raw(raw),
|
||||
cook_method: Post.cook_methods[:email] }
|
||||
end
|
||||
end
|
||||
|
@ -129,9 +164,6 @@ class ImportScripts::Mbox < ImportScripts::Base
|
|||
def import_replies
|
||||
puts "", "creating topic replies"
|
||||
|
||||
all_topics = ::JSON.parse(File.read(TOPIC_INDEX_PATH))['topics']
|
||||
topic_count = all_topics.size
|
||||
|
||||
replies = ::JSON.parse(File.read(REPLY_INDEX_PATH))['replies']
|
||||
post_count = replies.size
|
||||
|
||||
|
@ -139,7 +171,7 @@ class ImportScripts::Mbox < ImportScripts::Base
|
|||
posts = replies[offset..offset+BATCH_SIZE-1]
|
||||
break if posts.nil?
|
||||
|
||||
next if all_records_exist? :posts, posts.map {|p| p['id'].to_i}
|
||||
next if all_records_exist? :posts, posts.map {|p| p['id']}
|
||||
|
||||
create_posts(posts, total: post_count, offset: offset) do |p|
|
||||
parent_id = p['topic']
|
||||
|
@ -161,7 +193,7 @@ class ImportScripts::Mbox < ImportScripts::Base
|
|||
topic_id: topic_id,
|
||||
user_id: user_id_from_imported_user_id(mail.from.first) || Discourse::SYSTEM_USER_ID,
|
||||
created_at: mail.date,
|
||||
raw: raw,
|
||||
raw: clean_raw(raw),
|
||||
cook_method: Post.cook_methods[:email] }
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue
Block a user