From df3ac5e2e9b938f7fb62f86b9fb735dae73cac9e Mon Sep 17 00:00:00 2001
From: Jay Pfaffman
Date: Wed, 12 Oct 2016 13:18:34 -0700
Subject: [PATCH] MBOX: replace email address with @username

---
Reviewer notes (text in this section is ignored by `git am`):
 * replace_email_addresses iterates the same User.real scope it counts, counts
   every user towards progress, and no longer carries the dead "@" check or
   the unused locals.
 * The per-message replacement lives in one helper, replace_author_email, which
   tolerates senders without a matching user and treats the address as a
   literal string instead of an unescaped regular expression.
 * create_forum_topics keeps building raw before the attachment loop (as
   import_replies does) so nothing the loop appends to raw is discarded.

 script/import_scripts/mbox.rb | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/script/import_scripts/mbox.rb b/script/import_scripts/mbox.rb
index 47c5c5613d3..9f254bfd0df 100755
--- a/script/import_scripts/mbox.rb
+++ b/script/import_scripts/mbox.rb
@@ -53,6 +53,7 @@ class ImportScripts::Mbox < ImportScripts::Base
     import_users
     create_forum_topics
     import_replies
+    # replace_email_addresses # uncomment to replace all email addresses with @usernames
   end
 
   def import_categories
@@ -344,6 +345,49 @@ p end
     db.close
   end
 
+  # Replaces every occurrence of a user's email address in existing posts
+  # with an @username mention. Intended to run after all posts are imported.
+  def replace_email_addresses
+    puts "", "replacing email addresses with @usernames"
+
+    total_count = User.real.count
+    progress_count = 0
+    start_time = Time.now
+
+    # from: https://meta.discourse.org/t/replace-a-string-in-all-posts/48729/17
+    # and https://github.com/discourse/discourse/blob/master/lib/tasks/posts.rake#L114-L136
+    User.real.find_each do |user|
+      # count every user, even skipped ones, so the progress can reach 100%
+      progress_count += 1
+      find = user.email.to_s
+      next if find.empty?
+
+      replace = "@#{user.username}"
+      pattern = /#{Regexp.escape(find)}/i
+
+      Post.where("raw ILIKE ?", "%#{find}%").find_each do |post|
+        new_raw = post.raw.gsub(pattern, replace)
+        next if new_raw == post.raw
+
+        post.revise(Discourse.system_user, { raw: new_raw }, { bypass_bump: true })
+        print_warning "\nReplaced #{find} with #{replace} in topic #{post.topic_id}"
+      end
+
+      print_status(progress_count, total_count, start_time)
+    end
+  end
+
+  # Replaces the sender's own address in raw, in both its plain form
+  # ("user@host") and its obfuscated form ("user at host"), with an @username
+  # mention. Returns raw unchanged when no user has that email address.
+  def replace_author_email(raw, from_email)
+    user = User.find_by_email(from_email)
+    return raw unless user
+
+    mention = "@#{user.username}"
+    raw.gsub(from_email, mention).gsub(from_email.sub("@", " at "), mention)
+  end
+
   def parse_email(msg)
     receiver = Email::Receiver.new(msg)
     mail = Mail.read_from_string(msg)
@@ -388,6 +432,7 @@ p end
         selected = selected.join('') if selected.kind_of?(Array)
 
         raw = selected.force_encoding(selected.encoding).encode("UTF-8")
+        raw = replace_author_email(clean_raw(raw).to_s, from_email)
 
         title = mail.subject
 
@@ -467,6 +512,7 @@ p end
         next unless selected
 
         raw = selected.force_encoding(selected.encoding).encode("UTF-8")
+        raw = replace_author_email(clean_raw(raw).to_s, from_email)
 
         # import the attachments
         mail.attachments.each do |attachment|