From 7302f6b60bc22c7718c52f60188107945783a1ce Mon Sep 17 00:00:00 2001 From: Robin Ward Date: Fri, 5 Jun 2015 11:46:21 -0400 Subject: [PATCH] Simple "cook" for email imports from mailing lists --- Gemfile.lock | 2 +- app/models/post.rb | 27 +++++++++++++++---------- lib/email_cook.rb | 36 +++++++++++++++++++++++++++++++++ lib/post_creator.rb | 1 + script/import_scripts/nabble.rb | 13 ++++++++---- 5 files changed, 63 insertions(+), 16 deletions(-) create mode 100644 lib/email_cook.rb diff --git a/Gemfile.lock b/Gemfile.lock index ff5e238b0ff..562f3a91979 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -486,4 +486,4 @@ DEPENDENCIES unicorn BUNDLED WITH - 1.10.2 + 1.10.3 diff --git a/app/models/post.rb b/app/models/post.rb index 281b6a3d4e9..0dbe2c6c3b4 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -6,6 +6,7 @@ require_dependency 'enum' require_dependency 'post_analyzer' require_dependency 'validators/post_validator' require_dependency 'plugin/filter' +require_dependency 'email_cook' require 'archetype' require 'digest/sha1' @@ -76,7 +77,7 @@ class Post < ActiveRecord::Base end def self.cook_methods - @cook_methods ||= Enum.new(:regular, :raw_html) + @cook_methods ||= Enum.new(:regular, :raw_html, :email) end def self.find_by_detail(key, value) @@ -161,16 +162,20 @@ class Post < ActiveRecord::Base # case we can skip the rendering pipeline. 
return raw if cook_method == Post.cook_methods[:raw_html] - # Default is to cook posts - cooked = if !self.user || SiteSetting.tl3_links_no_follow || !self.user.has_trust_level?(TrustLevel[3]) - post_analyzer.cook(*args) - else - # At trust level 3, we don't apply nofollow to links - cloned = args.dup - cloned[1] ||= {} - cloned[1][:omit_nofollow] = true - post_analyzer.cook(*cloned) - end + cooked = nil + if cook_method == Post.cook_methods[:email] + cooked = EmailCook.new(raw).cook + else + cooked = if !self.user || SiteSetting.tl3_links_no_follow || !self.user.has_trust_level?(TrustLevel[3]) + post_analyzer.cook(*args) + else + # At trust level 3, we don't apply nofollow to links + cloned = args.dup + cloned[1] ||= {} + cloned[1][:omit_nofollow] = true + post_analyzer.cook(*cloned) + end + end new_cooked = Plugin::Filter.apply(:after_post_cook, self, cooked) diff --git a/lib/email_cook.rb b/lib/email_cook.rb new file mode 100644 index 00000000000..33693190cbb --- /dev/null +++ b/lib/email_cook.rb @@ -0,0 +1,36 @@ +# A very simple formatter for imported emails +class EmailCook + + def initialize(raw) + @raw = raw + end + + def cook + result = "" + + in_quote = false + quote_buffer = "" + @raw.each_line do |l| + + if l =~ /^\s*>/ + in_quote = true + quote_buffer << l.sub(/^[\s>]*/, '') << "
" + elsif in_quote + result << "
#{quote_buffer}
" + quote_buffer = "" + in_quote = false + else + result << l << "
" + end + end + + if in_quote + result << "
#{quote_buffer}
" + end + + result.gsub!(/(
){3,10}/, '

') + + result + end + +end diff --git a/lib/post_creator.rb b/lib/post_creator.rb index 0dc39952eef..bb9723a9598 100644 --- a/lib/post_creator.rb +++ b/lib/post_creator.rb @@ -28,6 +28,7 @@ class PostCreator # cook_method - Method of cooking the post. # :regular - Pass through Markdown parser and strip bad HTML # :raw_html - Perform no processing + # :email - Imported from an email # via_email - Mark this post as arriving via email # raw_email - Full text of arriving email (to store) # diff --git a/script/import_scripts/nabble.rb b/script/import_scripts/nabble.rb index c975b5cf1bb..35eceb146f9 100644 --- a/script/import_scripts/nabble.rb +++ b/script/import_scripts/nabble.rb @@ -54,7 +54,10 @@ class ImportScripts::MyAskBot < ImportScripts::Base def parse_email(msg) receiver = Email::Receiver.new(msg, skip_sanity_check: true) mail = Mail.read_from_string(msg) - receiver.parse_body(mail) + mail.body + + selected = receiver.select_body(mail) + selected.force_encoding(selected.encoding).encode("UTF-8") end def create_forum_topics @@ -86,7 +89,8 @@ class ImportScripts::MyAskBot < ImportScripts::Base user_id: user_id_from_imported_user_id(t["owner_id"]) || Discourse::SYSTEM_USER_ID, created_at: Time.zone.at(@td.decode(t["when_created"])), category: CATEGORY_ID, - raw: raw } + raw: raw, + cook_method: Post.cook_methods[:email] } end end end @@ -137,10 +141,11 @@ class ImportScripts::MyAskBot < ImportScripts::Base topic_id: topic_id, user_id: user_id_from_imported_user_id(p['owner_id']) || Discourse::SYSTEM_USER_ID, created_at: Time.zone.at(@td.decode(p["when_created"])), - raw: raw } + raw: raw, + cook_method: Post.cook_methods[:email] } end end end end -ImportScripts::MyAskBot.new.perform +ImportScripts::MyAskBot.new.perform