discourse/lib/email/sender.rb
Martin Brennan 8ebd5edd1e
DEV: Rename secure_media to secure_uploads (#18376)
This commit renames all secure_media related settings to secure_uploads_* along with the associated functionality.

This is being done because "media" does not really cover it: we aren't just doing this for images, videos, etc., but for all uploads on the site.

Additionally, in future we want to secure more types of uploads, and enable a kind of "mixed mode" where some uploads are secure and some are not, so keeping media in the name is just confusing.

This also keeps compatibility with the `secure-media-uploads` path, and changes new
secure URLs to be `secure-uploads`.

Deprecated settings:

* secure_media -> secure_uploads
* secure_media_allow_embed_images_in_emails -> secure_uploads_allow_embed_images_in_emails
* secure_media_max_email_embed_image_size_kb -> secure_uploads_max_email_embed_image_size_kb
2022-09-29 09:24:33 +10:00

583 lines
22 KiB
Ruby

# frozen_string_literal: true
#
# A helper class to send an email. It will also handle a nil message, which it considers
# to be "do nothing". This is because some Mailers will decide not to do work for some
# reason. For example, emailing a user too frequently. A nil to address is also considered
# "do nothing"
#
# It also adds an HTML part for the plain text body
#
require 'uri'
require 'net/smtp'
# SMTP error classes that Email::Sender#send rescues around delivery; a
# message failing with one of these is recorded as skipped instead of raising.
SMTP_CLIENT_ERRORS = [
  Net::SMTPFatalError,
  Net::SMTPSyntaxError
]

# Email types that are still sent even when the `disable_emails` site setting
# would otherwise stop delivery.
BYPASS_DISABLE_TYPES = %w[
  admin_login
  test_message
  new_version
  group_smtp
  invite_password_instructions
  download_backup_message
  admin_confirmation_message
]
module Email
class Sender
# Builds a sender for one outgoing message.
#
# message    - the mail message to deliver
# email_type - the type of email; compared (as a string) against
#              BYPASS_DISABLE_TYPES and stored on the email log
# user       - optional recipient user, used for the log's user_id and for
#              the staff check when emails are limited to staff
def initialize(message, email_type, user = nil)
  @message, @email_type, @user = message, email_type, user
  # Filled in by add_attachments: upload sha1 => position in @message.attachments
  @message_attachments_index = {}
end
##
# Prepares and delivers @message, writing an EmailLog on success or a
# SkippedEmailLog when the message is skipped.
#
# The steps, in order:
#
#   1. Return without logging when emails are disabled for this email type
#      or recipient, or when the message is an ActionMailer NullMail.
#   2. Skip (with a SkippedEmailLog) blank messages, blank or ".invalid"
#      recipients, and blank bodies.
#   3. Render the HTML part and rewrite relative upload links in the first part.
#   4. For emails tied to a post: add attachments and threading headers and,
#      unless sending from a group SMTP address, mailing list headers.
#   5. Set the bounce and Reply-To headers, reconcile custom headers, and add
#      provider specific metadata headers.
#   6. Apply the final HTML styling, deliver, and save the EmailLog.
#
# Returns the saved EmailLog when the message was delivered, the result of
# `skip` when the message was skipped, or nil when sending was bypassed
# without logging.
def send
  bypass_disable = BYPASS_DISABLE_TYPES.include?(@email_type.to_s)

  if SiteSetting.disable_emails == "yes" && !bypass_disable
    return
  end

  return if ActionMailer::Base::NullMail === @message
  return if ActionMailer::Base::NullMail === (@message.message rescue nil)

  return skip(SkippedEmailLog.reason_types[:sender_message_blank]) if @message.blank?
  return skip(SkippedEmailLog.reason_types[:sender_message_to_blank]) if @message.to.blank?

  if SiteSetting.disable_emails == "non-staff" && !bypass_disable
    return unless find_user&.staff?
  end

  return skip(SkippedEmailLog.reason_types[:sender_message_to_invalid]) if to_address.end_with?(".invalid")

  if @message.text_part
    if @message.text_part.body.to_s.blank?
      return skip(SkippedEmailLog.reason_types[:sender_text_part_body_blank])
    end
  else
    if @message.body.to_s.blank?
      return skip(SkippedEmailLog.reason_types[:sender_body_blank])
    end
  end

  @message.charset = 'UTF-8'

  opts = {}

  renderer = Email::Renderer.new(@message, opts)

  if @message.html_part
    @message.html_part.body = renderer.html
  else
    @message.html_part = Mail::Part.new do
      content_type 'text/html; charset=UTF-8'
      body renderer.html
    end
  end

  # Fix relative (ie upload) HTML links in markdown which do not work well in plain text emails.
  # These are the links we add when a user uploads a file or image.
  # Ideally we would parse general markdown into plain text, but that is almost an intractable problem.
  url_prefix = Discourse.base_url
  @message.parts[0].body = @message.parts[0].body.to_s.gsub(/<a class="attachment" href="(\/uploads\/default\/[^"]+)">([^<]*)<\/a>/, '[\2|attachment](' + url_prefix + '\1)')
  @message.parts[0].body = @message.parts[0].body.to_s.gsub(/<img src="(\/uploads\/default\/[^"]+)"([^>]*)>/, '![](' + url_prefix + '\1)')

  @message.text_part.content_type = 'text/plain; charset=UTF-8'
  user_id = @user&.id

  # Set up the email log
  email_log = EmailLog.new(
    email_type: @email_type,
    to_address: to_address,
    user_id: user_id
  )

  if cc_addresses.any?
    email_log.cc_addresses = cc_addresses.join(";")
    email_log.cc_user_ids = User.with_email(cc_addresses).pluck(:id)
  end

  host = Email::Sender.host_for(Discourse.base_url)

  post_id = header_value('X-Discourse-Post-Id')
  topic_id = header_value('X-Discourse-Topic-Id')
  reply_key = get_reply_key(post_id, user_id)
  from_address = @message.from&.first

  # Present only when the From address belongs to a group with SMTP enabled.
  smtp_group_id = from_address.blank? ? nil : Group.where(
    email_username: from_address, smtp_enabled: true
  ).pluck_first(:id)

  # always set a default Message ID from the host
  @message.header['Message-ID'] = Email::MessageIdService.generate_default

  if topic_id.present? && post_id.present?
    post = Post.find_by(id: post_id, topic_id: topic_id)

    # guards against deleted posts and topics
    return skip(SkippedEmailLog.reason_types[:sender_post_deleted]) if post.blank?

    topic = post.topic
    return skip(SkippedEmailLog.reason_types[:sender_topic_deleted]) if topic.blank?

    add_attachments(post)
    add_identification_field_headers(topic, post)

    # See https://www.ietf.org/rfc/rfc2919.txt for the List-ID
    # specification.
    if topic&.category && !topic.category.uncategorized?
      list_id = "#{SiteSetting.title} | #{topic.category.name} <#{topic.category.name.downcase.tr(' ', '-')}.#{host}>"

      # subcategory case
      if !topic.category.parent_category_id.nil?
        parent_category_name = Category.find_by(id: topic.category.parent_category_id).name
        list_id = "#{SiteSetting.title} | #{parent_category_name} #{topic.category.name} <#{topic.category.name.downcase.tr(' ', '-')}.#{parent_category_name.downcase.tr(' ', '-')}.#{host}>"
      end
    else
      list_id = "#{SiteSetting.title} <#{host}>"
    end

    # When we are emailing people from a group inbox, we are having a PM
    # conversation with them, as a support account would. In this case
    # mailing list headers do not make sense. It is not like a forum topic
    # where you may have tens or hundreds of participants -- it is a
    # conversation between the group and a small handful of people
    # directly contacting the group, often just one person.
    if !smtp_group_id

      # https://www.ietf.org/rfc/rfc3834.txt
      @message.header['Precedence'] = 'list'
      @message.header['List-ID'] = list_id

      if topic
        if SiteSetting.private_email?
          @message.header['List-Archive'] = "#{Discourse.base_url}#{topic.slugless_url}"
        else
          @message.header['List-Archive'] = topic.url
        end
      end
    end
  end

  if Email::Sender.bounceable_reply_address?
    email_log.bounce_key = SecureRandom.hex

    # WARNING: RFC claims you can not set the Return Path header, this is 100% correct
    # however Rails has special handling for this header and ends up using this value
    # as the Envelope From address so stuff works as expected
    @message.header[:return_path] = Email::Sender.bounce_address(email_log.bounce_key)
  end

  email_log.post_id = post_id if post_id.present?
  email_log.topic_id = topic_id if topic_id.present?

  if reply_key.present?
    # Use the non-mutating gsub here: gsub! returns nil when nothing was
    # replaced, which would assign nil to the Reply-To header whenever the
    # header does not contain the %{reply_key} placeholder.
    @message.header['Reply-To'] = header_value('Reply-To').gsub("%{reply_key}", reply_key)
    @message.header[Email::MessageBuilder::ALLOW_REPLY_BY_EMAIL_HEADER] = nil
  end

  MessageBuilder.custom_headers(SiteSetting.email_custom_headers).each do |key, _|
    # Any custom headers added via MessageBuilder that are doubled up here
    # with values that we determine should be set to the last value, which is
    # the one we determined. Our header values should always override the email_custom_headers.
    #
    # While it is valid via RFC5322 to have more than one value for certain headers,
    # we just want to keep it to one, especially in cases where the custom value
    # would conflict with our own.
    #
    # See https://datatracker.ietf.org/doc/html/rfc5322#section-3.6 and
    # https://github.com/mikel/mail/blob/8ef377d6a2ca78aa5bd7f739813f5a0648482087/lib/mail/header.rb#L109-L132
    custom_header = @message.header[key]
    if custom_header.is_a?(Array)
      our_value = custom_header.last.value

      # Must be set to nil first otherwise another value is just added
      # to the array of values for the header.
      @message.header[key] = nil
      @message.header[key] = our_value
    end

    # Read the value before the header is possibly removed below.
    value = header_value(key)

    # Remove Auto-Submitted header for group private message emails, it does
    # not make sense there and may hurt deliverability.
    #
    # From https://www.iana.org/assignments/auto-submitted-keywords/auto-submitted-keywords.xhtml:
    #
    # > Indicates that a message was generated by an automatic process, and is not a direct response to another message.
    if key.downcase == "auto-submitted" && smtp_group_id
      @message.header[key] = nil
    end

    # Replace reply_key in custom headers or remove
    if value&.include?('%{reply_key}')
      # Delete old header first or else the same header will be added twice
      @message.header[key] = nil
      if reply_key.present?
        @message.header[key] = value.gsub('%{reply_key}', reply_key)
      end
    end
  end

  # pass the original message_id when using mailjet/mandrill/sparkpost
  case ActionMailer::Base.smtp_settings[:address]
  when /\.mailjet\.com/
    @message.header['X-MJ-CustomID'] = @message.message_id
  when "smtp.mandrillapp.com"
    merge_json_x_header('X-MC-Metadata', message_id: @message.message_id)
  when "smtp.sparkpostmail.com"
    merge_json_x_header('X-MSYS-API', metadata: { message_id: @message.message_id })
  end

  # Parse the HTML again so we can make any final changes before
  # sending
  style = Email::Styles.new(@message.html_part.body.to_s)

  # Suppress images from short emails
  if SiteSetting.strip_images_from_short_emails &&
      @message.html_part.body.to_s.bytesize <= SiteSetting.short_email_length &&
      @message.html_part.body =~ /<img[^>]+>/
    style.strip_avatars_and_emojis
  end

  # Embeds any of the secure images that have been attached inline,
  # removing the redaction notice.
  if SiteSetting.secure_uploads_allow_embed_images_in_emails
    style.inline_secure_images(@message.attachments, @message_attachments_index)
  end

  @message.html_part.body = style.to_s

  email_log.message_id = @message.message_id

  # Log when a message is being sent from a group SMTP address, so we
  # can debug deliverability issues.
  if smtp_group_id
    email_log.smtp_group_id = smtp_group_id

    # Store contents of all outgoing emails using group SMTP
    # for greater visibility and debugging. If the size of this
    # gets out of hand, we should look into a group-level setting
    # to enable this; size should be kept in check by regular purging
    # of EmailLog though.
    email_log.raw = Email::Cleaner.new(@message).execute
  end

  DiscourseEvent.trigger(:before_email_send, @message, @email_type)

  begin
    message_response = @message.deliver!

    # TestMailer from the Mail gem does not return a real response, it
    # returns an array containing @message, so we have to have this workaround.
    if message_response.kind_of?(Net::SMTP::Response)
      email_log.smtp_transaction_response = message_response.message&.chomp
    end
  rescue *SMTP_CLIENT_ERRORS => e
    # Client-side SMTP failures are recorded as a skipped email with the
    # error message as the custom reason, rather than being raised.
    return skip(SkippedEmailLog.reason_types[:custom], custom_reason: e.message)
  end

  email_log.save!
  email_log
end
# Returns the user this email is for: the user supplied at construction when
# there is one, otherwise whatever User.find_by_email returns for the
# recipient address.
def find_user
  @user || User.find_by_email(to_address)
end
# Returns the (memoized) recipient address of the message: the first entry
# when the message's `to` is an Array, the value itself otherwise, and the
# placeholder "no_email_found" when there is nothing usable.
def to_address
  return @to_address if @to_address

  recipient = @message.try(:to)
  recipient = recipient.first if Array === recipient
  @to_address = recipient.presence || "no_email_found"
end
# Returns the (memoized) CC addresses of the message, or an empty array when
# the message has none.
def cc_addresses
  return @cc_addresses if @cc_addresses

  @cc_addresses = @message.try(:cc) || []
end
# Extracts the lower-cased host from +base_url+.
#
# Falls back to "localhost" when +base_url+ is blank, cannot be parsed as a
# URI, or has no host component.
def self.host_for(base_url)
  fallback = "localhost"
  return fallback unless base_url.present?

  parsed_host =
    begin
      URI.parse(base_url).host
    rescue URI::Error
      nil
    end

  parsed_host.present? ? parsed_host.downcase : fallback
end
private
# Attaches the uploads of +post+ to the message, up to the total size allowed
# by the `email_total_attachment_size_limit_kb` site setting (a limit of 0
# disables attachments entirely).
#
# For each upload the first optimized image is attached when there is one,
# otherwise the original. Supported images are only attached when
# should_attach_image? allows it. The position of every attachment is
# recorded in @message_attachments_index, keyed by the original upload's sha1.
#
# Failures for a single upload are reported through Discourse.warn_exception
# and do not stop the remaining uploads from being attached.
def add_attachments(post)
  max_email_size = SiteSetting.email_total_attachment_size_limit_kb.kilobytes
  return if max_email_size == 0

  email_size = 0
  post.uploads.each do |original_upload|
    optimized_1X = original_upload.optimized_images.first

    if FileHelper.is_supported_image?(original_upload.original_filename) &&
        !should_attach_image?(original_upload, optimized_1X)
      next
    end

    attached_upload = optimized_1X || original_upload
    next if email_size + attached_upload.filesize > max_email_size

    begin
      path =
        if attached_upload.local?
          Discourse.store.path_for(attached_upload)
        else
          Discourse.store.download(attached_upload).path
        end

      # Read and attach the file before recording its position. If reading
      # fails, the index must not keep an entry for this upload, otherwise
      # that entry would point at whichever attachment is added next.
      contents = File.read(path)
      attachment_position = @message.attachments.size
      @message.attachments[original_upload.original_filename] = contents
      @message_attachments_index[original_upload.sha1] = attachment_position
      email_size += File.size(path)
    rescue => e
      Discourse.warn_exception(
        e,
        message: "Failed to attach file to email",
        env: {
          post_id: post.id,
          upload_id: original_upload.id,
          filename: original_upload.original_filename
        }
      )
    end
  end

  fix_parts_after_attachments!
end
# Decides whether an image upload may be attached to the email.
#
# Returns true only when embedding secure images in emails is enabled, the
# upload is secure, and the image (the optimized version when given,
# otherwise the upload itself) is no larger than
# `secure_uploads_max_email_embed_image_size_kb`. Returns nil otherwise.
def should_attach_image?(upload, optimized_1X = nil)
  return unless SiteSetting.secure_uploads_allow_embed_images_in_emails && upload.secure?

  image_size = optimized_1X&.filesize || upload.filesize
  size_limit = SiteSetting.secure_uploads_max_email_embed_image_size_kb.kilobytes
  image_size > size_limit ? nil : true
end
#
# Reconciles two behaviors of the mail gem that collide:
#
# 1. Attachments are added as extra parts at the top level,
# 2. When there are both text and html parts, the content type is set
#    to 'multipart/alternative'.
#
# Attachments are not alternative renderings, so an email that has
# attachments as well as both html and text parts needs some coercing.
#
# In that case this method makes the top level 'multipart/mixed' and moves
# the html and text parts into a nested 'multipart/alternative' part, which
# is placed first. In every other case it does nothing.
#
# Due to mail gem magic, @message.text_part and @message.html_part still
# refer to the same objects.
#
def fix_parts_after_attachments!
  attachments_present = @message.attachments.present?
  both_renderings_present = @message.html_part.present? && @message.text_part.present?
  return unless attachments_present && both_renderings_present

  @message.content_type = "multipart/mixed"

  # Detach the two renderings from the top level, html first, then text.
  detached_html = @message.html_part
  @message.html_part = nil
  detached_text = @message.text_part
  @message.text_part = nil

  alternative = Mail::Part.new do
    content_type "multipart/alternative"

    # we have to re-specify the charset and give the part the decoded body
    # here otherwise the parts will get encoded with US-ASCII which makes
    # a bunch of characters not render correctly in the email
    part content_type: "text/html; charset=utf-8", body: detached_html.body.decoded
    part content_type: "text/plain; charset=utf-8", body: detached_text.body.decoded
  end

  @message.parts.unshift(alternative)
end
def header_value(name)
header = @message.header[name]
return nil unless header
# NOTE: In most cases this is not a problem, but if a header has
# doubled up the header[] method will return an array. So we always
# get the last value of the array and assume that is the correct
# value.
#
# See https://github.com/mikel/mail/blob/8ef377d6a2ca78aa5bd7f739813f5a0648482087/lib/mail/header.rb#L109-L132
return header.last.value if header.is_a?(Array)
header.value
end
# Records that this email was not sent by creating a SkippedEmailLog.
#
# reason_type   - the reason type value to store on the log
# custom_reason - optional free-text reason; only stored when given
#
# Returns whatever SkippedEmailLog.create! returns.
def skip(reason_type, custom_reason: nil)
  extra_attributes = custom_reason ? { custom_reason: custom_reason } : {}

  SkippedEmailLog.create!(
    {
      email_type: @email_type,
      to_address: to_address,
      user_id: @user&.id,
      reason_type: reason_type
    }.merge(extra_attributes)
  )
end
# Merges +value+ into the JSON object stored in the header +name+ and writes
# the result back as the header's single value. A missing or unparseable
# header is treated as an empty object.
def merge_json_x_header(name, value)
  existing =
    begin
      JSON.parse(@message.header[name].to_s)
    rescue StandardError
      nil
    end

  merged = existing || {}
  merged.merge!(value)

  # /!\ @message.header is not a standard ruby hash.
  # It can have multiple values attached to the same key...
  # In order to remove all the previous keys, we have to "nil" it.
  # But for "nil" to work, there must already be a key...
  @message.header[name] = ""
  @message.header[name] = nil
  @message.header[name] = merged.to_json
end
# Returns the reply key for the given post and user, creating the
# PostReplyKey record when it does not exist yet.
#
# Returns nil when either id is missing or when the message does not carry
# the ALLOW_REPLY_BY_EMAIL_HEADER header.
def get_reply_key(post_id, user_id)
  # ALLOW_REPLY_BY_EMAIL_HEADER is only added if we are _not_ sending
  # via group SMTP and if reply by email site settings are configured
  reply_allowed = user_id && post_id &&
    header_value(Email::MessageBuilder::ALLOW_REPLY_BY_EMAIL_HEADER).present?
  return unless reply_allowed

  key_record = PostReplyKey.create_or_find_by!(post_id: post_id, user_id: user_id)
  key_record.reply_key
end
# Truthy when the `reply_by_email_address` site setting is present and
# contains a "+".
def self.bounceable_reply_address?
  reply_address = SiteSetting.reply_by_email_address
  reply_address.present? && reply_address["+"]
end
# Builds the bounce address for +bounce_key+ by replacing the first
# %{reply_key} placeholder in the `reply_by_email_address` site setting with
# "verp-" followed by the key.
def self.bounce_address(bounce_key)
  verp_token = "verp-#{bounce_key}"
  SiteSetting.reply_by_email_address.sub("%{reply_key}", verp_token)
end
##
# Sets the Message-ID header and, for every post after the first one, the
# In-Reply-To and References headers of the message.
#
# When sending an email for the first post (OP) of the topic, we do not
# set References or In-Reply-To headers, since there is nothing yet
# to reference. This counts as the first email in the thread.
#
# Once set, the post's `outbound_message_id` should _always_ be used
# when sending emails relating to a particular post to maintain threading.
# This will either be:
#
# a) A Message-ID generated in an external mail client or service which
#    is recorded when creating a post from an IncomingEmail via Email::Receiver
# b) A Message-ID generated by Discourse and recorded when sending an email
#    for a newly created post, which is created and saved here to the
#    outbound_message_id column on the Post.
#
# The RFC that covers using "Identification Fields", which are References,
# In-Reply-To, Message-ID, et al. can be found at the RFC link below. It's a
# good idea to read this beginning in the area immediately after these
# quotes, at least to understand the 3 main headers:
#
# > The "Message-ID:" field provides a unique message identifier that
# > refers to a particular version of a particular message. The
# > uniqueness of the message identifier is guaranteed by the host that
# > generates it.
#
# > ...
#
# > The "In-Reply-To:" field may be used to identify the message (or
# > messages) to which the new message is a reply, while the "References:"
# > field may be used to identify a "thread" of conversation.
#
# https://www.rfc-editor.org/rfc/rfc5322.html#section-3.6.4
#
# It is a long read, but to understand the decision making process for this
# threading logic you can take a look at:
#
# https://meta.discourse.org/t/discourse-email-messages-are-incorrectly-threaded/233499
def add_identification_field_headers(topic, post)
  @message.header["Message-ID"] = Email::MessageIdService.generate_or_use_existing(post.id).first

  # The first post starts the thread, so it gets no threading headers.
  return unless post.post_number > 1

  op_message_id = Email::MessageIdService.generate_or_use_existing(topic.first_post.id).first

  ##
  # Whenever we reply to a post directly _or_ quote a post, a PostReply
  # record is made, with the reply_post_id referencing the newly created
  # post, and the post_id referencing the post that was quoted or replied to.
  # The posts are loaded newest first (highest id first).
  referenced_posts =
    Post
      .joins("INNER JOIN post_replies ON post_replies.post_id = posts.id ")
      .where("post_replies.reply_post_id = ?", post.id)
      .order(id: :desc)
      .to_a

  if referenced_posts.empty?
    ##
    # No referenced posts means that we are just creating a new post not
    # referring to anything, and as such we should just fall back to using
    # the OP.
    @message.header["In-Reply-To"] = op_message_id
    @message.header["References"] = op_message_id
  else
    ##
    # When referencing _multiple_ posts then we just choose the most recent one
    # to use for References so we have a single parent to work with, but
    # every directly replied to post can go into In-Reply-To.
    #
    # We want to make sure all of the outbound_message_ids are already filled here.
    referenced_post_ids = referenced_posts.map(&:id)
    in_reply_to_message_ids = MessageIdService.generate_or_use_existing(referenced_post_ids)
    @message.header["In-Reply-To"] = in_reply_to_message_ids

    # NOTE(review): this treats the last returned ID as the most recent
    # post's; that depends on the order generate_or_use_existing returns
    # its results in -- confirm against MessageIdService.
    most_recent_post_message_id = in_reply_to_message_ids.last

    ##
    # The RFC specifically states that the content of the parent's References
    # field (in our case a tree of replies based on the PostReply table in
    # addition to the OP post's Message-ID) first, _then_ the parent's
    # Message-ID (in our case the outbound_message_id of the post we are replying to).
    #
    # This creates a thread from the OP all the way down to the most recent post we
    # are replying to.
    reply_tree = referenced_post_reply_tree(referenced_posts.first)
    ancestor_post_ids = reply_tree.values.flatten
    parent_message_ids = MessageIdService.generate_or_use_existing(ancestor_post_ids)

    reference_chain = [op_message_id, parent_message_ids, most_recent_post_message_id]
    @message.header["References"] = reference_chain.flatten.uniq
  end
end
# Walks the post_replies table upwards from +post+ and returns a hash that
# maps each reply post id found on the way to a one-element array holding
# the id of the post it replied to.
def referenced_post_reply_tree(post)
  reply_rows = DB.query(<<~SQL, start_post_id: post.id)
    WITH RECURSIVE cte AS (
    SELECT reply_post_id, post_id FROM post_replies
    WHERE reply_post_id = :start_post_id
    UNION
    SELECT pr.reply_post_id, pr.post_id
    FROM post_replies pr
    INNER JOIN cte
    ON cte.post_id = pr.reply_post_id
    )
    SELECT DISTINCT cte.*, posts.created_at, posts.outbound_message_id
    FROM cte
    INNER JOIN posts ON posts.id = cte.reply_post_id
    ORDER BY posts.created_at DESC, post_id DESC;
  SQL

  reply_rows.each_with_object({}) do |row, tree|
    # We only want to get a single replied-to post per reply, since we cannot
    # deal with multiple parents for References. Only the first row seen for
    # a reply_post_id is kept, so the query's ordering decides which one.
    tree[row.reply_post_id] ||= [row.post_id]
  end
end
end
end