discourse/lib/email/message_id_service.rb
Martin Brennan e3d495850d
FEATURE: Overhaul email threading (#17996)
See https://meta.discourse.org/t/discourse-email-messages-are-incorrectly-threaded/233499
for thorough reasoning.

This commit changes how we generate Message-IDs and do email
threading for emails sent from Discourse. The main changes are
as follows:

* Introduce an outbound_message_id column on Post that
  is either a) filled with a Discourse-generated Message-ID
  the first time that post is used for an outbound email
  or b) filled with an original Message-ID from an external
  mail client or service if the post was created from an
  incoming email.
* Change Discourse-generated Message-IDs to be more consistent
  and static, in the format `discourse/post/:post_id@:host`
* Do not send References or In-Reply-To headers for emails sent
  for the OP of topics.
* Make sure that In-Reply-To is filled with either a) the OP's
  Message-ID if the post is not a direct reply or b) the parent
  post's Message-ID
* Make sure that In-Reply-To has all referenced post's Message-IDs
* Make sure that References is filled with a chain of Message-IDs
  from the OP down to the parent post of the new post.

We also are keeping X-Discourse-Post-Id and X-Discourse-Topic-Id,
headers that we previously removed, for easier visual debugging
of outbound emails.

Finally, we backfill the `outbound_message_id` for posts that have
a linked `IncomingEmail` record, using the `message_id` of that record.
We do not need to do that for posts that don't have an incoming email
since they are backfilled at runtime if `outbound_message_id` is missing.
2022-09-26 09:14:24 +10:00

167 lines
6.6 KiB
Ruby

# frozen_string_literal: true
module Email
##
# Email Message-IDs are used in both our outbound and inbound email
# flow. For the outbound flow via Email::Sender, we assign a unique
# Message-ID for any emails sent out from the application.
# If we are sending an email related to a topic, such as through the
# PostAlerter class, then the Message-ID will contain references to
# the topic ID, and if it is for a specific post, the post ID,
# along with a random suffix to make the Message-ID truly unique.
# The host must also be included on the Message-IDs.
#
# For the inbound email flow via Email::Receiver, we use Message-IDs
# to discern which topic or post the inbound email reply should be
# in response to. In this case, the Message-ID is extracted from the
# References and/or In-Reply-To headers, and compared with either
# the IncomingEmail table, the Post table, or the IncomingEmail to
# determine where to send the reply.
#
# See https://datatracker.ietf.org/doc/html/rfc2822#section-3.6.4 for
# more specific information around Message-IDs in email.
#
# See https://tools.ietf.org/html/rfc850#section-2.1.7 for the
# Message-ID format specification.
class MessageIdService
class << self
def generate_default
"<#{SecureRandom.uuid}@#{host}>"
end
# TODO (martin) 2023-01-01 Deprecated, remove this once the new threading
# systems have been in place for a while.
def generate_for_post(post, use_incoming_email_if_present: false)
if use_incoming_email_if_present && post.incoming_email&.message_id.present?
return "<#{post.incoming_email.message_id}>"
end
"<topic/#{post.topic_id}/#{post.id}.#{random_suffix}@#{host}>"
end
# TODO (martin) 2023-01-01 Deprecated, remove this once the new threading
# systems have been in place for a while.
def generate_for_topic(topic, use_incoming_email_if_present: false, canonical: false)
first_post = topic.ordered_posts.first
incoming_email = first_post.incoming_email
# If the incoming email was created by handle_mail, then it was an
# inbound email sent to Discourse and handled by Email::Receiver,
# this is the only case where we want to use the original Message-ID
# because we want to maintain threading in the original mail client.
if use_incoming_email_if_present &&
incoming_email&.message_id.present? &&
incoming_email&.created_via == IncomingEmail.created_via_types[:handle_mail]
return "<#{first_post.incoming_email.message_id}>"
end
if canonical
"<topic/#{topic.id}@#{host}>"
else
"<topic/#{topic.id}.#{random_suffix}@#{host}>"
end
end
##
# The outbound_message_id may be present because either:
#
# * The post was created via incoming email and Email::Receiver, and
# references a Message-ID generated by an external email client or service.
# * At least one email has been sent because of the post being created
# to inform interested parties via email.
#
# If it is blank then we should assume Discourse was the originator
# of the post, and generate a Message-ID to be used from now on using
# our discourse/post/POST_ID@HOST format.
def generate_or_use_existing(post_ids)
post_ids = Array.wrap(post_ids)
return [] if post_ids.empty?
DB.exec(<<~SQL, host: host)
UPDATE posts
SET outbound_message_id = 'discourse/post/' || posts.id || '@' || :host
WHERE outbound_message_id IS NULL AND posts.id IN (#{post_ids.join(",")});
SQL
DB.query_single(<<~SQL)
SELECT '<' || posts.outbound_message_id || '>'
FROM posts
WHERE posts.id IN (#{post_ids.join(",")})
ORDER BY posts.created_at ASC;
SQL
end
##
# Uses extracted Message-IDs from both the In-Reply-To and References
# headers from an incoming email.
def find_post_from_message_ids(message_ids)
message_ids = message_ids.map { |message_id| message_id_clean(message_id) }
# TODO (martin) 2023-01-01 We should remove these backwards-compatible
# formats for the Message-ID and solely use the discourse/post/999@host
# format.
topic_ids = message_ids.map { |message_id| message_id[message_id_topic_id_regexp, 1] }.compact.map(&:to_i)
post_ids = message_ids.map { |message_id| message_id[message_id_post_id_regexp, 1] }.compact.map(&:to_i)
post_ids << message_ids.map { |message_id| message_id[message_id_discourse_regexp, 1] }.compact.map(&:to_i)
post_ids << Post.where(outbound_message_id: message_ids).or(Post.where(topic_id: topic_ids, post_number: 1)).pluck(:id)
post_ids << EmailLog.where(message_id: message_ids).pluck(:post_id)
post_ids << IncomingEmail.where(message_id: message_ids).pluck(:post_id)
post_ids.flatten!
post_ids.compact!
post_ids.uniq!
return if post_ids.empty?
Post.where(id: post_ids).order(:created_at).last
end
def random_suffix
SecureRandom.hex(12)
end
# TODO (martin) 2023-01-01 We should remove these backwards-compatible
# formats for the Message-ID and solely use the discourse/post/999@host
# format.
def discourse_generated_message_id?(message_id)
!!(message_id =~ message_id_post_id_regexp) ||
!!(message_id =~ message_id_topic_id_regexp) ||
!!(message_id =~ message_id_discourse_regexp)
end
# TODO (martin) 2023-01-01 We should remove these backwards-compatible
# formats for the Message-ID and solely use the discourse/post/999@host
# format.
def message_id_post_id_regexp
Regexp.new "topic/\\d+/(\\d+|\\d+\.\\w+)@#{Regexp.escape(host)}"
end
def message_id_topic_id_regexp
Regexp.new "topic/(\\d+|\\d+\.\\w+)@#{Regexp.escape(host)}"
end
def message_id_discourse_regexp
Regexp.new "discourse/post/(\\d+)@#{Regexp.escape(host)}"
end
def message_id_rfc_format(message_id)
message_id.present? && !is_message_id_rfc?(message_id) ? "<#{message_id}>" : message_id
end
def message_id_clean(message_id)
message_id.present? && is_message_id_rfc?(message_id) ? message_id.gsub(/^<|>$/, "") : message_id
end
def is_message_id_rfc?(message_id)
message_id.start_with?('<') && message_id.include?('@') && message_id.end_with?('>')
end
def host
Email::Sender.host_for(Discourse.base_url)
end
end
end
end