mirror of
https://github.com/discourse/discourse.git
synced 2024-11-26 05:03:39 +08:00
30990006a9
This reduces chances of errors where consumers of strings mutate inputs and reduces memory usage of the app. Test suite passes now, but there may be some stuff left, so we will run a few sites on a branch prior to merging
193 lines
4.8 KiB
Ruby
193 lines
4.8 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
class PlainTextToMarkdown
|
|
SIGNATURE_SEPARATOR ||= "-- ".freeze
|
|
|
|
URL_REGEX ||= /((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/i
|
|
|
|
def initialize(plaintext, opts = {})
|
|
@plaintext = plaintext
|
|
@lines = []
|
|
|
|
@format_flowed = opts[:format_flowed] || false
|
|
@delete_flowed_space = opts[:delete_flowed_space] || false
|
|
end
|
|
|
|
def to_markdown
|
|
prepare_lines
|
|
classify_lines
|
|
|
|
markdown = +""
|
|
last_quote_level = 0
|
|
last_line_blank = false
|
|
|
|
@lines.each do |line|
|
|
current_line_blank = line.text.blank?
|
|
|
|
unless last_line_blank && current_line_blank
|
|
if line.quote_level > 0
|
|
quote_identifiers = ">" * line.quote_level
|
|
markdown << quote_identifiers << "\n" unless line.quote_level >= last_quote_level || current_line_blank
|
|
markdown << quote_identifiers
|
|
markdown << " " unless current_line_blank
|
|
else
|
|
markdown << "\n" unless last_quote_level == 0 || current_line_blank
|
|
end
|
|
|
|
markdown << convert_text(line)
|
|
markdown << "\n"
|
|
end
|
|
|
|
last_line_blank = current_line_blank
|
|
last_quote_level = line.quote_level
|
|
end
|
|
|
|
markdown.rstrip!
|
|
markdown
|
|
end
|
|
|
|
private
|
|
|
|
class CodeBlock < Struct.new(:start_line, :end_line)
|
|
def initialize(start_line, end_line = nil)
|
|
super
|
|
end
|
|
|
|
def valid?
|
|
start_line.present? && end_line.present?
|
|
end
|
|
end
|
|
|
|
class Line < Struct.new(:text, :quote_level, :code_block)
|
|
def initialize(text, quote_level = 0, code_block = nil)
|
|
super
|
|
end
|
|
|
|
def valid_code_block?
|
|
code_block&.valid?
|
|
end
|
|
end
|
|
|
|
def prepare_lines
|
|
previous_line = nil
|
|
|
|
@plaintext.each_line do |text|
|
|
text.chomp!
|
|
line = Line.new(text)
|
|
|
|
remove_quote_level_indicators!(line)
|
|
|
|
if @format_flowed
|
|
line = merge_lines(line, previous_line)
|
|
@lines << line unless line == previous_line
|
|
else
|
|
@lines << line
|
|
end
|
|
|
|
previous_line = line
|
|
end
|
|
end
|
|
|
|
def classify_lines
|
|
previous_line = nil
|
|
|
|
@lines.each do |line|
|
|
classify_line_as_code!(line, previous_line)
|
|
|
|
previous_line = line
|
|
end
|
|
end
|
|
|
|
# @param line [Line]
|
|
def remove_quote_level_indicators!(line)
|
|
match_data = line.text.match(/^(?<indicators>>+)\s?(?<text>.*)/)
|
|
|
|
if match_data
|
|
line.text = match_data[:text]
|
|
line.quote_level = match_data[:indicators].length
|
|
end
|
|
end
|
|
|
|
# @param line [Line]
|
|
# @param previous_line [Line]
|
|
# @return [Line]
|
|
def merge_lines(line, previous_line)
|
|
return line if previous_line.nil? || line.text.blank?
|
|
return line if line.text == SIGNATURE_SEPARATOR || previous_line.text == SIGNATURE_SEPARATOR
|
|
return line unless line.quote_level == previous_line.quote_level && previous_line.text.end_with?(" ")
|
|
|
|
previous_line.text = previous_line.text[0...-1] if @delete_flowed_space
|
|
previous_line.text += line.text
|
|
previous_line
|
|
end
|
|
|
|
# @param line [Line]
|
|
# @param previous_line [Line]
|
|
def classify_line_as_code!(line, previous_line)
|
|
line.code_block = previous_line.code_block unless previous_line.nil? || previous_line.valid_code_block?
|
|
return unless line.text =~ /^\s{0,3}```/
|
|
|
|
if line.code_block.present?
|
|
line.code_block.end_line = line
|
|
else
|
|
line.code_block = CodeBlock.new(line)
|
|
end
|
|
end
|
|
|
|
# @param line [Line]
|
|
# @return [string]
|
|
def convert_text(line)
|
|
text = line.text
|
|
|
|
if line.valid_code_block?
|
|
code_block = line.code_block
|
|
return code_block.start_line == line || code_block.end_line == line ? text.lstrip : text
|
|
end
|
|
|
|
converted_text = replace_duplicate_links(text)
|
|
converted_text = escape_special_characters(converted_text)
|
|
converted_text = indent_with_non_breaking_spaces(converted_text)
|
|
converted_text
|
|
end
|
|
|
|
def replace_duplicate_links(text)
|
|
text.to_enum(:scan, URL_REGEX)
|
|
.map { $& }
|
|
.group_by { |url| url }
|
|
.keep_if { |_, urls | urls.length > 1 }
|
|
.keys.each do |url|
|
|
|
|
text.gsub!(Regexp.new(%Q|#{url}(\s*[()\\[\\]<>«»'"“”‘’]?#{url}[()\\[\\]<>«»'"“”‘’]?)|, Regexp::IGNORECASE), url)
|
|
end
|
|
|
|
text
|
|
end
|
|
|
|
def indent_with_non_breaking_spaces(text)
|
|
text.sub(/^\s+/) do |s|
|
|
# replace tabs with 2 spaces
|
|
s.gsub!("\t", " ")
|
|
|
|
# replace indentation with non-breaking spaces
|
|
s.length > 1 ? " " * s.length : s
|
|
end
|
|
end
|
|
|
|
def escape_special_characters(text)
|
|
escaped_text = +""
|
|
|
|
text.split(URL_REGEX).each do |text_part|
|
|
if text_part =~ URL_REGEX
|
|
# no escaping withing URLs
|
|
escaped_text << text_part
|
|
else
|
|
# escape Markdown and HTML
|
|
text_part.gsub!(/[\\`*_{}\[\]()#+\-.!~]/) { |c| "\\#{c}" }
|
|
escaped_text << CGI.escapeHTML(text_part)
|
|
end
|
|
end
|
|
|
|
escaped_text
|
|
end
|
|
end
|