discourse/app/services/inline_uploads.rb

# frozen_string_literal: true

require_dependency "pretty_text"

class InlineUploads
  # Token embedded in replacement templates; `process` swaps it for
  # `upload.short_url` once the upload has been looked up.
  PLACEHOLDER = "__replace__"
  # Token embedded in replacement templates; `process` swaps it for
  # `upload.short_path` (or, for reference definitions, for a temporary
  # `__<sha1>__` marker that is resolved in a final pass).
  PATH_PLACEHOLDER = "__replace_path__"

  # Regexp source (a String, interpolated into larger regexps) matching the
  # tail of an upload URL: "/original/", a digit followed by "X/", any number
  # of single-hex-character directories, 40 lowercase hex characters
  # (presumably the upload's SHA1), then optional alphanumerics/dots (the
  # extension). The part after "/original/" is captured as a group.
  UPLOAD_REGEXP_PATTERN = "/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-zA-Z0-9.]*)"
  private_constant :UPLOAD_REGEXP_PATTERN

  # Returns a copy of +markdown+ in which links to known uploads are rewritten
  # to the upload's short URL / short path. The argument itself is not
  # modified (it is duplicated first).
  #
  # Outline:
  # 1. Reference definitions (`[label]: url`) that resolve to an upload are
  #    rewritten to `<base_url>__<sha1>__`; the marker is resolved in step 5.
  # 2. The markdown is cooked and the resulting HTML is walked to collect, in
  #    document order, the upload links that actually render.
  # 3. Candidate matches are gathered from the raw markdown (bbcode images,
  #    inline images/links, <img> tags, <a> tags and bare upload URLs).
  # 4. Candidates are sorted by position and consumed in lockstep with the
  #    rendered links from step 2; a candidate is replaced only when it lines
  #    up with a rendered link and `Upload.get_from_url` finds the upload.
  # 5. Remaining `__<sha1>__` markers are replaced by the upload's short path.
  #
  # @param markdown [String] raw post markdown
  # @param on_missing [#call, nil] invoked with the link when a candidate
  #   lines up with a rendered link but no upload is found for it
  # @return [String] the rewritten markdown
  def self.process(markdown, on_missing: nil)
    markdown = markdown.dup

    match_md_reference(markdown) do |match, src, replacement, _index|
      if upload = Upload.get_from_url(src)
        with_sha1 = replacement.sub(PATH_PLACEHOLDER, "__#{upload.sha1}__")
        # Block form inserts the text literally; a String replacement would
        # interpret backslash sequences (\0, \\) coming from the label.
        markdown = markdown.sub(match) { with_sha1 }
      end
    end

    cooked_fragment = Nokogiri::HTML::fragment(PrettyText.cook(markdown, disable_emojis: true))
    link_occurrences = []

    cooked_fragment.traverse do |node|
      # Nodes are kept when they are an <img>, when they wrap exactly one
      # childless non-img child, or when they are an <a> with several children
      # none of whose leaf descendants is an img. Everything else is skipped.
      if node.name == "img"
        # Do nothing
      elsif !(node.children.count == 1 && (node.children[0].name != "img" && node.children[0].children.blank?)) &&
        !(node.name == "a" && node.children.count > 1 && !node_children_names(node).include?("img"))

        next
      end

      next if matched_uploads(node).empty?

      if (actual_link = (node.attributes["href"]&.value || node.attributes["src"]&.value))
        link_occurrences << { link: actual_link, is_valid: true }
      elsif node.name != "p"
        # Upload text rendered without href/src: recorded as an invalid slot so
        # the matching raw candidate is consumed but not replaced.
        link_occurrences << { link: nil, is_valid: false }
      end
    end

    # Each entry is [matched_text, link, replacement_template, position].
    raw_matches = []

    match_bbcode_img(markdown) do |match, src, replacement, index|
      raw_matches << [match, src, replacement, index]
    end

    match_md_inline_img(markdown) do |match, src, replacement, index|
      raw_matches << [match, src, replacement, index]
    end

    match_img(markdown) do |match, src, replacement, index|
      raw_matches << [match, src, replacement, index]
    end

    match_anchor(markdown) do |match, href, replacement, index|
      raw_matches << [match, href, replacement, index]
    end

    db = RailsMultisite::ConnectionManagement.current_db

    regexps = [
      /(https?:\/\/[a-zA-Z0-9\.\/-]+\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/,
    ]

    if Discourse.store.external?
      regexps << /((https?:)?#{SiteSetting.Upload.s3_base_url}#{UPLOAD_REGEXP_PATTERN})/
      regexps << /(#{SiteSetting.Upload.s3_cdn_url}#{UPLOAD_REGEXP_PATTERN})/
    end

    regexps.each do |regexp|
      # Start offsets of URLs already handled by an earlier scan for this
      # regexp, so the catch-all scan at the end does not add them twice.
      indexes = Set.new

      # A URL alone in its own paragraph (or at the very start of the text).
      markdown.scan(/(\n{2,}|\A)#{regexp}$/) do |match|
        if match[1].present?
          index = $~.offset(2)[0]
          indexes << index
          extension = match[1].split(".")[-1].downcase

          replacement =
            if FileHelper.supported_images.include?(extension)
              +"![](#{PLACEHOLDER})"
            else
              +"#{Discourse.base_url}#{PATH_PLACEHOLDER}"
            end

          raw_matches << [match[1], match[1], replacement, index]
        end
      end

      # A URL at the start of a line followed by whitespace.
      markdown.scan(/^#{regexp}(\s)/) do |match|
        if match[0].present?
          index = $~.offset(0)[0]
          next if indexes.include?(index)
          indexes << index

          raw_matches << [
            match[0],
            match[0],
            +"#{Discourse.base_url}#{PATH_PLACEHOLDER}",
            index
          ]
        end
      end

      # URLs inside reference definitions are only recorded as handled; no
      # candidate is added for them here.
      markdown.scan(/\[[^\[\]]*\]: #{regexp}/) do |match|
        if match[0].present?
          index = $~.offset(1)[0]
          next if indexes.include?(index)
          indexes << index
        end
      end

      # Any other URL preceded by whitespace or one of `)`, `]`, `<`.
      markdown.scan(/(([\n\s\)\]\<])+)#{regexp}/) do |match|
        if matched_uploads(match[2]).present?
          next if indexes.include?($~.offset(3)[0])

          raw_matches << [
            match[2],
            match[2],
            +"#{Discourse.base_url}#{PATH_PLACEHOLDER}",
            $~.offset(0)[0]
          ]
        end
      end
    end

    raw_matches
      .sort { |a, b| a[3] <=> b[3] }
      .each do |match, link, replace_with, _index|

      # Every candidate consumes one rendered-link slot, matched or not.
      node_info = link_occurrences.shift
      next unless node_info&.dig(:is_valid)
      next unless link.include?(node_info[:link])

      begin
        uri = URI(link)
      rescue URI::Error
        # Unparseable links fall through with uri == nil (no host check).
      end

      if !Discourse.store.external?
        host = uri&.host

        hosts = [Discourse.current_hostname]

        if cdn_url = GlobalSetting.cdn_url
          hosts << URI(cdn_url).hostname
        end

        # With a local store, absolute links to other hosts are left alone.
        next if host && !hosts.include?(host)
      end

      upload = Upload.get_from_url(link)

      if upload
        replace_with.sub!(PLACEHOLDER, upload.short_url)
        replace_with.sub!(PATH_PLACEHOLDER, upload.short_path)
        # Literal insertion: alt text/titles may contain backslashes.
        markdown.sub!(match) { replace_with }
      else
        on_missing.call(link) if on_missing
      end
    end

    markdown.scan(/(__([a-f0-9]{40})__)/) do |match|
      upload = Upload.find_by(sha1: match[1])
      # User text can contain `__<40 hex>__` that is not one of our markers;
      # leave it untouched instead of calling short_path on nil.
      markdown = markdown.sub(match[0], upload.short_path) if upload
    end

    markdown
  end

  # Scans +markdown+ for inline links/images (`[text](url "title")`, with an
  # optional leading `!`) and yields one candidate per hit whose URL is
  # recognised by `matched_uploads`, or per every hit when +external_src+ is
  # true. Nothing is yielded when no block is given.
  #
  # Yields: the full matched text, the URL, a replacement template with
  # PLACEHOLDER in place of the URL (the `!`, text and title are preserved),
  # and the character offset of the match in +markdown+.
  def self.match_md_inline_img(markdown, external_src: false)
    # NOTE(review): `A-z` also spans `[ \ ] ^ _` and backtick — possibly meant
    # `A-Z`; kept as-is because narrowing it would change what is matched.
    inline_link = /(!?\[([^\[\]]*)\]\(([a-zA-z0-9\.\/:-]+)([ ]*['"]{1}[^\)]*['"]{1}[ ]*)?\))/

    markdown.scan(inline_link) do |whole, label, target, title|
      position = Regexp.last_match.begin(0)

      next unless matched_uploads(target).present? || external_src
      next unless block_given?

      bang = whole.start_with?("!") ? "!" : ""
      yield(whole, target, +"#{bang}[#{label}](#{PLACEHOLDER}#{title})", position)
    end
  end

  # Scans +markdown+ for bbcode images (`[img]url[/img]`, optional whitespace
  # around the URL) and yields one candidate per hit whose URL is recognised
  # by `matched_uploads`, or per every hit when +external_src+ is true.
  #
  # Nothing is yielded when no block is given. (Previously
  # `external_src: true` without a block raised LocalJumpError because the
  # block check only guarded the upload branch.)
  #
  # Yields: the full matched text, the URL, a `![](PLACEHOLDER)` replacement
  # template, and the character offset of the match in +markdown+.
  def self.match_bbcode_img(markdown, external_src: false)
    markdown.scan(/(\[img\]\s*([^\[\]\s]+)\s*\[\/img\])/) do |match|
      if (matched_uploads(match[1]).present? || external_src) && block_given?
        yield(match[0], match[1], +"![](#{PLACEHOLDER})", $~.offset(0)[0])
      end
    end
  end

  # Scans +markdown+ for reference definitions (`[label]: url`) and yields one
  # candidate per definition whose URL is recognised by `matched_uploads`.
  # Nothing is yielded when no block is given.
  #
  # Yields: the full definition text, the URL, a replacement template that
  # keeps the label and spacing but points at
  # `Discourse.base_url + PATH_PLACEHOLDER`, and the character offset of the
  # definition in +markdown+.
  def self.match_md_reference(markdown)
    markdown.scan(/(\[([^\]]+)\]:([ ]+)(\S+))/) do |definition, label, gap, target|
      position = Regexp.last_match.begin(0)

      next unless target && matched_uploads(target).present? && block_given?

      template = +"[#{label}]:#{gap}#{Discourse.base_url}#{PATH_PLACEHOLDER}"
      yield(definition, target, template, position)
    end
  end

  # Scans +markdown+ for raw HTML anchors (`<a ...>text</a>` whose text holds
  # no nested tags) and yields one candidate per anchor whose href is
  # recognised by `matched_uploads`, or per every anchor with an href when
  # +external_href+ is true. Nothing is yielded when no block is given.
  #
  # Yields: the full anchor markup, the href, a `[text](PLACEHOLDER)`
  # replacement template, and the character offset of the anchor in
  # +markdown+. The link text is stripped, has newlines removed and runs of
  # spaces collapsed; `|attachment` is appended when the anchor carries a
  # `class` attribute (any value).
  def self.match_anchor(markdown, external_href: false)
    markdown.scan(/((<a[^<]+>)([^<\a>]*?)<\/a>)/) do |anchor, _open_tag, inner_text|
      # Read the offset before any further string matching happens.
      position = Regexp.last_match.begin(0)

      element = Nokogiri::HTML::fragment(anchor).children[0]
      href = element.attributes["href"]&.value

      next unless href
      next unless matched_uploads(href).present? || external_href

      css_class = element.attributes["class"]&.value
      label = inner_text.strip.delete("\n").squeeze(" ")
      label = "#{label}|attachment" if css_class

      yield(anchor, href, +"[#{label}](#{PLACEHOLDER})", position) if block_given?
    end
  end

  # Scans +markdown+ for raw `<img ...>` tags (optionally preceded by another
  # HTML tag and newlines) and yields one candidate per tag whose src is
  # recognised by `matched_uploads`, or per every tag with a src when
  # +external_src+ is true. Nothing is yielded when no block is given.
  #
  # Yields: the `<img>` text to replace, the src, a replacement, and the
  # character offset of the whole match in +markdown+. The replacement is
  # normally markdown (`![alt|WxH](PLACEHOLDER "title")`, padded with
  # newlines/indentation when the image follows an HTML tag). When the image
  # or the preceding tag is indented by four or more spaces the tag is kept
  # as HTML and only its src is swapped for PATH_PLACEHOLDER (presumably
  # because such indentation renders as a code block — confirm).
  def self.match_img(markdown, external_src: false)
    markdown.scan(/(([ ]*)<(?!img)[^<>]+\/?>)?([\r\n]*)(([ ]*)<img ([^>\n]+)>([ ]*))([\r\n]*)/) do |match|
      # Capture the offset first: String#sub further down overwrites $~, which
      # used to turn the yielded index into an offset inside the tag itself.
      index = $~.offset(0)[0]

      node = Nokogiri::HTML::fragment(match[3].strip).children[0]
      src = node.attributes["src"]&.value

      next unless src && (matched_uploads(src).present? || external_src)

      text = node.attributes["alt"]&.value
      # nil.to_i is 0, so missing or non-numeric dimensions are dropped.
      width = node.attributes["width"]&.value.to_i
      height = node.attributes["height"]&.value.to_i
      title = node.attributes["title"]&.value
      text = "#{text}|#{width}x#{height}" if width > 0 && height > 0

      after_html_tag = match[0].present?
      # True when the preceding tag is not self-closing (`/>`).
      after_open_tag = after_html_tag && !match[0].end_with?("/>")

      spaces_before =
        if after_open_tag
          match[4].length > 0 ? match[4] : "  "
        else
          ""
        end

      replacement = +"#{spaces_before}![#{text}](#{PLACEHOLDER}#{title.present? ? " \"#{title}\"" : ""})"

      # Make sure the image ends up separated from the surrounding HTML by
      # newlines: add two when there were none, one when there was one.
      if after_html_tag && (num_newlines = match[2].length) <= 1
        replacement.prepend("\n" * (num_newlines == 0 ? 2 : 1))
      end

      if after_open_tag && (num_newlines = match[7].length) <= 1
        replacement << ("\n" * (num_newlines == 0 ? 2 : 1))
      end

      match[3].strip! if !after_html_tag

      next unless block_given?

      deeply_indented = [match[1], match[4]].any? { |indent| indent && indent.length >= 4 }

      if deeply_indented
        yield(match[3], src, match[3].sub(src, PATH_PLACEHOLDER), index)
      else
        yield(match[3], src, replacement, index)
      end
    end
  end

  # Returns every upload URL found in +node+ (anything responding to `to_s`:
  # a String or a parsed HTML node), as an Array of Strings; empty when there
  # is none.
  #
  # Recognised forms are `upload://...` short URLs, `/uploads/short-url/...`
  # paths (relative or under the site base URL) and "original" upload paths
  # for the current database, relative or under the base URL, the CDN URL, or
  # the S3 base/CDN URLs when the store is external. A URL only counts when
  # it is delimited by start/end of line, whitespace, quotes, parentheses or
  # angle brackets.
  def self.matched_uploads(node)
    # Lets a configured http:// or https:// URL match either scheme.
    any_scheme = ->(url) { url.sub(/https?:\/\//, "(https?://)") }

    base_url = any_scheme.call(Discourse.base_url)
    cdn_url = GlobalSetting.cdn_url ? any_scheme.call(GlobalSetting.cdn_url) : nil
    db = RailsMultisite::ConnectionManagement.current_db

    patterns = [
      /(upload:\/\/([a-zA-Z0-9]+)[a-zA-Z0-9\.]*)/,
      /(\/uploads\/short-url\/([a-zA-Z0-9]+)[a-zA-Z0-9\.]*)/,
      /(#{base_url}\/uploads\/short-url\/([a-zA-Z0-9]+)[a-zA-Z0-9\.]*)/,
    ]

    # Patterns for uploads served by the site itself; built only on demand.
    site_patterns = lambda do
      list = [
        /(\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/,
        /(#{base_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/,
      ]
      list << /(#{cdn_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/ if cdn_url
      list
    end

    if !Discourse.store.external?
      patterns.concat(site_patterns.call)
    elsif Rails.configuration.multisite
      patterns << /((https?:)?#{SiteSetting.Upload.s3_base_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/
      patterns << /(#{SiteSetting.Upload.s3_cdn_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/
    else
      patterns << /((https?:)?#{SiteSetting.Upload.s3_base_url}#{UPLOAD_REGEXP_PATTERN})/
      patterns << /(#{SiteSetting.Upload.s3_cdn_url}#{UPLOAD_REGEXP_PATTERN})/
      patterns.concat(site_patterns.call)
    end

    text = node.to_s

    patterns.flat_map do |pattern|
      # Group 0 is the leading delimiter; group 1 is the URL itself.
      text.scan(/(^|[\n\s"'\(>])#{pattern}($|[\n\s"'\)<])/).map { |groups| groups[1] }
    end
  end
  private_class_method :matched_uploads

  # Collects the names of all leaf nodes (nodes without children) beneath
  # +node+, including +node+ itself when it is a leaf.
  #
  # @param node an object responding to `children` and `name`
  # @param names [Set] accumulator; it is mutated and returned
  # @return [Set] the accumulator with the leaf names added
  def self.node_children_names(node, names = Set.new)
    kids = node.children
    return names << node.name if kids.blank?

    kids.each { |kid| node_children_names(kid, names) }
    names
  end
  private_class_method :node_children_names
end
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`# frozen_string_literal: true`

			`require_dependency "pretty_text"`

			`class InlineUploads`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`PLACEHOLDER = "__replace__"`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`PATH_PLACEHOLDER = "__replace_path__"`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00
FIX: `InlineUploads` does not correct urls with uppercase extension. 2019-06-13 11:18:49 +08:00			`UPLOAD_REGEXP_PATTERN = "/original/(\\dX/(?:[a-f0-9]/)[a-f0-9]{40}[a-zA-Z0-9.])"`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`private_constant :UPLOAD_REGEXP_PATTERN`

DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`def self.process(markdown, on_missing: nil)`
			`markdown = markdown.dup`
FIX: Edge cases with markdown references for `InlineUploads`. 2019-06-13 12:08:01 +08:00
			`match_md_reference(markdown) do \|match, src, replacement, index\|`
			`if upload = Upload.get_from_url(src)`
			`markdown = markdown.sub(match, replacement.sub!(PATH_PLACEHOLDER, "__#{upload.sha1}__"))`
			`end`
			`end`

DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`cooked_fragment = Nokogiri::HTML::fragment(PrettyText.cook(markdown, disable_emojis: true))`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`link_occurences = []`

			`cooked_fragment.traverse do \|node\|`
			`if node.name == "img"`
			`# Do nothing`
FIX: `InlineUploads` should replace attachment links with markdown text. 2019-06-19 11:14:46 +08:00			`elsif !(node.children.count == 1 && (node.children[0].name != "img" && node.children[0].children.blank?)) &&`
			`!(node.name == "a" && node.children.count > 1 && !node_children_names(node).include?("img"))`

DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`next`
			`end`

			`if seen_link = matched_uploads(node).first`
FIX: Cover more edge cases in `InlineUploads`. 2019-06-12 16:41:44 +08:00			`if (actual_link = (node.attributes["href"]&.value \|\| node.attributes["src"]&.value))`
			`link_occurences << { link: actual_link, is_valid: true }`
			`elsif node.name != "p"`
			`link_occurences << { link: actual_link, is_valid: false }`
			`end`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`end`
			`end`

DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`raw_matches = []`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`match_bbcode_img(markdown) do \|match, src, replacement, index\|`
			`raw_matches << [match, src, replacement, index]`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`end`

DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`match_md_inline_img(markdown) do \|match, src, replacement, index\|`
			`raw_matches << [match, src, replacement, index]`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`end`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`match_img(markdown) do \|match, src, replacement, index\|`
			`raw_matches << [match, src, replacement, index]`
			`end`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`match_anchor(markdown) do \|match, href, replacement, index\|`
			`raw_matches << [match, href, replacement, index]`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`end`

			`db = RailsMultisite::ConnectionManagement.current_db`

			`regexps = [`
DEV: More improvements to `InlineUploads`. * Convert inline links to short path ``` <link> <link> <link> ``` to ``` <short_path> <short_path> <short_path> ``` 2019-06-07 11:46:52 +08:00			`/(https?:\/\/[a-zA-Z0-9\.\/-]+\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/,`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`]`

			`if Discourse.store.external?`
FIX: Cover more edge cases in `InlineUploads`. 2019-06-12 16:41:44 +08:00			`regexps << /((https?:)?#{SiteSetting.Upload.s3_base_url}#{UPLOAD_REGEXP_PATTERN})/`
DEV: More improvements to `InlineUploads`. * Convert inline links to short path ``` <link> <link> <link> ``` to ``` <short_path> <short_path> <short_path> ``` 2019-06-07 11:46:52 +08:00			`regexps << /(#{SiteSetting.Upload.s3_cdn_url}#{UPLOAD_REGEXP_PATTERN})/`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`end`

			`regexps.each do \|regexp\|`
DEV: More improvements to `InlineUploads`. * Convert inline links to short path ``` <link> <link> <link> ``` to ``` <short_path> <short_path> <short_path> ``` 2019-06-07 11:46:52 +08:00			`indexes = Set.new`

			`markdown.scan(/(\n{2,}\|\A)#{regexp}$/) do \|match\|`
			`if match[1].present?`
FIX: skip markdown conversion for hotlinked non image urls 2019-07-16 20:35:17 +08:00			`extension = match[1].split(".")[-1].downcase`
DEV: More improvements to `InlineUploads`. * Convert inline links to short path ``` <link> <link> <link> ``` to ``` <short_path> <short_path> <short_path> ``` 2019-06-07 11:46:52 +08:00			`index = $~.offset(2)[0]`
			`indexes << index`
FIX: when 'raw' started with non-image upload url it's not converted to short-url. dd0f0494c64e75046a4f26e3e46c132edb750cb0 2019-07-17 13:43:50 +08:00			`if FileHelper.supported_images.include?(extension)`
			`raw_matches << [match[1], match[1], +"![](#{PLACEHOLDER})", index]`
			`else`
			`raw_matches << [match[1], match[1], ++"#{Discourse.base_url}#{PATH_PLACEHOLDER}", index]`
			`end`
DEV: More improvements to `InlineUploads`. * Convert inline links to short path ``` <link> <link> <link> ``` to ``` <short_path> <short_path> <short_path> ``` 2019-06-07 11:46:52 +08:00			`end`
			`end`

			`markdown.scan(/^#{regexp}(\s)/) do \|match\|`
			`if match[0].present?`
			`index = $~.offset(0)[0]`
			`next if indexes.include?(index)`
			`indexes << index`

			`raw_matches << [`
			`match[0],`
			`match[0],`
			`+"#{Discourse.base_url}#{PATH_PLACEHOLDER}",`
			`$~.offset(0)[0]`
			`]`
			`end`
			`end`

			`markdown.scan(/\[[^\[\]]*\]: #{regexp}/) do \|match\|`
			`if match[0].present?`
			`index = $~.offset(1)[0]`
			`next if indexes.include?(index)`
			`indexes << index`
			`end`
			`end`

DEV: Fix edge case for `InlineUploads`. 2019-06-14 13:47:44 +08:00			`markdown.scan(/(([\n\s\)\]\<])+)#{regexp}/) do \|match\|`
DEV: More improvements to `InlineUploads`. * Convert inline links to short path ``` <link> <link> <link> ``` to ``` <short_path> <short_path> <short_path> ``` 2019-06-07 11:46:52 +08:00			`if matched_uploads(match[2]).present?`
			`next if indexes.include?($~.offset(3)[0])`

			`raw_matches << [`
			`match[2],`
			`match[2],`
			`+"#{Discourse.base_url}#{PATH_PLACEHOLDER}",`
			`$~.offset(0)[0]`
			`]`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`end`
			`end`
			`end`

			`raw_matches`
			`.sort { \|a, b\| a[3] <=> b[3] }`
			`.each do \|match, link, replace_with, _index\|`

			`node_info = link_occurences.shift`
			`next unless node_info&.dig(:is_valid)`

			`if link.include?(node_info[:link])`
			`begin`
			`uri = URI(link)`
			`rescue URI::Error`
			`end`

			`if !Discourse.store.external?`
DEV: `InlineUploads` should process CDN upload URLs as well. 2019-06-14 13:11:53 +08:00			`host = uri&.host`

Fix the build. 2019-06-14 13:56:35 +08:00			`hosts = [Discourse.current_hostname]`

			`if cdn_url = GlobalSetting.cdn_url`
			`hosts << URI(GlobalSetting.cdn_url).hostname`
			`end`

			`if host && !hosts.include?(host)`
DEV: `InlineUploads` should process CDN upload URLs as well. 2019-06-14 13:11:53 +08:00			`next`
			`end`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`end`

			`upload = Upload.get_from_url(link)`

			`if upload`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`replace_with.sub!(PLACEHOLDER, upload.short_url)`
			`replace_with.sub!(PATH_PLACEHOLDER, upload.short_path)`
			`markdown.sub!(match, replace_with)`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`else`
			`on_missing.call(link) if on_missing`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`end`
			`end`
			`end`

FIX: Edge cases with markdown references for `InlineUploads`. 2019-06-13 12:08:01 +08:00			`markdown.scan(/(__([a-f0-9]{40})__)/) do \|match\|`
			`upload = Upload.find_by(sha1: match[1])`
			`markdown = markdown.sub(match[0], upload.short_path)`
			`end`

DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`markdown`
			`end`

DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`def self.match_md_inline_img(markdown, external_src: false)`
			`markdown.scan(/(!?\[([^\[\]])\]\(([a-zA-z0-9\.\/:-]+)([ ]['"]{1}[^\)]['"]{1}[ ])?\))/) do \|match\|`
			`if (matched_uploads(match[2]).present? \|\| external_src) && block_given?`
			`yield(`
			`match[0],`
			`match[2],`
			`+"#{match[0].start_with?("!") ? "!" : ""}[#{match[1]}](#{PLACEHOLDER}#{match[3]})",`
			`$~.offset(0)[0]`
			`)`
			`end`
			`end`
			`end`

Fix the build. 2019-06-13 13:53:43 +08:00			`def self.match_bbcode_img(markdown, external_src: false)`
FIX: BBcode edge case for `InlineUploads`. 2019-06-20 11:47:32 +08:00			`markdown.scan(/(\[img\]\s([^\[\]\s]+)\s\[\/img\])/) do \|match\|`
Fix the build. 2019-06-13 13:53:43 +08:00			`if (matched_uploads(match[1]).present? && block_given?) \|\| external_src`
FIX: `InlineUploads` matching on external bbcode img url. 2019-06-13 13:47:18 +08:00			`yield(match[0], match[1], +"![](#{PLACEHOLDER})", $~.offset(0)[0])`
			`end`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`end`
			`end`

			`def self.match_md_reference(markdown)`
			`markdown.scan(/(\[([^\]]+)\]:([ ]+)(\S+))/) do \|match\|`
DEV: More improvements to `InlineUploads`. * Convert inline links to short path ``` <link> <link> <link> ``` to ``` <short_path> <short_path> <short_path> ``` 2019-06-07 11:46:52 +08:00			`if match[3] && matched_uploads(match[3]).present? && block_given?`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`yield(`
			`match[0],`
			`match[3],`
			`+"[#{match[1]}]:#{match[2]}#{Discourse.base_url}#{PATH_PLACEHOLDER}",`
			`$~.offset(0)[0]`
			`)`
			`end`
			`end`
			`end`

			`def self.match_anchor(markdown, external_href: false)`
			`markdown.scan(/((<a[^<]+>)([^<\a>]*?)<\/a>)/) do \|match\|`
			`node = Nokogiri::HTML::fragment(match[0]).children[0]`
			`href = node.attributes["href"]&.value`

			`if href && (matched_uploads(href).present? \|\| external_href)`
			`has_attachment = node.attributes["class"]&.value`
			`index = $~.offset(0)[0]`
			`text = match[2].strip.gsub("\n", "").gsub(/ +/, " ")`
			`text = "#{text}\|attachment" if has_attachment`

			`yield(match[0], href, +"[#{text}](#{PLACEHOLDER})", index) if block_given?`
			`end`
			`end`
			`end`

			`def self.match_img(markdown, external_src: false)`
FIX: Support carriage return in `InlineUploads`. Follow up to 8deaef387242203b706162614b41066205d3a10e. 2019-06-21 14:07:06 +08:00			`markdown.scan(/(([ ])<(?!img)[^<>]+\/?>)?([\r\n])(([ ])<img ([^>\n]+)>([ ]))([\r\n]*)/) do \|match\|`
FIX: Don't replace img tags within anchor tags with markdown format. Follow up to 9a25b0d614daeac167e1ba1220aaefe489162fb6. 2019-06-21 12:32:02 +08:00			`node = Nokogiri::HTML::fragment(match[3].strip).children[0]`
FIX: `InlineUploads` raises an error when img tag is invalid. 2019-06-12 10:31:00 +08:00			`src = node.attributes["src"]&.value`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00
FIX: `InlineUploads` raises an error when img tag is invalid. 2019-06-12 10:31:00 +08:00			`if src && (matched_uploads(src).present? \|\| external_src)`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`text = node.attributes["alt"]&.value`
FIX: only add image size when with & height are in pixels 2019-07-06 02:34:11 +08:00			`width = node.attributes["width"]&.value.to_i`
			`height = node.attributes["height"]&.value.to_i`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`title = node.attributes["title"]&.value`
FIX: only add image size when with & height are in pixels 2019-07-06 02:34:11 +08:00			`text = "#{text}\|#{width}x#{height}" if width > 0 && height > 0`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`after_html_tag = match[0].present?`

			`spaces_before =`
			`if after_html_tag && !match[0].end_with?("/>")`
FIX: Don't replace img tags within anchor tags with markdown format. Follow up to 9a25b0d614daeac167e1ba1220aaefe489162fb6. 2019-06-21 12:32:02 +08:00			`(match[4].length > 0 ? match[4] : " ")`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`else`
			`""`
			`end`

			`replacement = +"#{spaces_before}![#{text}](#{PLACEHOLDER}#{title.present? ? " \"#{title}\"" : ""})"`

FIX: Don't replace img tags within anchor tags with markdown format. Follow up to 9a25b0d614daeac167e1ba1220aaefe489162fb6. 2019-06-21 12:32:02 +08:00			`if after_html_tag && (num_newlines = match[2].length) <= 1`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`replacement.prepend("\n" * (num_newlines == 0 ? 2 : 1))`
			`end`

FIX: Don't replace img tags within anchor tags with markdown format. Follow up to 9a25b0d614daeac167e1ba1220aaefe489162fb6. 2019-06-21 12:32:02 +08:00			`if after_html_tag && !match[0].end_with?("/>") && (num_newlines = match[7].length) <= 1`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`replacement += ("\n" * (num_newlines == 0 ? 2 : 1))`
			`end`

FIX: Don't replace img tags within anchor tags with markdown format. Follow up to 9a25b0d614daeac167e1ba1220aaefe489162fb6. 2019-06-21 12:32:02 +08:00			`match[3].strip! if !after_html_tag`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00
FIX: Support carriage return in `InlineUploads`. Follow up to 8deaef387242203b706162614b41066205d3a10e. 2019-06-21 14:07:06 +08:00			`if (match[1].nil? \|\| match[1].length < 4)`
			`if (match[4].nil? \|\| match[4].length < 4)`
			`yield(match[3], src, replacement, $~.offset(0)[0]) if block_given?`
			`else`
			`yield(match[3], src, match[3].sub(src, PATH_PLACEHOLDER), $~.offset(0)[0]) if block_given?`
			`end`
FIX: Don't replace img tags within anchor tags with markdown format. Follow up to 9a25b0d614daeac167e1ba1220aaefe489162fb6. 2019-06-21 12:32:02 +08:00			`else`
			`yield(match[3], src, match[3].sub(src, PATH_PLACEHOLDER), $~.offset(0)[0]) if block_given?`
			`end`
DEV: Support more formats of inline images. 2019-06-06 15:50:16 +08:00			`end`
			`end`
			`end`

DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`def self.matched_uploads(node)`
			`matches = []`

DEV: Support both `http` and `https` for `InlineUploads`. 2019-06-14 12:48:31 +08:00			`base_url = Discourse.base_url.sub(/https?:\/\//, "(https?://)")`
Fix the build. 2019-06-14 13:56:35 +08:00
			`if GlobalSetting.cdn_url`
			`cdn_url = GlobalSetting.cdn_url.sub(/https?:\/\//, "(https?://)")`
			`end`
DEV: Support both `http` and `https` for `InlineUploads`. 2019-06-14 12:48:31 +08:00
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`regexps = [`
FIX: `InlineUploads` does not correct urls with uppercase extension. 2019-06-13 11:18:49 +08:00			`/(upload:\/\/([a-zA-Z0-9]+)[a-zA-Z0-9\.]*)/,`
			`/(\/uploads\/short-url\/([a-zA-Z0-9]+)[a-zA-Z0-9\.]*)/,`
DEV: Support both `http` and `https` for `InlineUploads`. 2019-06-14 12:48:31 +08:00			`/(#{base_url}\/uploads\/short-url\/([a-zA-Z0-9]+)[a-zA-Z0-9\.]*)/,`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`]`

			`db = RailsMultisite::ConnectionManagement.current_db`

			`if Discourse.store.external?`
			`if Rails.configuration.multisite`
FIX: Cover more edge cases in `InlineUploads`. 2019-06-12 16:41:44 +08:00			`regexps << /((https?:)?#{SiteSetting.Upload.s3_base_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`regexps << /(#{SiteSetting.Upload.s3_cdn_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`else`
FIX: Cover more edge cases in `InlineUploads`. 2019-06-12 16:41:44 +08:00			`regexps << /((https?:)?#{SiteSetting.Upload.s3_base_url}#{UPLOAD_REGEXP_PATTERN})/`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`regexps << /(#{SiteSetting.Upload.s3_cdn_url}#{UPLOAD_REGEXP_PATTERN})/`
			`regexps << /(\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/`
DEV: Support both `http` and `https` for `InlineUploads`. 2019-06-14 12:48:31 +08:00			`regexps << /(#{base_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/`
Fix the build. 2019-06-14 13:56:35 +08:00			`regexps << /(#{cdn_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/ if cdn_url`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`end`
			`else`
DEV: Switch `InlineUploads` to a regexp based implementation. 2019-06-03 15:41:26 +08:00			`regexps << /(\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/`
DEV: Support both `http` and `https` for `InlineUploads`. 2019-06-14 12:48:31 +08:00			`regexps << /(#{base_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/`
Fix the build. 2019-06-14 13:56:35 +08:00			`regexps << /(#{cdn_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/ if cdn_url`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`end`

			`node = node.to_s`

			`regexps.each do \|regexp\|`
FIX: Cover more edge cases in `InlineUploads`. 2019-06-12 16:41:44 +08:00			`node.scan(/(^\|[\n\s"'\(>])#{regexp}($\|[\n\s"'\)<])/) do \|matched\|`
			`matches << matched[1]`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`end`
			`end`

			`matches`
			`end`
			`private_class_method :matched_uploads`
FIX: `InlineUploads` should replace attachment links with markdown text. 2019-06-19 11:14:46 +08:00
			`def self.node_children_names(node, names = Set.new)`
			`if node.children.blank?`
			`names << node.name`
			`return names`
			`end`

			`node.children.each do \|child\|`
			`names = node_children_names(child, names)`
			`end`

			`names`
			`end`
			`private_class_method :node_children_names`
DEV: Class that converts MD with old attachment links to new MD. 2019-05-30 14:38:46 +08:00			`end`