# frozen_string_literal: true

# Finds references to uploads inside raw post markdown (HTML <img>/<a> tags,
# BBCode [img] tags, markdown inline images/links, markdown reference
# definitions and bare URLs) and rewrites them to the upload's short URL or
# short path.
class InlineUploads
  # Token used in replacement templates where the upload's short URL goes.
  PLACEHOLDER = "__replace__"
  # Token used in replacement templates where the upload's short path goes.
  PATH_PLACEHOLDER = "__replace_path__"

  # Regexp source fragment (interpolated into larger patterns) matching the
  # "/original/..." tail of an upload URL, with an optional "?v=<digits>" suffix.
  UPLOAD_REGEXP_PATTERN = "/original/(\\dX/(?:\\h/)*\\h{40}[a-zA-Z0-9.]*)(\\?v=\\d+)?"
  private_constant :UPLOAD_REGEXP_PATTERN

  # Rewrites upload links found in +markdown+ to short upload URLs / paths.
  #
  # The raw text is scanned for candidate links, the markdown is cooked, and
  # the candidates (sorted by their position in the raw text) are paired
  # positionally with the upload links seen while traversing the cooked HTML.
  # A candidate is only rewritten when its paired cooked link is valid and is
  # contained in the raw link.
  #
  # @param markdown [String] raw markdown; it is duplicated, never mutated
  # @param on_missing [#call, nil] called with the link when
  #   Upload.get_from_url finds no upload for a candidate
  # @return [String] the rewritten markdown
  def self.process(markdown, on_missing: nil)
    markdown = markdown.dup

    # Reference definitions are first swapped for a "__<sha1>__" token, which
    # is resolved to the upload's short path at the very end of this method.
    match_md_reference(markdown) do |match, src, replacement, _index|
      if upload = Upload.get_from_url(src)
        # Block form: the template embeds user text (the reference label), and
        # a replacement *string* would have its backslash sequences interpreted.
        markdown = markdown.sub(match) { replacement.sub(PATH_PLACEHOLDER, "__#{upload.sha1}__") }
      end
    end

    cooked_fragment = Nokogiri::HTML5::fragment(PrettyText.cook(markdown, disable_emojis: true))
    link_occurrences = []

    cooked_fragment.traverse do |node|
      if node.name == "img"
        # Do nothing
      elsif !(node.children.count == 1 && (node.children[0].name != "img" && node.children[0].children.blank?)) &&
            !(node.name == "a" && node.children.count > 1 && !node_children_names(node).include?("img"))
        next
      end

      if seen_link = matched_uploads(node).first
        if (actual_link = (node.attributes["href"]&.value || node.attributes["src"]&.value))
          link_occurrences << { link: actual_link, is_valid: true }
        elsif node.name != "p"
          link_occurrences << { link: seen_link, is_valid: false }
        end
      end
    end

    # Each entry is [matched text, link, replacement template, index in raw].
    raw_matches = []

    match_bbcode_img(markdown) do |match, src, replacement, index|
      raw_matches << [match, src, replacement, index]
    end

    match_md_inline_img(markdown) do |match, src, replacement, index|
      raw_matches << [match, src, replacement, index]
    end

    match_img(markdown) do |match, src, replacement, index|
      raw_matches << [match, src, replacement, index]
    end

    match_anchor(markdown) do |match, href, replacement, index|
      raw_matches << [match, href, replacement, index]
    end

    regexps = [
      /(https?:\/\/[a-zA-Z0-9\.\/-]+\/#{Discourse.store.upload_path}#{UPLOAD_REGEXP_PATTERN})/,
    ]

    if Discourse.store.external?
      regexps << /((?:https?:)?#{SiteSetting.Upload.s3_base_url}#{UPLOAD_REGEXP_PATTERN})/
      regexps << /(#{SiteSetting.Upload.s3_cdn_url}#{UPLOAD_REGEXP_PATTERN})/
    end

    regexps.each do |regexp|
      # Start offsets of URLs already handled, so one URL is not queued twice.
      indexes = Set.new

      # A URL ending a line, at the start of the text or after a blank line.
      markdown.scan(/(\n{2,}|\A)#{regexp}$/) do |match|
        if match[1].present? && match[2].present?
          extension = match[2].split(".")[-1].downcase
          index = $~.offset(2)[0]
          indexes << index
          if FileHelper.supported_images.include?(extension)
            raw_matches << [match[1], match[1], +"![](#{PLACEHOLDER})", index]
          else
            raw_matches << [match[1], match[1], +"#{Discourse.base_url}#{PATH_PLACEHOLDER}", index]
          end
        end
      end

      # A URL at the start of a line followed by whitespace.
      markdown.scan(/^#{regexp}(\s)/) do |match|
        if match[0].present?
          index = $~.offset(0)[0]
          next if !indexes.add?(index)
          raw_matches << [match[0], match[0], +"#{Discourse.base_url}#{PATH_PLACEHOLDER}", index]
        end
      end

      # URLs inside reference definitions are only recorded as seen here.
      markdown.scan(/\[[^\[\]]*\]: #{regexp}/) do |match|
        indexes.add($~.offset(1)[0]) if match[0].present?
      end

      # Any remaining URL preceded by whitespace, ")", "]" or "<".
      markdown.scan(/(([\n\s\)\]\<])+)#{regexp}/) do |match|
        if matched_uploads(match[2]).present?
          next if !indexes.add?($~.offset(3)[0])
          index = $~.offset(0)[0]
          raw_matches << [match[2], match[2], +"#{Discourse.base_url}#{PATH_PLACEHOLDER}", index]
        end
      end
    end

    raw_matches
      .sort { |a, b| a[3] <=> b[3] }
      .each do |match, link, replace_with, _index|
        # Consume one cooked occurrence per raw match, in order.
        node_info = link_occurrences.shift
        next unless node_info&.dig(:is_valid)

        if link.include?(node_info[:link])
          begin
            uri = URI(link)
          rescue URI::Error
            # An unparsable link leaves uri nil, so the host check is skipped.
          end

          if !Discourse.store.external?
            host = uri&.host

            hosts = [Discourse.current_hostname]

            if cdn_url = GlobalSetting.cdn_url
              hosts << URI(cdn_url).hostname
            end

            # Links that name a host other than this site or its CDN are skipped.
            if host && !hosts.include?(host)
              next
            end
          end

          upload = Upload.get_from_url(link)

          if upload
            replace_with.sub!(PLACEHOLDER, upload.short_url)
            replace_with.sub!(PATH_PLACEHOLDER, upload.short_path)
            # Block form keeps backslashes in user text (alt text, link text)
            # from being read as replacement escapes such as "\\1" or "\\\\".
            markdown.sub!(match) { replace_with }
          else
            on_missing.call(link) if on_missing
          end
        end
      end

    # Resolve the "__<sha1>__" tokens inserted for reference definitions.
    markdown.scan(/(__(\h{40})__)/) do |match|
      upload = Upload.find_by(sha1: match[1])
      # Text that merely looks like a token has no upload; leave it untouched.
      next unless upload
      markdown = markdown.sub(match[0], upload.short_path)
    end

    markdown
  end

  # Scans +markdown+ for inline images/links of the form "![alt](src "title")"
  # or "[text](src)".
  #
  # Yields (matched text, src, replacement template, start index) for every
  # match whose src is an upload, or for every match when +external_src+ is
  # true. The template holds PLACEHOLDER in place of the src.
  def self.match_md_inline_img(markdown, external_src: false)
    markdown.scan(/(!?\[([^\[\]]*)\]\(([^\s\)]+)([ ]*['"]{1}[^\)]*['"]{1}[ ]*)?\))/) do |match|
      if (external_src || matched_uploads(match[2]).present?) && block_given?
        yield(
          match[0],
          match[2],
          +"#{match[0].start_with?("!") ? "!" : ""}[#{match[1]}](#{PLACEHOLDER}#{match[3]})",
          $~.offset(0)[0]
        )
      end
    end
  end

  # Scans +markdown+ for BBCode images, "[img]src[/img]" (case-insensitive).
  #
  # Yields (matched text, src, "![](PLACEHOLDER)" template, start index) for
  # every match whose src is an upload, or for every match when
  # +external_src+ is true.
  def self.match_bbcode_img(markdown, external_src: false)
    markdown.scan(/(\[img\]\s*([^\[\]\s]+)\s*\[\/img\])/i) do |match|
      if (external_src || (matched_uploads(match[1]).present?)) && block_given?
        yield(match[0], match[1], +"![](#{PLACEHOLDER})", $~.offset(0)[0])
      end
    end
  end

  # Scans +markdown+ for reference definitions, "[label]: url".
  #
  # Yields (matched text, url, replacement template, start index) for every
  # definition whose url is an upload. The template keeps the label and
  # spacing and points at Discourse.base_url followed by PATH_PLACEHOLDER.
  def self.match_md_reference(markdown)
    markdown.scan(/(\[([^\]]+)\]:([ ]+)(\S+))/) do |match|
      if match[3] && matched_uploads(match[3]).present? && block_given?
        yield(
          match[0],
          match[3],
          +"[#{match[1]}]:#{match[2]}#{Discourse.base_url}#{PATH_PLACEHOLDER}",
          $~.offset(0)[0]
        )
      end
    end
  end

  # Scans +markdown+ for HTML anchors, "<a ...>text</a>".
  #
  # Yields (matched text, href, "[text](PLACEHOLDER)" template, start index)
  # for every anchor whose href is an upload, or for every anchor with an
  # href when +external_href+ is true. When the anchor has a class attribute
  # the link text is suffixed with "|attachment".
  def self.match_anchor(markdown, external_href: false)
    markdown.scan(/((<a[^<]+>)([^<\a>]*?)<\/a>)/i) do |match|
      node = Nokogiri::HTML5::fragment(match[0]).children[0]
      href = node.attributes["href"]&.value

      if href && (external_href || matched_uploads(href).present?)
        has_attachment = node.attributes["class"]&.value
        # Read the offset before the gsub calls below overwrite $~.
        index = $~.offset(0)[0]
        text = match[2].strip.gsub("\n", "").gsub(/ +/, " ")
        text = "#{text}|attachment" if has_attachment

        yield(match[0], href, +"[#{text}](#{PLACEHOLDER})", index) if block_given?
      end
    end
  end

  # Scans +markdown+ for HTML "<img ...>" tags.
  #
  # Yields (matched <img> tag, src, replacement tag, start index) for every
  # tag whose src is an upload, or for every tag with a src when
  # +external_src+ is true. The replacement is the tag re-serialized with its
  # src set to the short URL of uploads[src] when +uploads+ provides one, or
  # to PLACEHOLDER otherwise.
  #
  # @param uploads [#[], nil] optional lookup from src to an upload
  def self.match_img(markdown, external_src: false, uploads: nil)
    markdown.scan(/(<(?!img)[^<>]+\/?>)?(\s*)(<img [^>\n]+>)/i) do |match|
      node = Nokogiri::HTML5::fragment(match[2].strip).children[0]
      src = node&.attributes&.[]("src")&.value

      if src && (external_src || matched_uploads(src).present?)
        upload = uploads&.[](src)
        node["src"] = upload&.short_url || PLACEHOLDER

        # NOTE(review): match[1] only ever holds whitespace; if `present?` is
        # ActiveSupport's (false for blank strings) this is always 0 — confirm.
        spaces_before = match[1].present? ? match[1][/ +$/].size : 0
        replacement = +"#{" " * spaces_before}#{node.to_s}"

        yield(match[2], src, replacement, $~.offset(0)[0]) if block_given?
      end
    end
  end

  # Replaces image sources in +raw+ with short upload URLs.
  #
  # The block is called with each image src found (HTML <img>, BBCode [img]
  # and markdown inline forms) and must return the upload to link to, or a
  # falsy value to leave that image untouched.
  #
  # @param raw [String] raw post text; not mutated
  # @return [String] the rewritten text
  def self.replace_hotlinked_image_urls(raw:, &blk)
    replace = Proc.new do |match, match_src, replacement, _index|
      upload = blk.call(match_src)
      next if !upload

      replacement =
        if replacement.include?(InlineUploads::PLACEHOLDER)
          replacement.sub(InlineUploads::PLACEHOLDER, upload.short_url)
        elsif replacement.include?(InlineUploads::PATH_PLACEHOLDER)
          replacement.sub(InlineUploads::PATH_PLACEHOLDER, upload.short_path)
        end

      # A template without either placeholder gives nothing to substitute.
      next if !replacement

      # Block form: the replacement carries user text (e.g. alt text) whose
      # backslashes must be inserted literally, not read as "\\1"-style escapes.
      raw = raw.gsub(match) { replacement }
    end

    # there are 6 ways to insert an image in a post
    # HTML tag - <img src="http://...">
    InlineUploads.match_img(raw, external_src: true, &replace)

    # BBCode tag - [img]http://...[/img]
    InlineUploads.match_bbcode_img(raw, external_src: true, &replace)

    # Markdown linked image - [![alt](http://...)](http://...)
    # Markdown inline - ![alt](http://...)
    # Markdown inline - ![](http://... "image title")
    # Markdown inline - ![alt](http://... "image title")
    InlineUploads.match_md_inline_img(raw, external_src: true, &replace)

    raw
  end

  # Returns every upload URL found in +node+ (converted with to_s).
  #
  # Recognized forms are "upload://" short URLs, "/uploads/short-url/" paths
  # (relative or under the base URL), and "/original/..." upload paths under
  # the relative URL root, the base URL, the CDN URL and, for an external
  # store, the S3 base and S3 CDN URLs. A URL only counts when it is
  # delimited by the start/end of a line, whitespace, a quote, a parenthesis
  # or an angle bracket.
  #
  # @return [Array<String>] the matched URLs, grouped by pattern
  def self.matched_uploads(node)
    upload_path = Discourse.store.upload_path
    # Accept either scheme regardless of the one the site is configured with.
    base_url = Discourse.base_url.sub(/https?:\/\//, "(https?://)")

    regexps = [
      /(upload:\/\/([a-zA-Z0-9]+)[a-zA-Z0-9\.]*)/,
      /(\/uploads\/short-url\/([a-zA-Z0-9]+)[a-zA-Z0-9\.]*)/,
      /(#{base_url}\/uploads\/short-url\/([a-zA-Z0-9]+)[a-zA-Z0-9\.]*)/,
      /(#{GlobalSetting.relative_url_root}\/#{upload_path}#{UPLOAD_REGEXP_PATTERN})/,
      /(#{base_url}\/#{upload_path}#{UPLOAD_REGEXP_PATTERN})/,
    ]

    if GlobalSetting.cdn_url && (cdn_url = GlobalSetting.cdn_url.sub(/https?:\/\//, "(https?://)"))
      regexps << /(#{cdn_url}\/#{upload_path}#{UPLOAD_REGEXP_PATTERN})/
      if GlobalSetting.relative_url_root.present?
        regexps << /(#{cdn_url}#{GlobalSetting.relative_url_root}\/#{upload_path}#{UPLOAD_REGEXP_PATTERN})/
      end
    end

    if Discourse.store.external?
      if Rails.configuration.multisite
        regexps << /((https?:)?#{SiteSetting.Upload.s3_base_url}\/#{upload_path}#{UPLOAD_REGEXP_PATTERN})/
        regexps << /(#{SiteSetting.Upload.s3_cdn_url}\/#{upload_path}#{UPLOAD_REGEXP_PATTERN})/
      else
        regexps << /((https?:)?#{SiteSetting.Upload.s3_base_url}#{UPLOAD_REGEXP_PATTERN})/
        regexps << /(#{SiteSetting.Upload.s3_cdn_url}#{UPLOAD_REGEXP_PATTERN})/
      end
    end

    matches = []
    node = node.to_s

    regexps.each do |regexp|
      node.scan(/(^|[\n\s"'\(>])#{regexp}($|[\n\s"'\)<])/) do |matched|
        # Group 1 is the leading delimiter; group 2 is the whole URL.
        matches << matched[1]
      end
    end

    matches
  end
  private_class_method :matched_uploads

  # Collects the names of all leaf nodes (nodes without children) beneath
  # +node+, or the name of +node+ itself when it has no children.
  #
  # @param names [Set] accumulator shared across the recursion
  # @return [Set] the collected node names
  def self.node_children_names(node, names = Set.new)
    if node.children.blank?
      names << node.name
      return names
    end

    node.children.each do |child|
      names = node_children_names(child, names)
    end

    names
  end
  private_class_method :node_children_names
end