discourse/lib/cooked_post_processor.rb

714 lines
21 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
# Post processing that we can do after a post has already been cooked.
2013-07-08 07:39:08 +08:00
# For example, inserting the onebox content, or image sizes/thumbnails.
2013-02-06 03:16:51 +08:00
class CookedPostProcessor
INLINE_ONEBOX_LOADING_CSS_CLASS = "inline-onebox-loading"
INLINE_ONEBOX_CSS_CLASS = "inline-onebox"
PERF: Improve quality of `PostSearchData#raw_data`. (#7275) This commit fixes the follow quality issue with `PostSearchData#raw_data`: 1. URLs are being tokenized and links with similar href and characters are being duplicated in the raw data. `Post#cooked`: ``` <p><a href=\"https://meta.discourse.org/some.png\" class=\"onebox\" target=\"_blank\" rel=\"nofollow noopener\">https://meta.discourse.org/some.png</a></p> ``` `PostSearchData#raw_data` Before: ``` This is a test topic 0 Uncategorized https://meta.discourse.org/some.png discourse org/some png https://meta.discourse.org/some.png discourse org/some png ``` `PostSearchData#raw_data` After: ``` This is a test topic 0 Uncategorized https://meta.discourse.org/some.png meta discourse org ``` 2. Ligthbox being included in search pollutes the `PostSearchData#raw_data` unncessarily. From 28 March 2018 to 28 March 2019, searches for the term `image` on `meta.discourse.org` had a click through rate of 2.1%. Non-lightboxed images are not included in indexing for search yet we were indexing content within a lightbox. Also, search for terms like `image` was affected we were using `Pasted image` as the filename for uploads that were pasted. `Post#cooked` ``` <p>Let me see how I can fix this image<br>\n<div class=\"lightbox-wrapper\"><a class=\"lightbox\" href=\"https://meta.discourse.org/some.png\" title=\"some.png\" rel=\"nofollow noopener\"><img src=\"https://meta.discourse.org/some.png\" width=\"275\" height=\"299\"><div class=\"meta\">\n<svg class=\"fa d-icon d-icon-far-image svg-icon\" aria-hidden=\"true\"><use xlink:href=\"#far-image\"></use></svg><span class=\"filename\">some.png</span><span class=\"informations\">1750×2000</span><svg class=\"fa d-icon d-icon-discourse-expand svg-icon\" aria-hidden=\"true\"><use xlink:href=\"#discourse-expand\"></use></svg>\n</div></a></div></p> ``` `PostSearchData#raw_data` Before: ``` This is a test topic 0 Uncategorized Let me see how I can fix this image some.png png https://meta.discourse.org/some.png discourse org/some png some.png png 1750×2000 ``` `PostSearchData#raw_data` After: ``` This is a test topic 0 Uncategorized Let me see how I can fix this image ``` In terms of indexing performance, we now have to parse the given HTML through nokogiri twice. However performance is not a huge worry here since a string length of 194170 takes only 30ms to scrub plus the indexing takes place in a background job.
2019-04-01 10:14:29 +08:00
LIGHTBOX_WRAPPER_CSS_CLASS = "lightbox-wrapper"
LOADING_SIZE = 10
LOADING_COLORS = 32
attr_reader :cooking_options, :doc
2017-07-28 09:20:09 +08:00
def initialize(post, opts = {})
2013-02-06 03:16:51 +08:00
@dirty = false
@opts = opts
@post = post
@previous_cooked = (@post.cooked || "").dup
# NOTE: we re-cook the post here in order to prevent timing issues with edits
# cf. https://meta.discourse.org/t/edit-of-rebaked-post-doesnt-show-in-html-only-in-raw/33815/6
@cooking_options = post.cooking_options || opts[:cooking_options] || {}
@cooking_options[:topic_id] = post.topic_id
@cooking_options = @cooking_options.symbolize_keys
2016-04-13 02:09:59 +08:00
@doc = Nokogiri::HTML::fragment(post.cook(post.raw, @cooking_options))
@has_oneboxes = post.post_analyzer.found_oneboxes?
2013-02-19 14:57:14 +08:00
@size_cache = {}
@disable_loading_image = !!opts[:disable_loading_image]
@omit_nofollow = post.omit_nofollow?
2013-02-06 03:16:51 +08:00
end
def post_process(bypass_bump: false, new_post: false)
DistributedMutex.synchronize("post_process_#{@post.id}", validity: 10.minutes) do
DiscourseEvent.trigger(:before_post_process_cooked, @doc, @post)
remove_full_quote_on_direct_reply if new_post
post_process_oneboxes
post_process_images
post_process_quotes
optimize_urls
remove_user_ids
update_post_image
2017-10-24 01:09:38 +08:00
enforce_nofollow
pull_hotlinked_images(bypass_bump)
grant_badges
@post.link_post_uploads(fragments: @doc)
2017-06-24 02:35:10 +08:00
DiscourseEvent.trigger(:post_process_cooked, @doc, @post)
nil
end
end
2016-04-07 00:02:18 +08:00
def has_emoji?
(@doc.css("img.emoji") - @doc.css(".quote img")).size > 0
end
def grant_badges
return unless Guardian.new.can_see?(@post)
2016-04-14 04:38:24 +08:00
BadgeGranter.grant(Badge.find(Badge::FirstEmoji), @post.user, post_id: @post.id) if has_emoji?
BadgeGranter.grant(Badge.find(Badge::FirstOnebox), @post.user, post_id: @post.id) if @has_oneboxes
BadgeGranter.grant(Badge.find(Badge::FirstReplyByEmail), @post.user, post_id: @post.id) if @post.is_reply_by_email?
2013-02-06 03:16:51 +08:00
end
def post_process_quotes
@doc.css("aside.quote").each do |q|
post_number = q['data-post']
topic_id = q['data-topic']
if topic_id && post_number
comparer = QuoteComparer.new(
topic_id.to_i,
post_number.to_i,
q.css('blockquote').text
)
if comparer.modified?
q['class'] = ((q['class'] || '') + " quote-modified").strip
end
end
end
end
def remove_full_quote_on_direct_reply
return if !SiteSetting.remove_full_quote
return if @post.post_number == 1
return if @doc.css("aside.quote").size != 1
previous = Post
.where("post_number < ? AND topic_id = ? AND post_type = ? AND NOT hidden", @post.post_number, @post.topic_id, Post.types[:regular])
.order("post_number DESC")
.limit(1)
.pluck(:cooked)
.first
return if previous.blank?
previous_text = Nokogiri::HTML::fragment(previous).text.strip
quoted_text = @doc.css("aside.quote:first-child blockquote").first&.text&.strip || ""
return if previous_text.gsub(/(\s){2,}/, '\1') != quoted_text.gsub(/(\s){2,}/, '\1')
quote_regexp = /\A\s*\[quote.+?\[\/quote\]/im
quoteless_raw = @post.raw.sub(quote_regexp, "").strip
return if @post.raw.strip == quoteless_raw
PostRevisor.new(@post).revise!(
Discourse.system_user,
{
raw: quoteless_raw,
edit_reason: I18n.t(:removed_direct_reply_full_quotes)
},
skip_validations: true,
bypass_bump: true
)
end
def add_image_placeholder!(img)
src = img["src"].sub(/^https?:/i, "")
if large_images.include?(src)
return add_large_image_placeholder!(img)
elsif broken_images.include?(src)
return add_broken_image_placeholder!(img)
end
false
end
def add_large_image_placeholder!(img)
url = img["src"]
is_hyperlinked = is_a_hyperlink?(img)
placeholder = create_node("div", "large-image-placeholder")
img.add_next_sibling(placeholder)
placeholder.add_child(img)
a = create_link_node(nil, url, true)
img.add_next_sibling(a)
span = create_span_node("url", url)
a.add_child(span)
Upgrade to FontAwesome 5 (take two) (#6673) * Add missing icons to set * Revert FA5 revert This reverts commit 42572ff * use new SVG syntax in locales * Noscript page changes (remove login button, center "powered by" footer text) * Cast wider net for SVG icons in settings - include any _icon setting for SVG registry (offers better support for plugin settings) - let themes store multiple pipe-delimited icons in a setting - also replaces broken onebox image icon with SVG reference in cooked post processor * interpolate icons in locales * Fix composer whisper icon alignment * Add support for stacked icons * SECURITY: enforce hostname to match discourse hostname This ensures that the hostname rails uses for various helpers always matches the Discourse hostname * load SVG sprite with pre-initializers * FIX: enable caching on SVG sprites * PERF: use JSONP for SVG sprites so they are served from CDN This avoids needing to deal with CORS for loading of the SVG Note, added the svg- prefix to the filename so we can quickly tell in dev tools what the file is * Add missing SVG sprite JSONP script to CSP * Upgrade to FA 5.5.0 * Add support for all FA4.7 icons - adds complete frontend and backend for renamed FA4.7 icons - improves performance of SvgSprite.bundle and SvgSprite.all_icons * Fix group avatar flair preview - adds an endpoint at /svg-sprites/search/:keyword - adds frontend ajax call that pulls icon in avatar flair preview even when it is not in subset * Remove FA 4.7 font files
2018-11-27 05:49:57 +08:00
span.add_previous_sibling(create_icon_node("far-image"))
span.add_next_sibling(create_span_node("help", I18n.t("upload.placeholders.too_large", max_size_kb: SiteSetting.max_image_size_kb)))
# Only if the image is already linked
if is_hyperlinked
parent = placeholder.parent
parent.add_next_sibling(placeholder)
if parent.name == 'a' && parent["href"].present?
if url == parent["href"]
parent.remove
else
parent["class"] = "link"
a.add_previous_sibling(parent)
lspan = create_span_node("url", parent["href"])
parent.add_child(lspan)
lspan.add_previous_sibling(create_icon_node("link"))
end
end
end
img.remove
true
end
def add_broken_image_placeholder!(img)
img.name = "span"
Upgrade to FontAwesome 5 (take two) (#6673) * Add missing icons to set * Revert FA5 revert This reverts commit 42572ff * use new SVG syntax in locales * Noscript page changes (remove login button, center "powered by" footer text) * Cast wider net for SVG icons in settings - include any _icon setting for SVG registry (offers better support for plugin settings) - let themes store multiple pipe-delimited icons in a setting - also replaces broken onebox image icon with SVG reference in cooked post processor * interpolate icons in locales * Fix composer whisper icon alignment * Add support for stacked icons * SECURITY: enforce hostname to match discourse hostname This ensures that the hostname rails uses for various helpers always matches the Discourse hostname * load SVG sprite with pre-initializers * FIX: enable caching on SVG sprites * PERF: use JSONP for SVG sprites so they are served from CDN This avoids needing to deal with CORS for loading of the SVG Note, added the svg- prefix to the filename so we can quickly tell in dev tools what the file is * Add missing SVG sprite JSONP script to CSP * Upgrade to FA 5.5.0 * Add support for all FA4.7 icons - adds complete frontend and backend for renamed FA4.7 icons - improves performance of SvgSprite.bundle and SvgSprite.all_icons * Fix group avatar flair preview - adds an endpoint at /svg-sprites/search/:keyword - adds frontend ajax call that pulls icon in avatar flair preview even when it is not in subset * Remove FA 4.7 font files
2018-11-27 05:49:57 +08:00
img.set_attribute("class", "broken-image")
img.set_attribute("title", I18n.t("post.image_placeholder.broken"))
Upgrade to FontAwesome 5 (take two) (#6673) * Add missing icons to set * Revert FA5 revert This reverts commit 42572ff * use new SVG syntax in locales * Noscript page changes (remove login button, center "powered by" footer text) * Cast wider net for SVG icons in settings - include any _icon setting for SVG registry (offers better support for plugin settings) - let themes store multiple pipe-delimited icons in a setting - also replaces broken onebox image icon with SVG reference in cooked post processor * interpolate icons in locales * Fix composer whisper icon alignment * Add support for stacked icons * SECURITY: enforce hostname to match discourse hostname This ensures that the hostname rails uses for various helpers always matches the Discourse hostname * load SVG sprite with pre-initializers * FIX: enable caching on SVG sprites * PERF: use JSONP for SVG sprites so they are served from CDN This avoids needing to deal with CORS for loading of the SVG Note, added the svg- prefix to the filename so we can quickly tell in dev tools what the file is * Add missing SVG sprite JSONP script to CSP * Upgrade to FA 5.5.0 * Add support for all FA4.7 icons - adds complete frontend and backend for renamed FA4.7 icons - improves performance of SvgSprite.bundle and SvgSprite.all_icons * Fix group avatar flair preview - adds an endpoint at /svg-sprites/search/:keyword - adds frontend ajax call that pulls icon in avatar flair preview even when it is not in subset * Remove FA 4.7 font files
2018-11-27 05:49:57 +08:00
img << "<svg class=\"fa d-icon d-icon-unlink svg-icon\" aria-hidden=\"true\"><use xlink:href=\"#unlink\"></use></svg>"
img.remove_attribute("src")
img.remove_attribute("width")
img.remove_attribute("height")
true
end
def large_images
@large_images ||=
begin
JSON.parse(@post.custom_fields[Post::LARGE_IMAGES].presence || "[]")
rescue JSON::ParserError
[]
end
end
def broken_images
@broken_images ||=
begin
JSON.parse(@post.custom_fields[Post::BROKEN_IMAGES].presence || "[]")
rescue JSON::ParserError
[]
end
end
def downloaded_images
@downloaded_images ||= @post.downloaded_images
end
2013-07-08 07:39:08 +08:00
def extract_images
# all images with a src attribute
@doc.css("img[src]") -
# minus data images
@doc.css("img[src^='data']") -
# minus emojis
2015-08-05 18:57:31 +08:00
@doc.css("img.emoji") -
# minus oneboxed images
oneboxed_images -
# minus images inside quotes
@doc.css(".quote img")
2013-07-08 07:39:08 +08:00
end
def extract_images_for_post
# all images with a src attribute
@doc.css("img[src]") -
# minus emojis
@doc.css("img.emoji") -
# minus images inside quotes
@doc.css(".quote img")
end
def oneboxed_images
@doc.css(".onebox-body img, .onebox img, img.onebox")
2017-06-02 17:39:06 +08:00
end
2013-11-06 02:04:47 +08:00
def limit_size!(img)
2013-11-26 01:36:13 +08:00
# retrieve the size from
# 1) the width/height attributes
# 2) the dimension from the preview (image_sizes)
# 3) the dimension of the original image (HTTP request)
w, h = get_size_from_attributes(img) ||
get_size_from_image_sizes(img["src"], @opts[:image_sizes]) ||
get_size(img["src"])
2013-11-06 02:04:47 +08:00
# limit the size of the thumbnail
img["width"], img["height"] = ImageSizer.resize(w, h)
2013-07-08 07:39:08 +08:00
end
2013-11-26 01:36:13 +08:00
def get_size_from_attributes(img)
w, h = img["width"].to_i, img["height"].to_i
return [w, h] unless w <= 0 || h <= 0
# if only width or height are specified attempt to scale image
if w > 0 || h > 0
w = w.to_f
h = h.to_f
return unless original_image_size = get_size(img["src"])
original_width, original_height = original_image_size.map(&:to_f)
if w > 0
2017-07-28 09:20:09 +08:00
ratio = w / original_width
[w.floor, (original_height * ratio).floor]
else
2017-07-28 09:20:09 +08:00
ratio = h / original_height
[(original_width * ratio).floor, h.floor]
end
end
2013-11-26 01:36:13 +08:00
end
2013-11-06 02:04:47 +08:00
def get_size_from_image_sizes(src, image_sizes)
return unless image_sizes.present?
image_sizes.each do |image_size|
url, size = image_size[0], image_size[1]
if url && url.include?(src) &&
size && size["width"].to_i > 0 && size["height"].to_i > 0
return [size["width"], size["height"]]
end
2013-11-06 02:04:47 +08:00
end
end
2013-02-21 09:07:36 +08:00
def add_to_size_cache(url, w, h)
@size_cache[url] = [w, h]
end
2013-11-06 02:04:47 +08:00
def get_size(url)
return @size_cache[url] if @size_cache.has_key?(url)
2013-11-06 02:04:47 +08:00
absolute_url = url
absolute_url = Discourse.base_url_no_prefix + absolute_url if absolute_url =~ /^\/[^\/]/
return unless absolute_url
2013-11-06 02:04:47 +08:00
# FastImage fails when there's no scheme
absolute_url = SiteSetting.scheme + ":" + absolute_url if absolute_url.start_with?("//")
2013-11-06 02:04:47 +08:00
return unless is_valid_image_url?(absolute_url)
2013-11-06 02:04:47 +08:00
# we can *always* crawl our own images
2016-03-08 11:38:26 +08:00
return unless SiteSetting.crawl_images? || Discourse.store.has_been_uploaded?(url)
@size_cache[url] = FastImage.size(absolute_url)
rescue Zlib::BufError, URI::Error, OpenSSL::SSL::SSLError
# FastImage.size raises BufError for some gifs, leave it.
2013-06-18 04:46:48 +08:00
end
2013-11-06 02:04:47 +08:00
def is_valid_image_url?(url)
uri = URI.parse(url)
%w(http https).include? uri.scheme
rescue URI::Error
2013-02-19 14:57:14 +08:00
end
2013-11-06 02:04:47 +08:00
def convert_to_link!(img)
2013-02-19 14:57:14 +08:00
src = img["src"]
return if src.blank? || is_a_hyperlink?(img) || is_svg?(img)
2013-02-19 14:57:14 +08:00
width, height = img["width"].to_i, img["height"].to_i
# TODO: store original dimentions in db
original_width, original_height = (get_size(src) || [0, 0]).map(&:to_i)
2013-02-19 14:57:14 +08:00
# can't reach the image...
if original_width == 0 || original_height == 0
Rails.logger.info "Can't reach '#{src}' to get its dimension."
return
end
return if original_width <= width && original_height <= height
return if original_width <= SiteSetting.max_image_width && original_height <= SiteSetting.max_image_height
2013-07-08 07:39:08 +08:00
crop = SiteSetting.min_ratio_to_crop > 0
crop &&= original_width.to_f / original_height.to_f < SiteSetting.min_ratio_to_crop
if crop
width, height = ImageSizer.crop(original_width, original_height)
img["width"] = width
img["height"] = height
end
if upload = Upload.get_from_url(src)
upload.create_thumbnail!(width, height, crop: crop)
each_responsive_ratio do |ratio|
resized_w = (width * ratio).to_i
resized_h = (height * ratio).to_i
if upload.width && resized_w <= upload.width
upload.create_thumbnail!(resized_w, resized_h, crop: crop)
end
end
unless @disable_loading_image
upload.create_thumbnail!(LOADING_SIZE, LOADING_SIZE, format: 'png', colors: LOADING_COLORS)
end
2013-07-08 07:39:08 +08:00
end
2013-02-19 14:57:14 +08:00
add_lightbox!(img, original_width, original_height, upload, cropped: crop)
2013-07-08 07:39:08 +08:00
end
def loading_image(upload)
upload.thumbnail(LOADING_SIZE, LOADING_SIZE)
end
2013-11-06 02:04:47 +08:00
def is_a_hyperlink?(img)
2013-02-19 14:57:14 +08:00
parent = img.parent
while parent
return true if parent.name == "a"
parent = parent.parent if parent.respond_to?(:parent)
2013-02-19 14:57:14 +08:00
end
false
2013-07-08 07:39:08 +08:00
end
2013-02-19 14:57:14 +08:00
def each_responsive_ratio
SiteSetting
.responsive_post_image_sizes
.split('|')
.map(&:to_f)
.sort
.each { |r| yield r if r > 1 }
end
def add_lightbox!(img, original_width, original_height, upload, cropped: false)
2013-06-26 08:44:20 +08:00
# first, create a div to hold our lightbox
PERF: Improve quality of `PostSearchData#raw_data`. (#7275) This commit fixes the follow quality issue with `PostSearchData#raw_data`: 1. URLs are being tokenized and links with similar href and characters are being duplicated in the raw data. `Post#cooked`: ``` <p><a href=\"https://meta.discourse.org/some.png\" class=\"onebox\" target=\"_blank\" rel=\"nofollow noopener\">https://meta.discourse.org/some.png</a></p> ``` `PostSearchData#raw_data` Before: ``` This is a test topic 0 Uncategorized https://meta.discourse.org/some.png discourse org/some png https://meta.discourse.org/some.png discourse org/some png ``` `PostSearchData#raw_data` After: ``` This is a test topic 0 Uncategorized https://meta.discourse.org/some.png meta discourse org ``` 2. Ligthbox being included in search pollutes the `PostSearchData#raw_data` unncessarily. From 28 March 2018 to 28 March 2019, searches for the term `image` on `meta.discourse.org` had a click through rate of 2.1%. Non-lightboxed images are not included in indexing for search yet we were indexing content within a lightbox. Also, search for terms like `image` was affected we were using `Pasted image` as the filename for uploads that were pasted. `Post#cooked` ``` <p>Let me see how I can fix this image<br>\n<div class=\"lightbox-wrapper\"><a class=\"lightbox\" href=\"https://meta.discourse.org/some.png\" title=\"some.png\" rel=\"nofollow noopener\"><img src=\"https://meta.discourse.org/some.png\" width=\"275\" height=\"299\"><div class=\"meta\">\n<svg class=\"fa d-icon d-icon-far-image svg-icon\" aria-hidden=\"true\"><use xlink:href=\"#far-image\"></use></svg><span class=\"filename\">some.png</span><span class=\"informations\">1750×2000</span><svg class=\"fa d-icon d-icon-discourse-expand svg-icon\" aria-hidden=\"true\"><use xlink:href=\"#discourse-expand\"></use></svg>\n</div></a></div></p> ``` `PostSearchData#raw_data` Before: ``` This is a test topic 0 Uncategorized Let me see how I can fix this image some.png png https://meta.discourse.org/some.png discourse org/some png some.png png 1750×2000 ``` `PostSearchData#raw_data` After: ``` This is a test topic 0 Uncategorized Let me see how I can fix this image ``` In terms of indexing performance, we now have to parse the given HTML through nokogiri twice. However performance is not a huge worry here since a string length of 194170 takes only 30ms to scrub plus the indexing takes place in a background job.
2019-04-01 10:14:29 +08:00
lightbox = create_node("div", LIGHTBOX_WRAPPER_CSS_CLASS)
2013-07-08 07:39:08 +08:00
img.add_next_sibling(lightbox)
lightbox.add_child(img)
2013-06-26 08:44:20 +08:00
# then, the link to our larger image
a = create_link_node("lightbox", img["src"])
2013-02-19 14:57:14 +08:00
img.add_next_sibling(a)
if upload
a["data-download-href"] = Discourse.store.download_url(upload)
end
2013-02-19 14:57:14 +08:00
a.add_child(img)
2013-07-08 07:39:08 +08:00
# replace the image by its thumbnail
2013-11-06 02:04:47 +08:00
w, h = img["width"].to_i, img["height"].to_i
if upload
thumbnail = upload.thumbnail(w, h)
if thumbnail && thumbnail.filesize.to_i < upload.filesize
img["src"] = thumbnail.url
srcset = +""
each_responsive_ratio do |ratio|
resized_w = (w * ratio).to_i
resized_h = (h * ratio).to_i
if !cropped && upload.width && resized_w > upload.width
cooked_url = UrlHelper.cook_url(upload.url)
srcset << ", #{cooked_url} #{ratio.to_s.sub(/\.0$/, "")}x"
elsif t = upload.thumbnail(resized_w, resized_h)
cooked_url = UrlHelper.cook_url(t.url)
srcset << ", #{cooked_url} #{ratio.to_s.sub(/\.0$/, "")}x"
end
img["srcset"] = "#{UrlHelper.cook_url(img["src"])}#{srcset}" if srcset.present?
end
else
img["src"] = upload.url
end
if small_upload = loading_image(upload)
img["data-small-upload"] = small_upload.url
end
end
2013-07-08 07:39:08 +08:00
2013-06-26 08:44:20 +08:00
# then, some overlay informations
meta = create_node("div", "meta")
2013-07-08 07:39:08 +08:00
img.add_next_sibling(meta)
2013-06-22 00:29:40 +08:00
2013-11-06 02:04:47 +08:00
filename = get_filename(upload, img["src"])
informations = +"#{original_width}×#{original_height}"
informations << " #{upload.human_filesize}" if upload
2013-06-22 00:29:40 +08:00
2016-08-11 11:27:12 +08:00
a["title"] = CGI.escapeHTML(img["title"] || filename)
2013-11-30 03:03:39 +08:00
meta.add_child create_icon_node("far-image")
2016-08-11 11:27:12 +08:00
meta.add_child create_span_node("filename", a["title"])
2013-06-26 08:44:20 +08:00
meta.add_child create_span_node("informations", informations)
meta.add_child create_icon_node("discourse-expand")
2013-06-22 00:29:40 +08:00
end
2013-02-19 14:57:14 +08:00
def get_filename(upload, src)
return File.basename(src) unless upload
return upload.original_filename unless upload.original_filename =~ /^blob(\.png)?$/i
2013-11-06 02:04:47 +08:00
return I18n.t("upload.pasted_image_filename")
end
def create_node(tag_name, klass)
node = Nokogiri::XML::Node.new(tag_name, @doc)
node["class"] = klass if klass.present?
node
end
2017-07-28 09:20:09 +08:00
def create_span_node(klass, content = nil)
span = create_node("span", klass)
2013-06-22 00:29:40 +08:00
span.content = content if content
span
2013-02-06 03:16:51 +08:00
end
def create_icon_node(klass)
Upgrade to FontAwesome 5 (take two) (#6673) * Add missing icons to set * Revert FA5 revert This reverts commit 42572ff * use new SVG syntax in locales * Noscript page changes (remove login button, center "powered by" footer text) * Cast wider net for SVG icons in settings - include any _icon setting for SVG registry (offers better support for plugin settings) - let themes store multiple pipe-delimited icons in a setting - also replaces broken onebox image icon with SVG reference in cooked post processor * interpolate icons in locales * Fix composer whisper icon alignment * Add support for stacked icons * SECURITY: enforce hostname to match discourse hostname This ensures that the hostname rails uses for various helpers always matches the Discourse hostname * load SVG sprite with pre-initializers * FIX: enable caching on SVG sprites * PERF: use JSONP for SVG sprites so they are served from CDN This avoids needing to deal with CORS for loading of the SVG Note, added the svg- prefix to the filename so we can quickly tell in dev tools what the file is * Add missing SVG sprite JSONP script to CSP * Upgrade to FA 5.5.0 * Add support for all FA4.7 icons - adds complete frontend and backend for renamed FA4.7 icons - improves performance of SvgSprite.bundle and SvgSprite.all_icons * Fix group avatar flair preview - adds an endpoint at /svg-sprites/search/:keyword - adds frontend ajax call that pulls icon in avatar flair preview even when it is not in subset * Remove FA 4.7 font files
2018-11-27 05:49:57 +08:00
icon = create_node("svg", "fa d-icon d-icon-#{klass} svg-icon")
icon.set_attribute("aria-hidden", "true")
icon << "<use xlink:href=\"##{klass}\"></use>"
end
def create_link_node(klass, url, external = false)
a = create_node("a", klass)
a["href"] = url
if external
a["target"] = "_blank"
a["rel"] = "nofollow noopener"
end
a
end
def update_post_image
img = extract_images_for_post.first
return if img.blank?
if img["src"].present?
@post.update_column(:image_url, img["src"][0...255]) # post
@post.topic.update_column(:image_url, img["src"][0...255]) if @post.is_first_post? # topic
2013-07-08 07:39:08 +08:00
end
end
2013-11-06 02:04:47 +08:00
def post_process_oneboxes
limit = SiteSetting.max_oneboxes_per_post
oneboxes = {}
inlineOneboxes = {}
Oneboxer.apply(@doc, extra_paths: [".#{INLINE_ONEBOX_LOADING_CSS_CLASS}"]) do |url, element|
is_onebox = element["class"] == Oneboxer::ONEBOX_CSS_CLASS
map = is_onebox ? oneboxes : inlineOneboxes
skip_onebox = limit <= 0 && !map[url]
if skip_onebox
if is_onebox
element.remove_class('onebox')
else
remove_inline_onebox_loading_class(element)
end
next
end
limit -= 1
map[url] = true
if is_onebox
onebox = Oneboxer.onebox(url,
invalidate_oneboxes: !!@opts[:invalidate_oneboxes],
user_id: @post&.user_id,
category_id: @post&.topic&.category_id
)
@has_oneboxes = true if onebox.present?
onebox
else
process_inline_onebox(element)
false
end
end
2017-10-24 01:09:38 +08:00
2017-06-02 17:39:06 +08:00
oneboxed_images.each do |img|
next if img["src"].blank?
src = img["src"].sub(/^https?:/i, "")
parent = img.parent
img_classes = (img["class"] || "").split(" ")
link_classes = ((parent&.name == "a" && parent["class"]) || "").split(" ")
if img_classes.include?("onebox") || link_classes.include?("onebox")
next if add_image_placeholder!(img)
elsif large_images.include?(src) || broken_images.include?(src)
img.remove
next
end
upload_id = downloaded_images[src]
upload = Upload.find_by_id(upload_id) if upload_id
img["src"] = upload.url if upload.present?
2017-10-24 01:09:38 +08:00
# make sure we grab dimensions for oneboxed images
# and wrap in a div
limit_size!(img)
next if img["class"]&.include?('onebox-avatar')
parent = parent&.parent if parent&.name == "a"
parent_class = parent && parent["class"]
width = img["width"].to_i
height = img["height"].to_i
if parent_class&.include?("onebox-body") && width > 0 && height > 0
# special instruction for width == height, assume we are dealing with an avatar
if (img["width"].to_i == img["height"].to_i)
found = false
parent = img
while parent = parent.parent
if parent["class"] && parent["class"].include?("whitelistedgeneric")
found = true
break
end
end
if found
img["class"] = img["class"].to_s + " onebox-avatar"
next
end
end
2017-11-28 09:32:35 +08:00
if width < 64 && height < 64
img["class"] = img["class"].to_s + " onebox-full-image"
else
img.delete('width')
img.delete('height')
new_parent = img.add_next_sibling("<div class='aspect-image' style='--aspect-ratio:#{width}/#{height};'/>")
new_parent.first.add_child(img)
end
elsif (parent_class&.include?("instagram-images") || parent_class&.include?("tweet-images") || parent_class&.include?("scale-images")) && width > 0 && height > 0
img.remove_attribute("width")
img.remove_attribute("height")
parent["class"] = "aspect-image-full-size"
parent["style"] = "--aspect-ratio:#{width}/#{height};"
end
end
if @omit_nofollow || !SiteSetting.add_rel_nofollow_to_user_content
@doc.css(".onebox-body a, .onebox a").each { |a| a.remove_attribute("rel") }
end
2013-02-06 03:16:51 +08:00
end
2013-11-06 02:04:47 +08:00
def optimize_urls
%w{href data-download-href}.each do |selector|
@doc.css("a[#{selector}]").each do |a|
a[selector] = UrlHelper.cook_url(a[selector].to_s)
end
2013-11-06 02:04:47 +08:00
end
%w{src data-small-upload}.each do |selector|
@doc.css("img[#{selector}]").each do |img|
img[selector] = UrlHelper.cook_url(img[selector].to_s)
end
2013-11-06 02:04:47 +08:00
end
2013-02-06 03:16:51 +08:00
end
2017-10-24 01:09:38 +08:00
def remove_user_ids
@doc.css("a[href]").each do |a|
uri = begin
URI(a["href"])
rescue URI::Error
next
end
next if uri.hostname != Discourse.current_hostname
query = Rack::Utils.parse_nested_query(uri.query)
next if !query.delete("u")
uri.query = query.map { |k, v| "#{k}=#{v}" }.join("&").presence
a["href"] = uri.to_s
end
end
def enforce_nofollow
if !@omit_nofollow && SiteSetting.add_rel_nofollow_to_user_content
PrettyText.add_rel_nofollow_to_user_content(@doc)
end
end
2013-02-06 03:16:51 +08:00
2013-11-22 08:52:26 +08:00
def pull_hotlinked_images(bypass_bump = false)
2013-11-15 23:46:41 +08:00
# have we enough disk space?
disable_if_low_on_disk_space # But still enqueue the job
# don't download remote images for posts that are more than n days old
return unless @post.created_at > (Date.today - SiteSetting.download_remote_images_max_days_old)
2013-11-06 02:04:47 +08:00
# we only want to run the job whenever it's changed by a user
return if @post.last_editor_id && @post.last_editor_id <= 0
2013-11-06 02:04:47 +08:00
# make sure no other job is scheduled
Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
# schedule the job
delay = SiteSetting.editing_grace_period + 1
2013-11-22 08:52:26 +08:00
Jobs.enqueue_in(delay.seconds.to_i, :pull_hotlinked_images, post_id: @post.id, bypass_bump: bypass_bump)
2013-07-11 04:55:37 +08:00
end
2013-11-15 23:46:41 +08:00
def disable_if_low_on_disk_space
return false if !SiteSetting.download_remote_images_to_local
return false if available_disk_space >= SiteSetting.download_remote_images_threshold
return false if Discourse.store.external?
SiteSetting.download_remote_images_to_local = false
# log the site setting change
reason = I18n.t("disable_remote_images_download_reason")
staff_action_logger = StaffActionLogger.new(Discourse.system_user)
2017-07-28 09:20:09 +08:00
staff_action_logger.log_site_setting_change("download_remote_images_to_local", true, false, details: reason)
# also send a private message to the site contact user
notify_about_low_disk_space
true
2013-11-15 23:46:41 +08:00
end
def notify_about_low_disk_space
SystemMessage.create_from_system_user(Discourse.site_contact_user, :download_remote_images_disabled)
end
2013-11-15 23:46:41 +08:00
def available_disk_space
100 - `df -P #{Rails.root}/public/uploads | tail -1 | tr -s ' ' | cut -d ' ' -f 5`.to_i
2013-11-15 23:46:41 +08:00
end
def dirty?
@previous_cooked != html
end
def html
@doc.try(:to_html)
2013-02-06 03:16:51 +08:00
end
private
def post_process_images
extract_images.each do |img|
unless add_image_placeholder!(img)
limit_size!(img)
convert_to_link!(img)
end
end
end
def process_inline_onebox(element)
inline_onebox = InlineOneboxer.lookup(
element.attributes["href"].value,
invalidate: !!@opts[:invalidate_oneboxes]
)
if title = inline_onebox&.dig(:title)
element.children = CGI.escapeHTML(title)
element.add_class(INLINE_ONEBOX_CSS_CLASS)
end
remove_inline_onebox_loading_class(element)
end
def remove_inline_onebox_loading_class(element)
element.remove_class(INLINE_ONEBOX_LOADING_CSS_CLASS)
end
def is_svg?(img)
path =
begin
URI(img["src"]).path
rescue URI::Error
nil
end
File.extname(path) == '.svg' if path
end
2013-02-06 03:16:51 +08:00
end