# Post processing that we can do after a post has already been cooked.
# For example, inserting the onebox content, or image sizes/thumbnails.

require_dependency 'url_helper'
|
2013-02-06 03:16:51 +08:00
|
|
|
|
|
|
|
class CookedPostProcessor
|
2013-06-22 00:29:40 +08:00
|
|
|
include ActionView::Helpers::NumberHelper
|
2013-02-19 14:57:14 +08:00
|
|
|
|
2013-02-06 03:16:51 +08:00
|
|
|
# Builds a processor for a post.
#
# The post is re-cooked from its raw text and the resulting HTML is parsed
# into a fragment (@doc) that the post-processing steps modify in place.
#
# post - the post to process; must answer cooked, raw, topic_id,
#        cooking_options and cook(raw, options).
# opts - optional Hash. :cooking_options is read here as a fallback when the
#        post supplies none; other methods of this class read :image_sizes
#        and :invalidate_oneboxes.
def initialize(post, opts={})
  @dirty = false
  @opts = opts
  @post = post
  # copy of the stored HTML; dirty? compares it with the processed HTML
  @previous_cooked = (@post.cooked || "").dup
  # NOTE: we re-cook the post here in order to prevent timing issues with edits
  # cf. https://meta.discourse.org/t/edit-of-rebaked-post-doesnt-show-in-html-only-in-raw/33815/6
  #
  # symbolize_keys hands back a new hash, so adding :topic_id below never
  # writes into the hash owned by the post or by the caller's opts.
  cooking_options = (post.cooking_options || opts[:cooking_options] || {}).symbolize_keys
  cooking_options[:topic_id] = post.topic_id
  @doc = Nokogiri::HTML::fragment(post.cook(post.raw, cooking_options))
  # per-instance cache of image dimensions, filled by get_size
  @size_cache = {}
end
|
|
|
|
|
2013-11-22 08:52:26 +08:00
|
|
|
# Runs every post-processing step, in a fixed order, while holding a
# distributed mutex keyed on the post id.
#
# bypass_bump - forwarded untouched to pull_hotlinked_images.
def post_process(bypass_bump = false)
  lock_name = "post_process_#{@post.id}"
  argument_free_steps = [
    :keep_reverse_index_up_to_date,
    :post_process_images,
    :post_process_oneboxes,
    :optimize_urls
  ]

  DistributedMutex.synchronize(lock_name) do
    argument_free_steps.each { |step| send(step) }
    pull_hotlinked_images(bypass_bump)
  end
end
|
|
|
|
|
2013-11-06 02:04:47 +08:00
|
|
|
# Rebuilds the post_uploads rows for this post from the uploads that the
# document references through link hrefs and image srcs.
#
# Existing rows for the post are deleted and the new ones inserted inside a
# single PostUpload transaction.
def keep_reverse_index_up_to_date
  upload_ids = Set.new

  # links first, then images; each URL is resolved through Upload.get_from_url
  { "a[href]" => "href", "img[src]" => "src" }.each do |selector, attribute|
    @doc.css(selector).each do |node|
      upload = Upload.get_from_url(node[attribute].to_s)
      upload_ids << upload.id if upload
    end
  end

  values = upload_ids.map { |upload_id| "(#{@post.id},#{upload_id})" }.join(",")

  PostUpload.transaction do
    PostUpload.delete_all(post_id: @post.id)
    unless upload_ids.empty?
      PostUpload.exec_sql("INSERT INTO post_uploads (post_id, upload_id) VALUES #{values}")
    end
  end
end
|
|
|
|
|
2013-02-26 00:42:20 +08:00
|
|
|
# Sizes and (where needed) lightboxes every image returned by extract_images,
# then refreshes the topic image. Does nothing at all when there are no such
# images.
def post_process_images
  images = extract_images

  unless images.blank?
    images.each do |image|
      limit_size!(image)
      convert_to_link!(image)
    end

    update_topic_image
  end
end
|
2013-04-13 22:31:20 +08:00
|
|
|
|
2013-07-08 07:39:08 +08:00
|
|
|
# Images eligible for resizing/lightboxing: every <img> carrying a src,
# except data-URI images, emojis, images inside oneboxes and images inside
# quotes.
def extract_images
  candidates = @doc.css("img[src]")

  exclusions = [
    @doc.css("img[src^='data']"), # data images
    @doc.css("img.emoji"),        # emojis
    oneboxed_images,              # images inside oneboxes
    @doc.css(".quote img")        # images inside quotes
  ]

  exclusions.inject(candidates) { |remaining, excluded| remaining - excluded }
end
|
|
|
|
|
2015-10-15 17:00:47 +08:00
|
|
|
# Images that may serve as the topic image: every <img> carrying a src,
# except emojis, images inside oneboxes and images inside quotes.
# (Unlike extract_images, data-URI images are not removed here.)
def extract_images_for_topic
  candidates = @doc.css("img[src]")

  exclusions = [
    @doc.css("img.emoji"),  # emojis
    oneboxed_images,        # images inside oneboxes
    @doc.css(".quote img")  # images inside quotes
  ]

  exclusions.inject(candidates) { |remaining, excluded| remaining - excluded }
end
|
|
|
|
|
2014-07-21 21:59:34 +08:00
|
|
|
# All <img> nodes located inside an element with the onebox-result or onebox
# class.
def oneboxed_images
  selector = ".onebox-result img, .onebox img"
  @doc.css(selector)
end
|
|
|
|
|
2013-11-06 02:04:47 +08:00
|
|
|
# Writes width/height attributes onto the image, as computed by
# ImageSizer.resize from the first available size source:
#   1) the width/height attributes
#   2) the dimension from the preview (image_sizes)
#   3) the dimension of the original image (HTTP request)
def limit_size!(img)
  src = img["src"]

  dimensions = get_size_from_attributes(img)
  dimensions ||= get_size_from_image_sizes(src, @opts[:image_sizes])
  dimensions ||= get_size(src)

  w, h = dimensions

  # limit the size of the thumbnail
  img["width"], img["height"] = ImageSizer.resize(w, h)
end
|
|
|
|
|
2013-11-26 01:36:13 +08:00
|
|
|
# Derives an image's display size from its width/height attributes.
#
# img - node-like object answering ["width"], ["height"] and ["src"].
#
# Returns [width, height] (Integers) when both attributes are positive.
# When only one of them is positive, the other is scaled from the original
# image's aspect ratio, obtained through get_size.
# Returns nil when neither attribute is positive, or when the original
# dimensions are unknown or zero.
def get_size_from_attributes(img)
  w, h = img["width"].to_i, img["height"].to_i
  return [w, h] if w > 0 && h > 0
  return if w <= 0 && h <= 0

  # only one dimension was specified: attempt to scale the other one
  original_size = get_size(img["src"])
  # get_size has no answer for unreachable / disallowed images
  return unless original_size

  original_width, original_height = original_size.map(&:to_f)
  # a zero-sized original would give an infinite or NaN ratio, and calling
  # floor on that raises FloatDomainError
  return unless original_width > 0 && original_height > 0

  if w > 0
    ratio = w.to_f / original_width
    [w, (original_height * ratio).floor]
  else
    ratio = h.to_f / original_height
    [(original_width * ratio).floor, h]
  end
end
|
|
|
|
|
2013-11-06 02:04:47 +08:00
|
|
|
# Looks up an image's size in the image_sizes collection.
#
# src         - the image's src value.
# image_sizes - enumerable of (url, size) pairs, where size answers
#               ["width"] and ["height"]; may be nil or empty.
#
# Returns [width, height] exactly as stored in the first entry whose url
# contains src and whose width and height are both positive.
# Returns nil when src or image_sizes is blank, or when no entry qualifies.
def get_size_from_image_sizes(src, image_sizes)
  # a nil src cannot be searched for, and an empty one is contained in every url
  return if src.blank? || image_sizes.blank?

  image_sizes.each do |url, size|
    next unless url && url.include?(src)
    next unless size && size["width"].to_i > 0 && size["height"].to_i > 0
    return [size["width"], size["height"]]
  end

  # without this, the method would return the collection that #each hands
  # back, which callers would mistake for a size
  nil
end
|
2013-02-21 09:07:36 +08:00
|
|
|
|
2013-11-06 02:04:47 +08:00
|
|
|
# Fetches the dimensions of the image at url using FastImage.size, caching
# the result per url in @size_cache.
#
# url - absolute ("http://..."), scheme-relative ("//host/...") or
#       site-relative ("/path") image URL; may be nil.
#
# Returns whatever FastImage.size returns for the absolute URL, or nil when
# the url is blank, is not a valid http(s) image URL, may not be crawled, or
# FastImage raises Zlib::BufError.
def get_size(url)
  return @size_cache[url] if @size_cache.has_key?(url)
  # e.g. an <img> without a src attribute
  return if url.to_s.empty?

  absolute_url = url
  # site-relative path ("/x" but not "//x"); \A anchors at the start of the
  # string rather than at the start of any line
  absolute_url = Discourse.base_url_no_prefix + absolute_url if absolute_url =~ %r{\A/[^/]}
  # FastImage fails when there's no scheme
  absolute_url = SiteSetting.scheme + ":" + absolute_url if absolute_url.start_with?("//")

  return unless is_valid_image_url?(absolute_url)

  # we can *always* crawl our own images
  return unless SiteSetting.crawl_images? || Discourse.store.has_been_uploaded?(url)

  @size_cache[url] ||= FastImage.size(absolute_url)
rescue Zlib::BufError # FastImage.size raises BufError for some gifs
  nil
end
|
|
|
|
|
2013-11-06 02:04:47 +08:00
|
|
|
# Tells whether url parses as a URI whose scheme is http or https.
#
# Returns true or false; a url that URI.parse rejects yields false.
def is_valid_image_url?(url)
  %w(http https).include?(URI.parse(url).scheme)
rescue URI::InvalidURIError
  false
end
|
|
|
|
|
2013-11-06 02:04:47 +08:00
|
|
|
# Turns an image into a lightbox link when its original is larger than both
# the size it is displayed at and the site-wide maximum image size.
#
# Nothing happens when the image has no src, when its original dimensions
# cannot be determined (an info line is logged), when the original fits
# either limit, or when the image already sits inside a link.
# For images backed by an Upload, a thumbnail at the displayed size is
# requested before the lightbox markup is added.
def convert_to_link!(img)
  src = img["src"]
  return unless src.present?

  width = img["width"].to_i
  height = img["height"].to_i
  original_width, original_height = get_size(src)

  # can't reach the image...
  if [original_width, original_height].any?(&:nil?)
    Rails.logger.info "Can't reach '#{src}' to get its dimension."
    return
  end

  full_width = original_width.to_i
  full_height = original_height.to_i

  fits_displayed_size = full_width <= width && full_height <= height
  return if fits_displayed_size

  fits_site_limits = full_width <= SiteSetting.max_image_width &&
                     full_height <= SiteSetting.max_image_height
  return if fits_site_limits

  return if is_a_hyperlink?(img)

  upload = Upload.get_from_url(src)
  upload.create_thumbnail!(width, height) if upload

  add_lightbox!(img, original_width, original_height, upload)
end
|
|
|
|
|
2013-11-06 02:04:47 +08:00
|
|
|
# Walks up from the image's parent and reports whether any ancestor is an
# "a" element.
#
# The walk ends with false once there is no further ancestor, or once an
# ancestor does not respond to parent.
def is_a_hyperlink?(img)
  ancestor = img.parent

  loop do
    return false unless ancestor
    return true if ancestor.name == "a"
    return false unless ancestor.respond_to?(:parent)
    ancestor = ancestor.parent
  end
end
|
2013-02-19 14:57:14 +08:00
|
|
|
|
2013-07-08 07:39:08 +08:00
|
|
|
# Rewrites the markup around an image so that it reads
#
#   div.lightbox-wrapper > a.lightbox > img + div.meta
#
# where the link points at the image's current src and div.meta holds the
# "filename", "informations" and "expand" spans.
#
# img             - the image node, modified and moved in place.
# original_width  - width shown in the "informations" span.
# original_height - height shown in the "informations" span.
# upload          - optional upload backing the image; when given it supplies
#                   the download link (internal store only), the thumbnail
#                   src (when one exists at the displayed size), the filename
#                   and the human-readable file size.
def add_lightbox!(img, original_width, original_height, upload=nil)
  # first, the wrapper: placed right after the image, then adopting it
  wrapper = Nokogiri::XML::Node.new("div", @doc)
  wrapper["class"] = "lightbox-wrapper"
  img.add_next_sibling(wrapper)
  wrapper.add_child(img)

  # then, the link to the larger image: same placement trick, one level down
  link = Nokogiri::XML::Node.new("a", @doc)
  img.add_next_sibling(link)

  if upload && Discourse.store.internal?
    link["data-download-href"] = Discourse.store.download_url(upload)
  end

  # the href must be captured before the src is swapped for the thumbnail
  link["href"] = img["src"]
  link["class"] = "lightbox"
  link.add_child(img)

  # replace the image by its thumbnail
  thumb_width = img["width"].to_i
  thumb_height = img["height"].to_i
  if upload && upload.has_thumbnail?(thumb_width, thumb_height)
    img["src"] = upload.thumbnail(thumb_width, thumb_height).url
  end

  # then, the overlay carrying the image details
  overlay = Nokogiri::XML::Node.new("div", @doc)
  overlay["class"] = "meta"
  img.add_next_sibling(overlay)

  filename = get_filename(upload, img["src"])
  details = "#{original_width}x#{original_height}"
  details << " #{number_to_human_size(upload.filesize)}" if upload

  caption = img["title"] || filename
  link["title"] = caption

  overlay.add_child create_span_node("filename", caption)
  overlay.add_child create_span_node("informations", details)
  overlay.add_child create_span_node("expand")
end
|
2013-02-19 14:57:14 +08:00
|
|
|
|
2013-06-27 03:53:31 +08:00
|
|
|
# Picks the filename shown for an image.
#
# upload - the upload backing the image, or nil.
# src    - the image's src, used only when there is no upload.
#
# Returns the basename of src when there is no upload, the upload's original
# filename when it has a meaningful one, and the translated
# "upload.pasted_image_filename" label when the original filename is just
# "blob" or "blob.png" (any letter case).
def get_filename(upload, src)
  return File.basename(src) unless upload
  # \A and \z anchor the whole string; ^ and $ would also match a single
  # line of a multi-line filename
  return upload.original_filename unless upload.original_filename =~ /\Ablob(\.png)?\z/i
  I18n.t("upload.pasted_image_filename")
end
|
|
|
|
|
2013-06-22 00:29:40 +08:00
|
|
|
# Creates a <span> node belonging to @doc.
#
# klass   - value for the span's class attribute.
# content - optional content; left unset when nil or false.
#
# Returns the new node (not yet attached anywhere).
def create_span_node(klass, content=nil)
  Nokogiri::XML::Node.new("span", @doc).tap do |node|
    node.content = content if content
    node["class"] = klass
  end
end
|
|
|
|
|
2015-10-15 17:00:47 +08:00
|
|
|
# For a first post, stores the src of the first image returned by
# extract_images_for_topic as the topic's image_url, cut to 255 characters.
#
# Does nothing for other posts, when there is no such image, or when its src
# is blank.
def update_topic_image
  return unless @post.is_first_post?

  img = extract_images_for_topic.first
  # the list may be empty, in which case there is nothing to read a src from
  return unless img && img["src"].present?

  @post.topic.update_column(:image_url, img["src"][0...255])
end
|
|
|
|
|
2013-11-06 02:04:47 +08:00
|
|
|
# Applies oneboxes to the document, then sizes the images they contain.
#
# Each URL yielded by Oneboxer.apply is rendered through Oneboxer.onebox with
# this post's id and the :invalidate_oneboxes option coerced to a boolean.
def post_process_oneboxes
  onebox_options = {
    post_id: @post.id,
    invalidate_oneboxes: @opts[:invalidate_oneboxes] ? true : false
  }

  # apply oneboxes
  Oneboxer.apply(@doc, topic_id: @post.topic_id) do |url|
    Oneboxer.onebox(url, onebox_options)
  end

  # make sure we grab dimensions for oneboxed images
  oneboxed_images.each { |image| limit_size!(image) }
end
|
|
|
|
|
2013-11-06 02:04:47 +08:00
|
|
|
# Rewrites every URL that UrlHelper.is_local accepts into the form produced
# by UrlHelper.schemaless(UrlHelper.absolute(url)).
#
# Covers the href and data-download-href attributes of links, then the src
# attribute of images.
def optimize_urls
  %w{href data-download-href}.each do |attribute|
    @doc.css("a[#{attribute}]").each do |link|
      url = link[attribute].to_s
      next unless UrlHelper.is_local(url)
      link[attribute] = UrlHelper.schemaless(UrlHelper.absolute(url))
    end
  end

  @doc.css("img[src]").each do |image|
    url = image["src"].to_s
    next unless UrlHelper.is_local(url)
    image["src"] = UrlHelper.schemaless(UrlHelper.absolute(url))
  end
end
|
|
|
|
|
2013-11-22 08:52:26 +08:00
|
|
|
# Schedules the :pull_hotlinked_images job for this post, replacing any
# previously scheduled one.
#
# Skipped when the download_remote_images_to_local setting is off, when
# disable_if_low_on_disk_space reports low disk space, or when the post was
# last edited by the system user.
#
# bypass_bump - passed through to the job.
def pull_hotlinked_images(bypass_bump = false)
  # is the job enabled? have we enough disk space?
  # we only want to run the job whenever it's changed by a user
  return if !SiteSetting.download_remote_images_to_local? ||
            disable_if_low_on_disk_space ||
            @post.last_editor_id == Discourse.system_user.id

  post_id = @post.id

  # make sure no other job is scheduled
  Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: post_id)

  # schedule the job one second after the ninja edit window
  wait = (SiteSetting.ninja_edit_window + 1).seconds.to_i
  Jobs.enqueue_in(wait, :pull_hotlinked_images, post_id: post_id, bypass_bump: bypass_bump)
end
|
|
|
|
|
2013-11-15 23:46:41 +08:00
|
|
|
# Turns the download_remote_images_to_local setting off when the available
# disk space is below the download_remote_images_threshold setting.
#
# When it does so, the change is recorded as a staff action by the system
# user and a notification about low disk space is sent.
#
# Returns true when the setting was disabled, false otherwise.
def disable_if_low_on_disk_space
  enough_space = available_disk_space >= SiteSetting.download_remote_images_threshold
  return false if enough_space

  SiteSetting.download_remote_images_to_local = false

  # log the site setting change
  reason = I18n.t("disable_remote_images_download_reason")
  logger = StaffActionLogger.new(Discourse.system_user)
  logger.log_site_setting_change("download_remote_images_to_local", true, false, { details: reason })

  # also send a private message to the site contact user
  notify_about_low_disk_space

  true
end
|
|
|
|
|
2015-08-15 05:46:15 +08:00
|
|
|
# Sends the :download_remote_images_disabled system message to the site
# contact user.
def notify_about_low_disk_space
  recipient = Discourse.site_contact_user
  SystemMessage.create_from_system_user(recipient, :download_remote_images_disabled)
end
|
|
|
|
|
2013-11-15 23:46:41 +08:00
|
|
|
# Percentage of disk space still available, computed as 100 minus the
# integer value of the fifth column on the last line of `df -P` for the
# public/uploads directory under Rails.root.
#
# Returns an Integer. When df produces no usable output (or cannot be
# started), the used percentage counts as 0 and 100 is returned.
def available_disk_space
  uploads_path = "#{Rails.root}/public/uploads"

  output = begin
    # argv form: no shell is involved, so spaces or shell metacharacters in
    # the path cannot break or alter the command
    IO.popen(["df", "-P", uploads_path], &:read)
  rescue SystemCallError
    ""
  end

  used_percentage = output.to_s.lines.last.to_s.split(" ")[4].to_i
  100 - used_percentage
end
|
|
|
|
|
2013-06-15 18:29:20 +08:00
|
|
|
# True when the processed HTML differs from the cooked HTML captured when
# this processor was created.
def dirty?
  current_html = html
  @previous_cooked != current_html
end
|
|
|
|
|
|
|
|
# The working document serialized through to_html, obtained via try
# (presumably nil when there is no document).
def html
  document = @doc
  document.try(:to_html)
end
|
|
|
|
|
|
|
|
end
|