# Source: discourse/lib/cooked_post_processor.rb (258 lines, 7.4 KiB, Ruby)

# Post processing that we can do after a post has already been cooked.
# For example, inserting the onebox content, or image sizes/thumbnails.

require_dependency "oneboxer"
require_dependency 'url_helper'
2013-02-06 03:16:51 +08:00
class CookedPostProcessor
2013-06-22 00:29:40 +08:00
include ActionView::Helpers::NumberHelper
include UrlHelper
2013-02-19 14:57:14 +08:00
2013-02-06 03:16:51 +08:00
# Builds a processor for an already-cooked post.
#
# post - object responding to #cooked (the cooked HTML of the post)
# opts - Hash of options; methods of this class read :image_sizes and
#        :invalidate_oneboxes from it
def initialize(post, opts={})
  # NOTE(review): @dirty is not read by any method visible in this class
  # (#dirty? compares HTML instead); kept in case something else relies on it.
  @dirty = false
  @opts = opts
  @post = post
  # snapshot of the cooked HTML before processing, used by #dirty?
  @previous_cooked = (@post.cooked || "").dup
  # every processing step mutates this parsed fragment in place
  @doc = Nokogiri::HTML::fragment(post.cooked)
  # memoizes image dimensions per URL (filled by #get_size)
  @size_cache = {}
end
2013-11-22 08:52:26 +08:00
# Runs every post-processing step on the cooked document, in this order:
# upload reverse index, images, oneboxes, URL optimization and finally the
# scheduling of the hotlinked-images job.
#
# bypass_bump - forwarded unchanged to #pull_hotlinked_images (default: false)
def post_process(bypass_bump = false)
  keep_reverse_index_up_to_date
  post_process_images
  post_process_oneboxes
  optimize_urls
  pull_hotlinked_images(bypass_bump)
end
2013-11-06 02:04:47 +08:00
# Rebuilds the post_uploads rows for this post from the uploads referenced
# by the document: every <a href> and <img src> that Upload.get_from_url
# resolves to an upload is recorded once.
#
# The existing rows are deleted and the new ones inserted inside a single
# PostUpload transaction.
def keep_reverse_index_up_to_date
  upload_ids = Set.new

  # links first, then images (same traversal order as two separate loops)
  [["a", "href"], ["img", "src"]].each do |tag, attribute|
    @doc.search(tag).each do |node|
      upload = Upload.get_from_url(node[attribute].to_s)
      upload_ids << upload.id if upload
    end
  end

  PostUpload.transaction do
    PostUpload.delete_all(post_id: @post.id)
    unless upload_ids.empty?
      # the interpolated values are ids read from records, not user input
      values = upload_ids.map { |upload_id| "(#{@post.id},#{upload_id})" }.join(",")
      PostUpload.exec_sql("INSERT INTO post_uploads (post_id, upload_id) VALUES #{values}")
    end
  end
end
2013-02-26 00:42:20 +08:00
# Resizes and lightboxes every image returned by #extract_images, then
# hands the same images to #update_topic_image.
# Does nothing when there are no images.
def post_process_images
  images = extract_images
  return if images.blank?

  images.each do |img|
    limit_size!(img)
    convert_to_link!(img)
  end

  update_topic_image(images)
end
2013-04-13 22:31:20 +08:00
2013-07-08 07:39:08 +08:00
# Returns the images of the document that should be post-processed:
# all "img" matches minus those matched by ".onebox-result img" and
# ".quote img".
def extract_images
  # do not extract images inside oneboxes or quotes
  @doc.css("img") - @doc.css(".onebox-result img") - @doc.css(".quote img")
end
2013-11-06 02:04:47 +08:00
# Sets the "width" and "height" attributes of `img` to the values returned
# by ImageSizer.resize for the image's dimensions.
#
# The dimensions are taken from the first source that yields a value:
#   1) the width/height attributes
#   2) the dimension from the preview (@opts[:image_sizes])
#   3) the dimension of the original image (HTTP request, see #get_size)
def limit_size!(img)
  w, h = get_size_from_attributes(img) ||
         get_size_from_image_sizes(img["src"], @opts[:image_sizes]) ||
         get_size(img["src"])

  # limit the size of the thumbnail
  img["width"], img["height"] = ImageSizer.resize(w, h)
end
2013-11-26 01:36:13 +08:00
# Reads the dimensions declared on the element itself.
#
# Returns [width, height] (Integers) when both the "width" and "height"
# attributes convert to a positive integer, nil otherwise.
def get_size_from_attributes(img)
  width = img["width"].to_i
  height = img["height"].to_i
  [width, height] if width > 0 && height > 0
end
2013-11-06 02:04:47 +08:00
# Looks up the dimensions of `src` in the sizes sent along with the post.
#
# src         - String URL (or URL fragment) of the image
# image_sizes - collection of [url, size] pairs (e.g. a Hash keyed by url)
#               where size responds to ["width"] and ["height"]
#
# Returns [width, height] of the first entry whose url contains `src`,
# or nil when `src` is blank, `image_sizes` is blank, or nothing matches.
def get_size_from_image_sizes(src, image_sizes)
  # a nil src would raise in String#include?, an empty one would match every url
  return if src.to_s.empty?
  return unless image_sizes.present?

  image_sizes.each do |image_size|
    url, size = image_size[0], image_size[1]
    return [size["width"], size["height"]] if url.include?(src)
  end

  # Explicit nil: the value of #each is the (truthy) collection itself, which
  # would otherwise be mistaken for a size by callers chaining with `||`.
  nil
end
2013-02-21 09:07:36 +08:00
2013-11-06 02:04:47 +08:00
# Fetches the dimensions of the image at `url` through FastImage.
#
# url - String; may be absolute, site-relative ("/path") or
#       scheme-relative ("//host/path")
#
# Returns whatever FastImage.size returns for the absolute URL (callers
# destructure it as width, height), memoized per `url` in @size_cache.
# Returns nil when `url` is nil, when the URL is not a valid http(s) URL,
# when crawling is disabled and the image is not one of our uploads, or when
# FastImage raises Zlib::BufError.
def get_size(url)
  return if url.nil?

  absolute_url = url
  # site-relative path: a single leading slash (\A anchors to the string start)
  absolute_url = Discourse.base_url_no_prefix + absolute_url if absolute_url =~ /\A\/[^\/]/
  # FastImage fails when there's no scheme
  absolute_url = (SiteSetting.use_ssl? ? "https:" : "http:") + absolute_url if absolute_url.start_with?("//")
  return unless is_valid_image_url?(absolute_url)
  # we can *always* crawl our own images
  return unless SiteSetting.crawl_images? || Discourse.store.has_been_uploaded?(url)
  @size_cache[url] ||= FastImage.size(absolute_url)
rescue Zlib::BufError # FastImage.size raises BufError for some gifs
  nil
end
2013-11-06 02:04:47 +08:00
# Tells whether `url` parses as a URI whose scheme is http or https.
#
# Returns true or false; a URL that URI.parse rejects yields false.
def is_valid_image_url?(url)
  uri = URI.parse(url)
  %w(http https).include?(uri.scheme)
rescue URI::InvalidURIError
  false
end
2013-11-06 02:04:47 +08:00
# Wraps `img` in a lightbox when the original image is larger than what is
# displayed.
#
# Nothing happens when:
#   - the image has no src,
#   - the original is not bigger than the displayed width/height attributes,
#   - the original fits within SiteSetting.max_image_width/max_image_height,
#   - the image is already inside a link.
#
# When the src resolves to an Upload, a thumbnail of the displayed size is
# requested before the lightbox is added.
def convert_to_link!(img)
  src = img["src"]
  return unless src.present?

  width, height = img["width"].to_i, img["height"].to_i
  # get_size may return nil; the .to_i calls below turn missing values into 0
  original_width, original_height = get_size(src)

  return if original_width.to_i <= width && original_height.to_i <= height
  return if original_width.to_i <= SiteSetting.max_image_width && original_height.to_i <= SiteSetting.max_image_height
  return if is_a_hyperlink?(img)

  upload = Upload.get_from_url(src)
  if upload
    upload.create_thumbnail!(width, height)
    # TODO: optimize_image!(img)
  end

  add_lightbox!(img, original_width, original_height, upload)
end
2013-11-06 02:04:47 +08:00
# Tells whether any ancestor of `img` is an <a> element.
#
# Walks up through #parent until there is no parent left or the current
# ancestor does not respond to #parent. `img` itself is not inspected.
#
# Returns true or false.
def is_a_hyperlink?(img)
  parent = img.parent
  while parent
    return true if parent.name == "a"
    break unless parent.respond_to?(:parent)
    parent = parent.parent
  end
  false
end
2013-02-19 14:57:14 +08:00
2013-07-08 07:39:08 +08:00
# Wraps `img` in the lightbox markup. The resulting structure is:
#
#   <div class="lightbox-wrapper">
#     <a class="lightbox" href="(src of the image)" title="(filename)">
#       <img ...>
#       <div class="meta">
#         <span class="filename">, <span class="informations">, <span class="expand">
#       </div>
#     </a>
#   </div>
#
# img             - the image node to wrap (moved inside the new nodes)
# original_width  - width shown in the "informations" span
# original_height - height shown in the "informations" span
# upload          - optional upload backing the image; when given, its
#                   thumbnail (if it has one for the displayed size) replaces
#                   the image src, and its file size is appended to the
#                   informations
def add_lightbox!(img, original_width, original_height, upload=nil)
  # first, create a div to hold our lightbox
  lightbox = Nokogiri::XML::Node.new("div", @doc)
  lightbox["class"] = "lightbox-wrapper"
  img.add_next_sibling(lightbox)
  lightbox.add_child(img)

  # then, the link to our larger image
  a = Nokogiri::XML::Node.new("a", @doc)
  img.add_next_sibling(a)
  # the href must be captured before the src is swapped for the thumbnail below
  a["href"] = img["src"]
  a["class"] = "lightbox"
  a.add_child(img)

  # replace the image by its thumbnail
  w, h = img["width"].to_i, img["height"].to_i
  img["src"] = upload.thumbnail(w, h).url if upload && upload.has_thumbnail?(w, h)

  # then, some overlay informations
  meta = Nokogiri::XML::Node.new("div", @doc)
  meta["class"] = "meta"
  img.add_next_sibling(meta)

  filename = get_filename(upload, img["src"])
  informations = "#{original_width}x#{original_height}"
  informations << " #{number_to_human_size(upload.filesize)}" if upload

  a["title"] = filename

  meta.add_child create_span_node("filename", filename)
  meta.add_child create_span_node("informations", informations)
  meta.add_child create_span_node("expand")
end
2013-02-19 14:57:14 +08:00
# Picks the filename displayed for an image.
#
# upload - the upload backing the image, or nil
# src    - the image URL, only used when there is no upload
#
# Returns the basename of `src` without an upload; otherwise the upload's
# original filename, except for "blob" / "blob.png" (any case), which is
# replaced by the "upload.pasted_image_filename" translation.
def get_filename(upload, src)
  return File.basename(src) unless upload
  # \A and \z anchor the whole string; ^ and $ would also match a single line
  # of a multi-line filename
  return upload.original_filename unless upload.original_filename =~ /\Ablob(\.png)?\z/i
  I18n.t("upload.pasted_image_filename")
end
2013-06-22 00:29:40 +08:00
# Creates a detached <span> node belonging to the processed document.
#
# klass   - value of the span's "class" attribute
# content - optional text content; left untouched when nil/false
#
# Returns the new node.
def create_span_node(klass, content=nil)
  span = Nokogiri::XML::Node.new("span", @doc)
  span.content = content if content
  span["class"] = klass
  span
end
2013-11-06 02:04:47 +08:00
# Stores the src of the first image as the topic's image_url.
#
# Only applies to the first post of a topic (post_number == 1) and only when
# there is a first image with a non-blank src.
#
# images - ordered collection of image nodes responding to ["src"]
def update_topic_image(images)
  return unless @post.post_number == 1

  img = images.first
  # guard against an empty collection as well as a blank src
  return unless img && img["src"].present?

  @post.topic.update_column(:image_url, img["src"])
end
2013-11-06 02:04:47 +08:00
# Applies Oneboxer to the document: for every url Oneboxer.apply yields, the
# block answers with Oneboxer.onebox(url, args).
#
# args carries the post id and whether @opts[:invalidate_oneboxes] is truthy.
#
# Returns the value of Oneboxer.apply.
def post_process_oneboxes
  args = {
    post_id: @post.id,
    invalidate_oneboxes: !!@opts[:invalidate_oneboxes],
  }

  Oneboxer.apply(@doc) do |url, _element|
    Oneboxer.onebox(url, args)
  end
end
2013-11-06 02:04:47 +08:00
# Rewrites every local link target (<a href>) and image source (<img src>)
# of the document to schemaless(absolute(url)).
#
# Whether a url is local is decided by is_local; urls that are not local
# (including missing attributes, which read as "") are left untouched.
def optimize_urls
  @doc.search("a").each do |a|
    href = a["href"].to_s
    a["href"] = schemaless(absolute(href)) if is_local(href)
  end

  @doc.search("img").each do |img|
    src = img["src"].to_s
    img["src"] = schemaless(absolute(src)) if is_local(src)
  end
end
2013-11-22 08:52:26 +08:00
# Schedules the :pull_hotlinked_images job for this post.
#
# Nothing is scheduled when the download_remote_images_to_local setting is
# off, when #disable_if_low_on_disk_space answers true, or when the post was
# last updated by the system user.
#
# bypass_bump - passed through to the job arguments (default: false)
def pull_hotlinked_images(bypass_bump = false)
  # is the job enabled?
  return unless SiteSetting.download_remote_images_to_local?
  # have we enough disk space?
  return if disable_if_low_on_disk_space
  # we only want to run the job whenever it's changed by a user
  return if @post.updated_by == Discourse.system_user
  # make sure no other job is scheduled
  Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
  # schedule the job one second after the ninja edit window
  delay = SiteSetting.ninja_edit_window + 1
  Jobs.enqueue_in(delay.seconds.to_i, :pull_hotlinked_images, post_id: @post.id, bypass_bump: bypass_bump)
end
2013-11-15 23:46:41 +08:00
# Switches the download_remote_images_to_local site setting off when the
# available disk space is below download_remote_images_threshold.
#
# Returns true when the setting was switched off, false otherwise.
def disable_if_low_on_disk_space
  enough_space = !(available_disk_space < SiteSetting.download_remote_images_threshold)
  return false if enough_space

  SiteSetting.download_remote_images_to_local = false
  true
end
# Returns 100 minus the integer found in the 5th space-separated field of
# the last line printed by `df -l .`.
# NOTE(review): that field is presumably the "Use%" column, which would make
# the result the percentage of free disk space — confirm on the target OS.
def available_disk_space
  used = `df -l . | tail -1 | tr -s ' ' | cut -d ' ' -f 5`.to_i
  100 - used
end
# Tells whether the processed HTML differs from the cooked HTML captured
# when the processor was created.
def dirty?
  current_html = html
  @previous_cooked != current_html
end
# Returns the current HTML of the processed document (@doc.to_html), going
# through #try so that a nil document yields nil instead of raising.
def html
  @doc.try(:to_html)
end
end