mirror of
https://github.com/discourse/discourse.git
synced 2025-03-22 22:25:57 +08:00
FIX: properly handle too large & broken images in posts
This commit is contained in:
parent
6f2a3cb026
commit
678e28794a
@ -5,11 +5,8 @@ require_dependency 'upload_creator'
|
|||||||
module Jobs
|
module Jobs
|
||||||
|
|
||||||
class PullHotlinkedImages < Jobs::Base
|
class PullHotlinkedImages < Jobs::Base
|
||||||
|
|
||||||
sidekiq_options queue: 'low'
|
sidekiq_options queue: 'low'
|
||||||
|
|
||||||
LARGE_IMAGES = "large_images".freeze
|
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
@max_size = SiteSetting.max_image_size_kb.kilobytes
|
@max_size = SiteSetting.max_image_size_kb.kilobytes
|
||||||
end
|
end
|
||||||
@ -47,26 +44,25 @@ module Jobs
|
|||||||
|
|
||||||
raw = post.raw.dup
|
raw = post.raw.dup
|
||||||
start_raw = raw.dup
|
start_raw = raw.dup
|
||||||
|
|
||||||
downloaded_urls = {}
|
downloaded_urls = {}
|
||||||
large_images = post.custom_fields[LARGE_IMAGES].presence || []
|
|
||||||
|
|
||||||
# recover from bad custom field silently
|
large_images = JSON.parse(post.custom_fields[Post::LARGE_IMAGES].presence || "[]") rescue []
|
||||||
unless Array === large_images
|
broken_images = JSON.parse(post.custom_fields[Post::BROKEN_IMAGES].presence || "[]") rescue []
|
||||||
large_images = []
|
downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES].presence || "{}") rescue {}
|
||||||
end
|
|
||||||
|
|
||||||
broken_images, new_large_images = [], []
|
has_new_large_image = false
|
||||||
|
has_new_broken_image = false
|
||||||
|
has_downloaded_image = false
|
||||||
|
|
||||||
extract_images_from(post.cooked).each do |image|
|
extract_images_from(post.cooked).each do |image|
|
||||||
src = original_src = image['src']
|
src = original_src = image['src']
|
||||||
if src.start_with?("//")
|
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
|
||||||
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}"
|
|
||||||
end
|
|
||||||
|
|
||||||
if is_valid_image_url(src)
|
if is_valid_image_url(src)
|
||||||
begin
|
begin
|
||||||
# have we already downloaded that file?
|
# have we already downloaded that file?
|
||||||
unless downloaded_urls.include?(src) || large_images.include?(src) || broken_images.include?(src)
|
unless downloaded_images.include?(src) || large_images.include?(src) || broken_images.include?(src)
|
||||||
if hotlinked = download(src)
|
if hotlinked = download(src)
|
||||||
if File.size(hotlinked.path) <= @max_size
|
if File.size(hotlinked.path) <= @max_size
|
||||||
filename = File.basename(URI.parse(src).path)
|
filename = File.basename(URI.parse(src).path)
|
||||||
@ -74,15 +70,18 @@ module Jobs
|
|||||||
upload = UploadCreator.new(hotlinked, filename, origin: src).create_for(post.user_id)
|
upload = UploadCreator.new(hotlinked, filename, origin: src).create_for(post.user_id)
|
||||||
if upload.persisted?
|
if upload.persisted?
|
||||||
downloaded_urls[src] = upload.url
|
downloaded_urls[src] = upload.url
|
||||||
|
downloaded_images[src.sub(/^https?:/i, "")] = upload.id
|
||||||
|
has_downloaded_image = true
|
||||||
else
|
else
|
||||||
log(:info, "Failed to pull hotlinked image for post: #{post_id}: #{src} - #{upload.errors.full_messages.join("\n")}")
|
log(:info, "Failed to pull hotlinked image for post: #{post_id}: #{src} - #{upload.errors.full_messages.join("\n")}")
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
large_images << original_src
|
large_images << original_src.sub(/^https?:/i, "")
|
||||||
new_large_images << original_src
|
has_new_large_image = true
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
broken_images << original_src
|
broken_images << original_src.sub(/^https?:/i, "")
|
||||||
|
has_new_broken_image = true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
# have we successfully downloaded that file?
|
# have we successfully downloaded that file?
|
||||||
@ -111,42 +110,24 @@ module Jobs
|
|||||||
log(:error, "Failed to pull hotlinked image (#{src}) post: #{post_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
|
log(:error, "Failed to pull hotlinked image (#{src}) post: #{post_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
if new_large_images.length > 0
|
large_images.uniq!
|
||||||
post.custom_fields[LARGE_IMAGES] = large_images
|
broken_images.uniq!
|
||||||
post.save_custom_fields
|
|
||||||
end
|
post.custom_fields[Post::LARGE_IMAGES] = large_images.to_json if large_images.present?
|
||||||
|
post.custom_fields[Post::BROKEN_IMAGES] = broken_images.to_json if broken_images.present?
|
||||||
|
post.custom_fields[Post::DOWNLOADED_IMAGES] = downloaded_images.to_json if downloaded_images.present?
|
||||||
|
# only save custom fields if there are any
|
||||||
|
post.save_custom_fields if large_images.present? || broken_images.present? || downloaded_images.present?
|
||||||
|
|
||||||
post.reload
|
post.reload
|
||||||
|
|
||||||
if start_raw == post.raw && raw != post.raw
|
if start_raw == post.raw && raw != post.raw
|
||||||
changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") }
|
changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") }
|
||||||
# we never want that job to bump the topic
|
post.revise(Discourse.system_user, changes, bypass_bump: true)
|
||||||
options = { bypass_bump: true }
|
elsif has_downloaded_image || has_new_large_image || has_new_broken_image
|
||||||
post.revise(Discourse.system_user, changes, options)
|
|
||||||
elsif downloaded_urls.present? || new_large_images.present?
|
|
||||||
post.trigger_post_process(true)
|
post.trigger_post_process(true)
|
||||||
elsif broken_images.present?
|
|
||||||
start_html = post.cooked
|
|
||||||
doc = Nokogiri::HTML::fragment(start_html)
|
|
||||||
images = doc.css("img[src]") - doc.css("img.avatar")
|
|
||||||
images.each do |tag|
|
|
||||||
src = tag['src']
|
|
||||||
if broken_images.include?(src)
|
|
||||||
tag.name = 'span'
|
|
||||||
tag.set_attribute('class', 'broken-image fa fa-chain-broken')
|
|
||||||
tag.set_attribute('title', I18n.t('post.image_placeholder.broken'))
|
|
||||||
tag.remove_attribute('src')
|
|
||||||
tag.remove_attribute('width')
|
|
||||||
tag.remove_attribute('height')
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if start_html == post.cooked && doc.to_html != post.cooked
|
|
||||||
post.update_column(:cooked, doc.to_html)
|
|
||||||
post.publish_change_to_clients! :revised
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -58,7 +58,11 @@ class Post < ActiveRecord::Base
|
|||||||
# We can pass several creating options to a post via attributes
|
# We can pass several creating options to a post via attributes
|
||||||
attr_accessor :image_sizes, :quoted_post_numbers, :no_bump, :invalidate_oneboxes, :cooking_options, :skip_unique_check
|
attr_accessor :image_sizes, :quoted_post_numbers, :no_bump, :invalidate_oneboxes, :cooking_options, :skip_unique_check
|
||||||
|
|
||||||
SHORT_POST_CHARS = 1200
|
LARGE_IMAGES ||= "large_images".freeze
|
||||||
|
BROKEN_IMAGES ||= "broken_images".freeze
|
||||||
|
DOWNLOADED_IMAGES ||= "downloaded_images".freeze
|
||||||
|
|
||||||
|
SHORT_POST_CHARS ||= 1200
|
||||||
|
|
||||||
scope :private_posts_for_user, ->(user) {
|
scope :private_posts_for_user, ->(user) {
|
||||||
where("posts.topic_id IN (SELECT topic_id
|
where("posts.topic_id IN (SELECT topic_id
|
||||||
|
@ -31,9 +31,9 @@ class CookedPostProcessor
|
|||||||
def post_process(bypass_bump = false)
|
def post_process(bypass_bump = false)
|
||||||
DistributedMutex.synchronize("post_process_#{@post.id}") do
|
DistributedMutex.synchronize("post_process_#{@post.id}") do
|
||||||
DiscourseEvent.trigger(:before_post_process_cooked, @doc, @post)
|
DiscourseEvent.trigger(:before_post_process_cooked, @doc, @post)
|
||||||
keep_reverse_index_up_to_date
|
|
||||||
post_process_images
|
|
||||||
post_process_oneboxes
|
post_process_oneboxes
|
||||||
|
post_process_images
|
||||||
|
keep_reverse_index_up_to_date
|
||||||
optimize_urls
|
optimize_urls
|
||||||
update_post_image
|
update_post_image
|
||||||
enforce_nofollow
|
enforce_nofollow
|
||||||
@ -65,28 +65,30 @@ class CookedPostProcessor
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
upload_ids |= oneboxed_image_uploads.pluck(:id)
|
upload_ids |= downloaded_images.values.select { |id| Upload.exists?(id) }
|
||||||
|
|
||||||
values = upload_ids.map { |u| "(#{@post.id},#{u})" }.join(",")
|
values = upload_ids.map { |u| "(#{@post.id},#{u})" }.join(",")
|
||||||
PostUpload.transaction do
|
PostUpload.transaction do
|
||||||
PostUpload.where(post_id: @post.id).delete_all
|
PostUpload.where(post_id: @post.id).delete_all
|
||||||
if upload_ids.length > 0
|
if upload_ids.size > 0
|
||||||
PostUpload.exec_sql("INSERT INTO post_uploads (post_id, upload_id) VALUES #{values}")
|
PostUpload.exec_sql("INSERT INTO post_uploads (post_id, upload_id) VALUES #{values}")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def post_process_images
|
def post_process_images
|
||||||
images = extract_images
|
extract_images.each do |img|
|
||||||
return if images.blank?
|
src = img["src"].sub(/^https?:/i, "")
|
||||||
|
if large_images.include?(src)
|
||||||
images.each do |img|
|
add_large_image_placeholder!(img)
|
||||||
next if large_images.include?(img["src"]) && add_large_image_placeholder!(img)
|
elsif broken_images.include?(src)
|
||||||
|
add_broken_image_placeholder!(img)
|
||||||
|
else
|
||||||
limit_size!(img)
|
limit_size!(img)
|
||||||
convert_to_link!(img)
|
convert_to_link!(img)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def add_large_image_placeholder!(img)
|
def add_large_image_placeholder!(img)
|
||||||
url = img["src"]
|
url = img["src"]
|
||||||
@ -125,32 +127,48 @@ class CookedPostProcessor
|
|||||||
end
|
end
|
||||||
|
|
||||||
img.remove
|
img.remove
|
||||||
true
|
end
|
||||||
|
|
||||||
|
def add_broken_image_placeholder!(img)
|
||||||
|
img.name = "span"
|
||||||
|
img.set_attribute("class", "broken-image fa fa-chain-broken")
|
||||||
|
img.set_attribute("title", I18n.t("post.image_placeholder.broken"))
|
||||||
|
img.remove_attribute("src")
|
||||||
|
img.remove_attribute("width")
|
||||||
|
img.remove_attribute("height")
|
||||||
end
|
end
|
||||||
|
|
||||||
def large_images
|
def large_images
|
||||||
@large_images ||= @post.custom_fields[Jobs::PullHotlinkedImages::LARGE_IMAGES].presence || []
|
@large_images ||= JSON.parse(@post.custom_fields[Post::LARGE_IMAGES].presence || "[]") rescue []
|
||||||
|
end
|
||||||
|
|
||||||
|
def broken_images
|
||||||
|
@broken_images ||= JSON.parse(@post.custom_fields[Post::BROKEN_IMAGES].presence || "[]") rescue []
|
||||||
|
end
|
||||||
|
|
||||||
|
def downloaded_images
|
||||||
|
@downloaded_images ||= JSON.parse(@post.custom_fields[Post::DOWNLOADED_IMAGES].presence || "{}") rescue {}
|
||||||
end
|
end
|
||||||
|
|
||||||
def extract_images
|
def extract_images
|
||||||
# all image with a src attribute
|
# all images with a src attribute
|
||||||
@doc.css("img[src]") -
|
@doc.css("img[src]") -
|
||||||
# minus, data images
|
# minus data images
|
||||||
@doc.css("img[src^='data']") -
|
@doc.css("img[src^='data']") -
|
||||||
# minus, emojis
|
# minus emojis
|
||||||
@doc.css("img.emoji") -
|
@doc.css("img.emoji") -
|
||||||
# minus, image inside oneboxes
|
# minus oneboxed images
|
||||||
oneboxed_images -
|
oneboxed_images -
|
||||||
# minus, images inside quotes
|
# minus images inside quotes
|
||||||
@doc.css(".quote img")
|
@doc.css(".quote img")
|
||||||
end
|
end
|
||||||
|
|
||||||
def extract_images_for_post
|
def extract_images_for_post
|
||||||
# all image with a src attribute
|
# all images with a src attribute
|
||||||
@doc.css("img[src]") -
|
@doc.css("img[src]") -
|
||||||
# minus, emojis
|
# minus emojis
|
||||||
@doc.css("img.emoji") -
|
@doc.css("img.emoji") -
|
||||||
# minus, images inside quotes
|
# minus images inside quotes
|
||||||
@doc.css(".quote img")
|
@doc.css(".quote img")
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -158,19 +176,6 @@ class CookedPostProcessor
|
|||||||
@doc.css(".onebox-body img, .onebox img")
|
@doc.css(".onebox-body img, .onebox img")
|
||||||
end
|
end
|
||||||
|
|
||||||
def oneboxed_image_uploads
|
|
||||||
urls = Set.new
|
|
||||||
|
|
||||||
oneboxed_images.each do |img|
|
|
||||||
url = img["src"].sub(/^https?:/i, "")
|
|
||||||
urls << url
|
|
||||||
urls << "http:#{url}"
|
|
||||||
urls << "https:#{url}"
|
|
||||||
end
|
|
||||||
|
|
||||||
Upload.where(origin: urls.to_a)
|
|
||||||
end
|
|
||||||
|
|
||||||
def limit_size!(img)
|
def limit_size!(img)
|
||||||
# retrieve the size from
|
# retrieve the size from
|
||||||
# 1) the width/height attributes
|
# 1) the width/height attributes
|
||||||
@ -377,15 +382,16 @@ class CookedPostProcessor
|
|||||||
Oneboxer.onebox(url, args)
|
Oneboxer.onebox(url, args)
|
||||||
end
|
end
|
||||||
|
|
||||||
uploads = oneboxed_image_uploads.select(:url, :origin)
|
|
||||||
oneboxed_images.each do |img|
|
oneboxed_images.each do |img|
|
||||||
if large_images.include?(img["src"])
|
src = img["src"].sub(/^https?:/i, "")
|
||||||
|
|
||||||
|
if large_images.include?(src) || broken_images.include?(src)
|
||||||
img.remove
|
img.remove
|
||||||
next
|
next
|
||||||
end
|
end
|
||||||
|
|
||||||
url = img["src"].sub(/^https?:/i, "")
|
upload_id = downloaded_images[src]
|
||||||
upload = uploads.find { |u| u.origin.sub(/^https?:/i, "") == url }
|
upload = Upload.find(upload_id) if upload_id
|
||||||
img["src"] = upload.url if upload.present?
|
img["src"] = upload.url if upload.present?
|
||||||
|
|
||||||
# make sure we grab dimensions for oneboxed images
|
# make sure we grab dimensions for oneboxed images
|
||||||
@ -462,7 +468,7 @@ class CookedPostProcessor
|
|||||||
# don't download remote images for posts that are more than n days old
|
# don't download remote images for posts that are more than n days old
|
||||||
return unless @post.created_at > (Date.today - SiteSetting.download_remote_images_max_days_old)
|
return unless @post.created_at > (Date.today - SiteSetting.download_remote_images_max_days_old)
|
||||||
# we only want to run the job whenever it's changed by a user
|
# we only want to run the job whenever it's changed by a user
|
||||||
return if @post.last_editor_id == Discourse.system_user.id
|
return if @post.last_editor_id && @post.last_editor_id <= 0
|
||||||
# make sure no other job is scheduled
|
# make sure no other job is scheduled
|
||||||
Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
|
Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
|
||||||
# schedule the job
|
# schedule the job
|
||||||
|
@ -10,9 +10,9 @@ describe CookedPostProcessor do
|
|||||||
let(:post_process) { sequence("post_process") }
|
let(:post_process) { sequence("post_process") }
|
||||||
|
|
||||||
it "post process in sequence" do
|
it "post process in sequence" do
|
||||||
cpp.expects(:keep_reverse_index_up_to_date).in_sequence(post_process)
|
|
||||||
cpp.expects(:post_process_images).in_sequence(post_process)
|
|
||||||
cpp.expects(:post_process_oneboxes).in_sequence(post_process)
|
cpp.expects(:post_process_oneboxes).in_sequence(post_process)
|
||||||
|
cpp.expects(:post_process_images).in_sequence(post_process)
|
||||||
|
cpp.expects(:keep_reverse_index_up_to_date).in_sequence(post_process)
|
||||||
cpp.expects(:optimize_urls).in_sequence(post_process)
|
cpp.expects(:optimize_urls).in_sequence(post_process)
|
||||||
cpp.expects(:pull_hotlinked_images).in_sequence(post_process)
|
cpp.expects(:pull_hotlinked_images).in_sequence(post_process)
|
||||||
cpp.post_process
|
cpp.post_process
|
||||||
|
@ -125,26 +125,6 @@ describe Jobs::PullHotlinkedImages do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe 'replace' do
|
|
||||||
it 'broken image with placeholder' do
|
|
||||||
post = Fabricate(:post, raw: "<img src='#{broken_image_url}'>")
|
|
||||||
|
|
||||||
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
|
|
||||||
post.reload
|
|
||||||
|
|
||||||
expect(post.cooked).to match(/<span class="broken-image fa fa-chain-broken/)
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'large image with placeholder' do
|
|
||||||
post = Fabricate(:post, raw: "<img src='#{large_image_url}'>")
|
|
||||||
|
|
||||||
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
|
|
||||||
post.reload
|
|
||||||
|
|
||||||
expect(post.cooked).to match(/<div class="large-image-placeholder"><a href=.*\ target="_blank" .*\>/)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
describe '#is_valid_image_url' do
|
describe '#is_valid_image_url' do
|
||||||
subject { described_class.new }
|
subject { described_class.new }
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user