diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb
index b3d2bbdb009..b920c60590c 100644
--- a/app/jobs/regular/pull_hotlinked_images.rb
+++ b/app/jobs/regular/pull_hotlinked_images.rb
@@ -5,11 +5,8 @@ require_dependency 'upload_creator'
module Jobs
class PullHotlinkedImages < Jobs::Base
-
sidekiq_options queue: 'low'
- LARGE_IMAGES = "large_images".freeze
-
def initialize
@max_size = SiteSetting.max_image_size_kb.kilobytes
end
@@ -47,26 +44,25 @@ module Jobs
raw = post.raw.dup
start_raw = raw.dup
+
downloaded_urls = {}
- large_images = post.custom_fields[LARGE_IMAGES].presence || []
- # recover from bad custom field silently
- unless Array === large_images
- large_images = []
- end
+ large_images = JSON.parse(post.custom_fields[Post::LARGE_IMAGES].presence || "[]") rescue []
+ broken_images = JSON.parse(post.custom_fields[Post::BROKEN_IMAGES].presence || "[]") rescue []
+ downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES].presence || "{}") rescue {}
- broken_images, new_large_images = [], []
+ has_new_large_image = false
+ has_new_broken_image = false
+ has_downloaded_image = false
extract_images_from(post.cooked).each do |image|
src = original_src = image['src']
- if src.start_with?("//")
- src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}"
- end
+ src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
if is_valid_image_url(src)
begin
# have we already downloaded that file?
- unless downloaded_urls.include?(src) || large_images.include?(src) || broken_images.include?(src)
+ unless downloaded_images.include?(src) || large_images.include?(src) || broken_images.include?(src)
if hotlinked = download(src)
if File.size(hotlinked.path) <= @max_size
filename = File.basename(URI.parse(src).path)
@@ -74,15 +70,18 @@ module Jobs
upload = UploadCreator.new(hotlinked, filename, origin: src).create_for(post.user_id)
if upload.persisted?
downloaded_urls[src] = upload.url
+ downloaded_images[src.sub(/^https?:/i, "")] = upload.id
+ has_downloaded_image = true
else
log(:info, "Failed to pull hotlinked image for post: #{post_id}: #{src} - #{upload.errors.full_messages.join("\n")}")
end
else
- large_images << original_src
- new_large_images << original_src
+ large_images << original_src.sub(/^https?:/i, "")
+ has_new_large_image = true
end
else
- broken_images << original_src
+ broken_images << original_src.sub(/^https?:/i, "")
+ has_new_broken_image = true
end
end
# have we successfully downloaded that file?
@@ -111,42 +110,24 @@ module Jobs
log(:error, "Failed to pull hotlinked image (#{src}) post: #{post_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
end
end
-
end
- if new_large_images.length > 0
- post.custom_fields[LARGE_IMAGES] = large_images
- post.save_custom_fields
- end
+ large_images.uniq!
+ broken_images.uniq!
+
+ post.custom_fields[Post::LARGE_IMAGES] = large_images.to_json if large_images.present?
+ post.custom_fields[Post::BROKEN_IMAGES] = broken_images.to_json if broken_images.present?
+ post.custom_fields[Post::DOWNLOADED_IMAGES] = downloaded_images.to_json if downloaded_images.present?
+ # only save custom fields if there are any
+ post.save_custom_fields if large_images.present? || broken_images.present? || downloaded_images.present?
post.reload
if start_raw == post.raw && raw != post.raw
changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") }
- # we never want that job to bump the topic
- options = { bypass_bump: true }
- post.revise(Discourse.system_user, changes, options)
- elsif downloaded_urls.present? || new_large_images.present?
+ post.revise(Discourse.system_user, changes, bypass_bump: true)
+ elsif has_downloaded_image || has_new_large_image || has_new_broken_image
post.trigger_post_process(true)
- elsif broken_images.present?
- start_html = post.cooked
- doc = Nokogiri::HTML::fragment(start_html)
- images = doc.css("img[src]") - doc.css("img.avatar")
- images.each do |tag|
- src = tag['src']
- if broken_images.include?(src)
- tag.name = 'span'
- tag.set_attribute('class', 'broken-image fa fa-chain-broken')
- tag.set_attribute('title', I18n.t('post.image_placeholder.broken'))
- tag.remove_attribute('src')
- tag.remove_attribute('width')
- tag.remove_attribute('height')
- end
- end
- if start_html == post.cooked && doc.to_html != post.cooked
- post.update_column(:cooked, doc.to_html)
- post.publish_change_to_clients! :revised
- end
end
end
diff --git a/app/models/post.rb b/app/models/post.rb
index 4dc6999c2cd..87bd0faf0f4 100644
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -58,7 +58,11 @@ class Post < ActiveRecord::Base
# We can pass several creating options to a post via attributes
attr_accessor :image_sizes, :quoted_post_numbers, :no_bump, :invalidate_oneboxes, :cooking_options, :skip_unique_check
- SHORT_POST_CHARS = 1200
+ LARGE_IMAGES ||= "large_images".freeze
+ BROKEN_IMAGES ||= "broken_images".freeze
+ DOWNLOADED_IMAGES ||= "downloaded_images".freeze
+
+ SHORT_POST_CHARS ||= 1200
scope :private_posts_for_user, ->(user) {
where("posts.topic_id IN (SELECT topic_id
diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb
index 31ee7d0266a..997b7e895d7 100644
--- a/lib/cooked_post_processor.rb
+++ b/lib/cooked_post_processor.rb
@@ -31,9 +31,9 @@ class CookedPostProcessor
def post_process(bypass_bump = false)
DistributedMutex.synchronize("post_process_#{@post.id}") do
DiscourseEvent.trigger(:before_post_process_cooked, @doc, @post)
- keep_reverse_index_up_to_date
- post_process_images
post_process_oneboxes
+ post_process_images
+ keep_reverse_index_up_to_date
optimize_urls
update_post_image
enforce_nofollow
@@ -65,26 +65,28 @@ class CookedPostProcessor
end
end
- upload_ids |= oneboxed_image_uploads.pluck(:id)
+ upload_ids |= downloaded_images.values.select { |id| Upload.exists?(id) }
values = upload_ids.map { |u| "(#{@post.id},#{u})" }.join(",")
PostUpload.transaction do
PostUpload.where(post_id: @post.id).delete_all
- if upload_ids.length > 0
+ if upload_ids.size > 0
PostUpload.exec_sql("INSERT INTO post_uploads (post_id, upload_id) VALUES #{values}")
end
end
end
def post_process_images
- images = extract_images
- return if images.blank?
-
- images.each do |img|
- next if large_images.include?(img["src"]) && add_large_image_placeholder!(img)
-
- limit_size!(img)
- convert_to_link!(img)
+ extract_images.each do |img|
+ src = img["src"].sub(/^https?:/i, "")
+ if large_images.include?(src)
+ add_large_image_placeholder!(img)
+ elsif broken_images.include?(src)
+ add_broken_image_placeholder!(img)
+ else
+ limit_size!(img)
+ convert_to_link!(img)
+ end
end
end
@@ -125,32 +127,48 @@ class CookedPostProcessor
end
img.remove
- true
+ end
+
+ def add_broken_image_placeholder!(img)
+ img.name = "span"
+ img.set_attribute("class", "broken-image fa fa-chain-broken")
+ img.set_attribute("title", I18n.t("post.image_placeholder.broken"))
+ img.remove_attribute("src")
+ img.remove_attribute("width")
+ img.remove_attribute("height")
end
def large_images
- @large_images ||= @post.custom_fields[Jobs::PullHotlinkedImages::LARGE_IMAGES].presence || []
+ @large_images ||= JSON.parse(@post.custom_fields[Post::LARGE_IMAGES].presence || "[]") rescue []
+ end
+
+ def broken_images
+ @broken_images ||= JSON.parse(@post.custom_fields[Post::BROKEN_IMAGES].presence || "[]") rescue []
+ end
+
+ def downloaded_images
+ @downloaded_images ||= JSON.parse(@post.custom_fields[Post::DOWNLOADED_IMAGES].presence || "{}") rescue {}
end
def extract_images
- # all image with a src attribute
+ # all images with a src attribute
@doc.css("img[src]") -
- # minus, data images
+ # minus data images
@doc.css("img[src^='data']") -
- # minus, emojis
+ # minus emojis
@doc.css("img.emoji") -
- # minus, image inside oneboxes
+ # minus oneboxed images
oneboxed_images -
- # minus, images inside quotes
+ # minus images inside quotes
@doc.css(".quote img")
end
def extract_images_for_post
- # all image with a src attribute
+ # all images with a src attribute
@doc.css("img[src]") -
- # minus, emojis
+ # minus emojis
@doc.css("img.emoji") -
- # minus, images inside quotes
+ # minus images inside quotes
@doc.css(".quote img")
end
@@ -158,19 +176,6 @@ class CookedPostProcessor
@doc.css(".onebox-body img, .onebox img")
end
- def oneboxed_image_uploads
- urls = Set.new
-
- oneboxed_images.each do |img|
- url = img["src"].sub(/^https?:/i, "")
- urls << url
- urls << "http:#{url}"
- urls << "https:#{url}"
- end
-
- Upload.where(origin: urls.to_a)
- end
-
def limit_size!(img)
# retrieve the size from
# 1) the width/height attributes
@@ -377,15 +382,16 @@ class CookedPostProcessor
Oneboxer.onebox(url, args)
end
- uploads = oneboxed_image_uploads.select(:url, :origin)
oneboxed_images.each do |img|
- if large_images.include?(img["src"])
+ src = img["src"].sub(/^https?:/i, "")
+
+ if large_images.include?(src) || broken_images.include?(src)
img.remove
next
end
- url = img["src"].sub(/^https?:/i, "")
- upload = uploads.find { |u| u.origin.sub(/^https?:/i, "") == url }
+ upload_id = downloaded_images[src]
+ upload = Upload.find(upload_id) if upload_id
img["src"] = upload.url if upload.present?
# make sure we grab dimensions for oneboxed images
@@ -462,7 +468,7 @@ class CookedPostProcessor
# don't download remote images for posts that are more than n days old
return unless @post.created_at > (Date.today - SiteSetting.download_remote_images_max_days_old)
# we only want to run the job whenever it's changed by a user
- return if @post.last_editor_id == Discourse.system_user.id
+ return if @post.last_editor_id && @post.last_editor_id <= 0
# make sure no other job is scheduled
Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
# schedule the job
diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb
index b30137d5b75..b6bf69b1b0e 100644
--- a/spec/components/cooked_post_processor_spec.rb
+++ b/spec/components/cooked_post_processor_spec.rb
@@ -10,9 +10,9 @@ describe CookedPostProcessor do
let(:post_process) { sequence("post_process") }
it "post process in sequence" do
- cpp.expects(:keep_reverse_index_up_to_date).in_sequence(post_process)
- cpp.expects(:post_process_images).in_sequence(post_process)
cpp.expects(:post_process_oneboxes).in_sequence(post_process)
+ cpp.expects(:post_process_images).in_sequence(post_process)
+ cpp.expects(:keep_reverse_index_up_to_date).in_sequence(post_process)
cpp.expects(:optimize_urls).in_sequence(post_process)
cpp.expects(:pull_hotlinked_images).in_sequence(post_process)
cpp.post_process
diff --git a/spec/jobs/pull_hotlinked_images_spec.rb b/spec/jobs/pull_hotlinked_images_spec.rb
index 3447fd00796..fd0b65df612 100644
--- a/spec/jobs/pull_hotlinked_images_spec.rb
+++ b/spec/jobs/pull_hotlinked_images_spec.rb
@@ -125,26 +125,6 @@ describe Jobs::PullHotlinkedImages do
end
end
- describe 'replace' do
- it 'broken image with placeholder' do
- post = Fabricate(:post, raw: "
")
-
- Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
- post.reload
-
- expect(post.cooked).to match(/")
-
- Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
- post.reload
-
- expect(post.cooked).to match(/