From 1c3c0f04d910483e6d36917e56ef83e01539f10b Mon Sep 17 00:00:00 2001
From: Bianca Nenciu
Date: Fri, 29 Oct 2021 17:58:05 +0300
Subject: [PATCH] FEATURE: Pull hotlinked images in user bios (#14726)

---
Notes (reviewer commentary; ignored by git when applying):
  * pull_hotlinked_images.rb: the helper section switches from `private` to
    `protected`.
  * New job Jobs::PullUserProfileHotlinkedImages (subclass of
    Jobs::PullHotlinkedImages): loads the UserProfile for `user_id`, attempts
    to download each image extracted from `bio_cooked`, rewrites `bio_raw`
    through `replace_in_raw` for every successful download, then saves with
    `skip_pull_hotlinked_image = true` so the after_save hook added below does
    not enqueue the job again.
  * clean_up_uploads.rb: uploads whose sha1 (or Base62-encoded sha1) appears
    in any `bio_raw` are skipped.
    NOTE(review): this LIKE query interpolates values into the SQL string,
    whereas the ChatMessage check right below it uses bind parameters;
    consider aligning the two.
  * user_profile.rb: after_save enqueues the job after
    `SiteSetting.editing_grace_period` when `bio_raw` changed and
    `skip_pull_hotlinked_image` is not set.

 app/jobs/regular/pull_hotlinked_images.rb     |  2 +-
 .../pull_user_profile_hotlinked_images.rb     | 47 +++++++++++++++++++
 app/jobs/scheduled/clean_up_uploads.rb        |  1 +
 app/models/user_profile.rb                    | 13 +++++
 ...pull_user_profile_hotlinked_images_spec.rb | 27 +++++++++++
 5 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 app/jobs/regular/pull_user_profile_hotlinked_images.rb
 create mode 100644 spec/jobs/pull_user_profile_hotlinked_images_spec.rb

diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb
index 934941c5bad..b532366deb5 100644
--- a/app/jobs/regular/pull_hotlinked_images.rb
+++ b/app/jobs/regular/pull_hotlinked_images.rb
@@ -241,7 +241,7 @@ module Jobs
       )
     end
 
-    private
+    protected
 
     def normalize_src(src)
       uri = Addressable::URI.heuristic_parse(src)
diff --git a/app/jobs/regular/pull_user_profile_hotlinked_images.rb b/app/jobs/regular/pull_user_profile_hotlinked_images.rb
new file mode 100644
index 00000000000..ae87a9043d7
--- /dev/null
+++ b/app/jobs/regular/pull_user_profile_hotlinked_images.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module Jobs
+  class PullUserProfileHotlinkedImages < ::Jobs::PullHotlinkedImages
+    def execute(args)
+      @user_id = args[:user_id]
+      raise Discourse::InvalidParameters.new(:user_id) if @user_id.blank?
+
+      user_profile = UserProfile.find_by(user_id: @user_id)
+      return if user_profile.blank?
+
+      large_image_urls = []
+      broken_image_urls = []
+      downloaded_images = {}
+
+      extract_images_from(user_profile.bio_cooked).each do |node|
+        download_src = original_src = node['src'] || node['href']
+        download_src = "#{SiteSetting.force_https ? "https" : "http"}:#{original_src}" if original_src.start_with?("//")
+        normalized_src = normalize_src(download_src)
+
+        next if !should_download_image?(download_src)
+
+        begin
+          already_attempted_download = downloaded_images.include?(normalized_src) || large_image_urls.include?(normalized_src) || broken_image_urls.include?(normalized_src)
+          if !already_attempted_download
+            downloaded_images[normalized_src] = attempt_download(download_src, @user_id)
+          end
+        rescue ImageTooLargeError
+          large_image_urls << normalized_src
+        rescue ImageBrokenError
+          broken_image_urls << normalized_src
+        end
+
+        # have we successfully downloaded that file?
+        if upload = downloaded_images[normalized_src]
+          user_profile.bio_raw = replace_in_raw(original_src: original_src, upload: upload, raw: user_profile.bio_raw)
+        end
+      rescue => e
+        raise e if Rails.env.test?
+        log(:error, "Failed to pull hotlinked image (#{download_src}) user: #{@user_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
+      end
+
+      user_profile.skip_pull_hotlinked_image = true
+      user_profile.save!
+    end
+  end
+end
diff --git a/app/jobs/scheduled/clean_up_uploads.rb b/app/jobs/scheduled/clean_up_uploads.rb
index 770ff7144b8..db666f78d28 100644
--- a/app/jobs/scheduled/clean_up_uploads.rb
+++ b/app/jobs/scheduled/clean_up_uploads.rb
@@ -38,6 +38,7 @@ module Jobs
         encoded_sha = Base62.encode(upload.sha1.hex)
         next if ReviewableQueuedPost.pending.where("payload->>'raw' LIKE '%#{upload.sha1}%' OR payload->>'raw' LIKE '%#{encoded_sha}%'").exists?
         next if Draft.where("data LIKE '%#{upload.sha1}%' OR data LIKE '%#{encoded_sha}%'").exists?
+        next if UserProfile.where("bio_raw LIKE '%#{upload.sha1}%' OR bio_raw LIKE '%#{encoded_sha}%'").exists?
         if defined?(ChatMessage) &&
             ChatMessage.where("message LIKE ? OR message LIKE ?", "%#{upload.sha1}%", "%#{encoded_sha}%").exists?
           next
diff --git a/app/models/user_profile.rb b/app/models/user_profile.rb
index 574f295baa7..4b2f4f62b61 100644
--- a/app/models/user_profile.rb
+++ b/app/models/user_profile.rb
@@ -12,6 +12,7 @@ class UserProfile < ActiveRecord::Base
   validates :user, presence: true
   before_save :cook
   after_save :trigger_badges
+  after_save :pull_hotlinked_image
 
   validate :website_domain_validator, if: Proc.new { |c| c.new_record? || c.website_changed? }
 
@@ -19,6 +20,8 @@ class UserProfile < ActiveRecord::Base
 
   BAKED_VERSION = 1
 
+  attr_accessor :skip_pull_hotlinked_image
+
   def bio_excerpt(length = 350, opts = {})
     return nil if bio_cooked.blank?
     excerpt = PrettyText.excerpt(bio_cooked, length, opts).sub(/<br>$/, '')
@@ -113,6 +116,16 @@ class UserProfile < ActiveRecord::Base
     BadgeGranter.queue_badge_grant(Badge::Trigger::UserChange, user: self)
   end
 
+  def pull_hotlinked_image
+    if !skip_pull_hotlinked_image && saved_change_to_bio_raw?
+      Jobs.enqueue_in(
+        SiteSetting.editing_grace_period,
+        :pull_user_profile_hotlinked_images,
+        user_id: self.user_id
+      )
+    end
+  end
+
   private
 
   def cooked
diff --git a/spec/jobs/pull_user_profile_hotlinked_images_spec.rb b/spec/jobs/pull_user_profile_hotlinked_images_spec.rb
new file mode 100644
index 00000000000..3f5a41f90f3
--- /dev/null
+++ b/spec/jobs/pull_user_profile_hotlinked_images_spec.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+describe Jobs::PullUserProfileHotlinkedImages do
+  fab!(:user) { Fabricate(:user) }
+
+  let(:image_url) { "http://wiki.mozilla.org/images/2/2e/Longcat1.png" }
+  let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") }
+
+  before do
+    stub_request(:get, image_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
+    SiteSetting.download_remote_images_to_local = true
+  end
+
+  describe '#execute' do
+    before do
+      stub_image_size
+    end
+
+    it 'replaces images' do
+      user.user_profile.update!(bio_raw: "![](#{image_url})")
+      expect { Jobs::PullUserProfileHotlinkedImages.new.execute(user_id: user.id) }.to change { Upload.count }.by(1)
+      expect(user.user_profile.reload.bio_cooked).to include(Upload.last.url)
+    end
+  end
+end