diff --git a/lib/file_store/base_store.rb b/lib/file_store/base_store.rb index 7d22640b137..504267ebf4d 100644 --- a/lib/file_store/base_store.rb +++ b/lib/file_store/base_store.rb @@ -76,13 +76,13 @@ module FileStore not_implemented end - def download(upload) + def download(upload, max_file_size_kb: nil) DistributedMutex.synchronize("download_#{upload.sha1}", validity: 3.minutes) do filename = "#{upload.sha1}#{File.extname(upload.original_filename)}" file = get_from_cache(filename) if !file - max_file_size_kb = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes + max_file_size_kb ||= [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes url = upload.secure? ? Discourse.store.signed_url_for_path(upload.url) : diff --git a/lib/shrink_uploaded_image.rb b/lib/shrink_uploaded_image.rb new file mode 100644 index 00000000000..e5f57eb883a --- /dev/null +++ b/lib/shrink_uploaded_image.rb @@ -0,0 +1,236 @@ +# frozen_string_literal: true + +class ShrinkUploadedImage + attr_reader :upload, :path + + def initialize(upload:, path:, max_pixels:, verbose: false, interactive: false) + @upload = upload + @path = path + @max_pixels = max_pixels + @verbose = verbose + @interactive = interactive + end + + def perform + OptimizedImage.downsize(path, path, "#{@max_pixels}@", filename: upload.original_filename) + sha1 = Upload.generate_digest(path) + + if sha1 == upload.sha1 + log "No sha1 change" + return false + end + + w, h = FastImage.size(path, timeout: 15, raise_on_failure: true) + + if !w || !h + log "Invalid image dimensions after resizing" + return false + end + + # Neither #dup or #clone provide a complete copy + original_upload = Upload.find(upload.id) + ww, hh = ImageSizer.resize(w, h) + + # A different upload record that matches the sha1 of the downsized image + existing_upload = Upload.find_by(sha1: sha1) + @upload = existing_upload if existing_upload + + upload.attributes = { + sha1: sha1, + width: w, + height: h, + thumbnail_width: ww, + thumbnail_height: hh, + filesize: File.size(path) + } + + if upload.filesize > upload.filesize_was + log "No filesize reduction" + return false + end + + unless existing_upload + url = Discourse.store.store_upload(File.new(path), upload) + + unless url + log "Couldn't store the upload" + return false + end + + upload.url = url + end + + log "base62: #{original_upload.base62_sha1} -> #{Upload.base62_sha1(sha1)}" + log "sha: #{original_upload.sha1} -> #{sha1}" + log "(an exisiting upload)" if existing_upload + + success = true + posts = Post.unscoped.joins(:post_uploads).where(post_uploads: { upload_id: original_upload.id }).uniq.sort_by(&:created_at) + + posts.each do |post| + transform_post(post, original_upload, upload) + + if post.custom_fields[Post::DOWNLOADED_IMAGES].present? + downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES]) + end + + if post.raw_changed? + log "Updating post" + elsif downloaded_images&.has_value?(original_upload.id) + log "A hotlinked, unreferenced image" + elsif post.raw.include?(upload.short_url) + log "Already processed" + elsif post.trashed? + log "A deleted post" + elsif !post.topic || post.topic.trashed? + log "A deleted topic" + elsif post.cooked.include?(original_upload.sha1) + if post.raw.include?("#{Discourse.base_url.sub(/^https?:\/\//i, "")}/t/") + log "Updating a topic onebox" + else + log "Updating an external onebox" + end + else + log "Could not find the upload URL" + success = false + end + + log "#{Discourse.base_url}/p/#{post.id}" + end + + if posts.empty? + log "Upload not used in any posts" + + if User.where(uploaded_avatar_id: original_upload.id).exists? + log "Used as a User avatar" + elsif UserAvatar.where(gravatar_upload_id: original_upload.id).exists? + log "Used as a UserAvatar gravatar" + elsif UserAvatar.where(custom_upload_id: original_upload.id).exists? + log "Used as a UserAvatar custom upload" + elsif UserProfile.where(profile_background_upload_id: original_upload.id).exists? + log "Used as a UserProfile profile background" + elsif UserProfile.where(card_background_upload_id: original_upload.id).exists? + log "Used as a UserProfile card background" + elsif Category.where(uploaded_logo_id: original_upload.id).exists? + log "Used as a Category logo" + elsif Category.where(uploaded_background_id: original_upload.id).exists? + log "Used as a Category background" + elsif CustomEmoji.where(upload_id: original_upload.id).exists? + log "Used as a CustomEmoji" + elsif ThemeField.where(upload_id: original_upload.id).exists? + log "Used as a ThemeField" + else + success = false + end + end + + unless success + if @interactive + print "Press any key to continue with the upload" + STDIN.beep + STDIN.getch + puts " k" + else + if !existing_upload && !Upload.where(url: upload.url).exists? + # We're bailing, so clean up the just uploaded file + Discourse.store.remove_upload(upload) + end + + log "⏩ Skipping" + return false + end + end + + unless upload.save + if !existing_upload && !Upload.where(url: upload.url).exists? + # We're bailing, so clean up the just uploaded file + Discourse.store.remove_upload(upload) + end + + log "⏩ Skipping an invalid upload" + return false + end + + if existing_upload + begin + PostUpload.where(upload_id: original_upload.id).update_all(upload_id: upload.id) + rescue ActiveRecord::RecordNotUnique, PG::UniqueViolation + end + + User.where(uploaded_avatar_id: original_upload.id).update_all(uploaded_avatar_id: upload.id) + UserAvatar.where(gravatar_upload_id: original_upload.id).update_all(gravatar_upload_id: upload.id) + UserAvatar.where(custom_upload_id: original_upload.id).update_all(custom_upload_id: upload.id) + UserProfile.where(profile_background_upload_id: original_upload.id).update_all(profile_background_upload_id: upload.id) + UserProfile.where(card_background_upload_id: original_upload.id).update_all(card_background_upload_id: upload.id) + Category.where(uploaded_logo_id: original_upload.id).update_all(uploaded_logo_id: upload.id) + Category.where(uploaded_background_id: original_upload.id).update_all(uploaded_background_id: upload.id) + CustomEmoji.where(upload_id: original_upload.id).update_all(upload_id: upload.id) + ThemeField.where(upload_id: original_upload.id).update_all(upload_id: upload.id) + else + upload.optimized_images.each(&:destroy!) + end + + posts.each do |post| + DistributedMutex.synchronize("process_post_#{post.id}") do + current_post = Post.unscoped.find(post.id) + + # If the post became outdated, reapply changes + if current_post.updated_at != post.updated_at + transform_post(current_post, original_upload, upload) + post = current_post + end + + if post.raw_changed? + post.update_columns( + raw: post.raw, + updated_at: Time.zone.now + ) + end + + if existing_upload && post.custom_fields[Post::DOWNLOADED_IMAGES].present? + downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES]) + + downloaded_images.transform_values! do |upload_id| + upload_id == original_upload.id ? upload.id : upload_id + end + + post.custom_fields[Post::DOWNLOADED_IMAGES] = downloaded_images.to_json if downloaded_images.present? + post.save_custom_fields + end + + post.rebake! + end + end + + if existing_upload + original_upload.reload.destroy! + else + Discourse.store.remove_upload(original_upload) + end + + true + end + + private + + def transform_post(post, upload_before, upload_after) + post.raw.gsub!(/upload:\/\/#{upload_before.base62_sha1}(\.#{upload_before.extension})?/i, upload_after.short_url) + post.raw.gsub!(Discourse.store.cdn_url(upload_before.url), Discourse.store.cdn_url(upload_after.url)) + post.raw.gsub!("#{Discourse.base_url}#{upload_before.short_path}", "#{Discourse.base_url}#{upload_after.short_path}") + + if SiteSetting.enable_s3_uploads + post.raw.gsub!(Discourse.store.url_for(upload_before), Discourse.store.url_for(upload_after)) + + path = SiteSetting.Upload.s3_upload_bucket.split("/", 2)[1] + post.raw.gsub!(/\d+)x(?\d+).*?\" alt=\"(?.*?)\"\/?>/i) do + "![#{$~[:alt]}|#{$~[:width]}x#{$~[:height]}](#{upload_after.short_url})" + end + end + + post.raw.gsub!(/!\[(.*?)\]\(\/uploads\/.+?\/#{upload_before.sha1}(\.#{upload_before.extension})?\)/i, "![\\1](#{upload_after.short_url})") + end + + def log(*args) + puts(*args) if @verbose + end +end diff --git a/script/downsize_uploads.rb b/script/downsize_uploads.rb index dc2e7b2a628..7fcb1e64313 100644 --- a/script/downsize_uploads.rb +++ b/script/downsize_uploads.rb @@ -29,225 +29,13 @@ def log(*args) puts(*args) if ENV["VERBOSE"] end -def transform_post(post, upload_before, upload_after) - post.raw.gsub!(/upload:\/\/#{upload_before.base62_sha1}(\.#{upload_before.extension})?/i, upload_after.short_url) - post.raw.gsub!(Discourse.store.cdn_url(upload_before.url), Discourse.store.cdn_url(upload_after.url)) - post.raw.gsub!(Discourse.store.url_for(upload_before), Discourse.store.url_for(upload_after)) - post.raw.gsub!("#{Discourse.base_url}#{upload_before.short_path}", "#{Discourse.base_url}#{upload_after.short_path}") - - path = SiteSetting.Upload.s3_upload_bucket.split("/", 2)[1] - post.raw.gsub!(/\d+)x(?\d+).*?\" alt=\"(?.*?)\"\/?>/i) do - "![#{$~[:alt]}|#{$~[:width]}x#{$~[:height]}](#{upload_after.short_url})" - end - - post.raw.gsub!(/!\[(.*?)\]\(\/uploads\/.+?\/#{upload_before.sha1}(\.#{upload_before.extension})?\)/i, "![\\1](#{upload_after.short_url})") -end - -def downsize_upload(upload, path) - # Make sure the filesize is up to date - upload.filesize = File.size(path) - - OptimizedImage.downsize(path, path, "#{MAX_IMAGE_PIXELS}@", filename: upload.original_filename) - sha1 = Upload.generate_digest(path) - - if sha1 == upload.sha1 - log "No sha1 change" - return - end - - w, h = FastImage.size(path, timeout: 15, raise_on_failure: true) - - if !w || !h - log "Invalid image dimensions after resizing" - return - end - - # Neither #dup or #clone provide a complete copy - original_upload = Upload.find(upload.id) - ww, hh = ImageSizer.resize(w, h) - - # A different upload record that matches the sha1 of the downsized image - existing_upload = Upload.find_by(sha1: sha1) - upload = existing_upload if existing_upload - - upload.attributes = { - sha1: sha1, - width: w, - height: h, - thumbnail_width: ww, - thumbnail_height: hh, - filesize: File.size(path) - } - - if upload.filesize > upload.filesize_was - log "No filesize reduction" - return - end - - unless existing_upload - url = Discourse.store.store_upload(File.new(path), upload) - - unless url - log "Couldn't store the upload" - return - end - - upload.url = url - end - - log "base62: #{original_upload.base62_sha1} -> #{Upload.base62_sha1(sha1)}" - log "sha: #{original_upload.sha1} -> #{sha1}" - log "(an exisiting upload)" if existing_upload - - success = true - posts = Post.unscoped.joins(:post_uploads).where(post_uploads: { upload_id: original_upload.id }).uniq.sort_by(&:created_at) - - posts.each do |post| - transform_post(post, original_upload, upload) - - if post.custom_fields[Post::DOWNLOADED_IMAGES].present? - downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES]) - end - - if post.raw_changed? - log "Updating post" - elsif downloaded_images&.has_value?(original_upload.id) - log "A hotlinked, unreferenced image" - elsif post.raw.include?(upload.short_url) - log "Already processed" - elsif post.trashed? - log "A deleted post" - elsif !post.topic || post.topic.trashed? - log "A deleted topic" - elsif post.cooked.include?(original_upload.sha1) - if post.raw.include?("#{Discourse.base_url.sub(/^https?:\/\//i, "")}/t/") - log "Updating a topic onebox" - else - log "Updating an external onebox" - end - else - log "Could not find the upload URL" - success = false - end - - log "#{Discourse.base_url}/p/#{post.id}" - end - - if posts.empty? - log "Upload not used in any posts" - - if User.where(uploaded_avatar_id: original_upload.id).count - log "Used as a User avatar" - elsif UserAvatar.where(gravatar_upload_id: original_upload.id).count - log "Used as a UserAvatar gravatar" - elsif UserAvatar.where(custom_upload_id: original_upload.id).count - log "Used as a UserAvatar custom upload" - elsif UserProfile.where(profile_background_upload_id: original_upload.id).count - log "Used as a UserProfile profile background" - elsif UserProfile.where(card_background_upload_id: original_upload.id).count - log "Used as a UserProfile card background" - elsif Category.where(uploaded_logo_id: original_upload.id).count - log "Used as a Category logo" - elsif Category.where(uploaded_background_id: original_upload.id).count - log "Used as a Category background" - elsif CustomEmoji.where(upload_id: original_upload.id).count - log "Used as a CustomEmoji" - elsif ThemeField.where(upload_id: original_upload.id).count - log "Used as a ThemeField" - else - success = false - end - end - - unless success - if ENV["INTERACTIVE"] - print "Press any key to continue with the upload" - STDIN.beep - STDIN.getch - puts " k" - elsif !existing_upload && !Upload.where(url: upload.url).exist? - # We're bailing, so clean up the just uploaded file - Discourse.store.remove_upload(upload) - - log "⏩ Skipping" - return - end - end - - upload.save! - - if existing_upload - begin - PostUpload.where(upload_id: original_upload.id).update_all(upload_id: upload.id) - rescue ActiveRecord::RecordNotUnique, PG::UniqueViolation - end - - User.where(uploaded_avatar_id: original_upload.id).update_all(uploaded_avatar_id: upload.id) - UserAvatar.where(gravatar_upload_id: original_upload.id).update_all(gravatar_upload_id: upload.id) - UserAvatar.where(custom_upload_id: original_upload.id).update_all(custom_upload_id: upload.id) - UserProfile.where(profile_background_upload_id: original_upload.id).update_all(profile_background_upload_id: upload.id) - UserProfile.where(card_background_upload_id: original_upload.id).update_all(card_background_upload_id: upload.id) - Category.where(uploaded_logo_id: original_upload.id).update_all(uploaded_logo_id: upload.id) - Category.where(uploaded_background_id: original_upload.id).update_all(uploaded_background_id: upload.id) - CustomEmoji.where(upload_id: original_upload.id).update_all(upload_id: upload.id) - ThemeField.where(upload_id: original_upload.id).update_all(upload_id: upload.id) - else - upload.optimized_images.each(&:destroy!) - end - - posts.each do |post| - DistributedMutex.synchronize("process_post_#{post.id}") do - current_post = Post.unscoped.find(post.id) - - # If the post became outdated, reapply changes - if current_post.updated_at != post.updated_at - transform_post(current_post, original_upload, upload) - post = current_post - end - - if post.raw_changed? - post.update_columns( - raw: post.raw, - updated_at: Time.zone.now - ) - end - - if existing_upload && post.custom_fields[Post::DOWNLOADED_IMAGES].present? - downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES]) - - downloaded_images.transform_values! do |upload_id| - upload_id == original_upload.id ? upload.id : upload_id - end - - post.custom_fields[Post::DOWNLOADED_IMAGES] = downloaded_images.to_json if downloaded_images.present? - post.save_custom_fields - end - - post.rebake! - end - end - - if existing_upload - original_upload.reload.destroy! - else - Discourse.store.remove_upload(original_upload) - end - - true -end - def process_uploads - unless SiteSetting.Upload.enable_s3_uploads - puts "This script supports only S3 uploads" - return - end - puts "", "Downsizing images to no more than #{MAX_IMAGE_PIXELS} pixels" dimensions_count = 0 downsized_count = 0 - scope = Upload.where("LOWER(extension) IN ('jpg', 'jpeg', 'gif', 'png')") + scope = Upload.by_users.where("LOWER(extension) IN ('jpg', 'jpeg', 'gif', 'png')") scope = scope.where(<<-SQL, MAX_IMAGE_PIXELS) COALESCE(width, 0) = 0 OR COALESCE(height, 0) = 0 OR @@ -271,19 +59,20 @@ def process_uploads print "\r#{progress}% Fixed dimensions: #{dimensions_count} Downsized: #{downsized_count} Skipped: #{skipped} (upload id: #{upload.id})" log "\n" - source = upload.local? ? Discourse.store.path_for(upload) : "https:#{upload.url}" + path = if upload.local? + Discourse.store.path_for(upload) + else + (Discourse.store.download(upload, max_file_size_kb: 100.megabytes) rescue nil)&.path + end - unless source - log "No path or URL" + unless path + log "No image path" skipped += 1 next end begin - w, h = FastImage.size(source, timeout: 15, raise_on_failure: true) - rescue FastImage::ImageFetchFailure - log "Retrying image resizing" - w, h = FastImage.size(source, timeout: 15) + w, h = FastImage.size(path, raise_on_failure: true) rescue FastImage::UnknownImageType log "Unknown image type" skipped += 1 @@ -312,13 +101,14 @@ def process_uploads width: w, height: h, thumbnail_width: ww, - thumbnail_height: hh + thumbnail_height: hh, + filesize: File.size(path) } if upload.changed? log "Correcting the upload dimensions" - log "Before: #{upload.width_was}x#{upload.height_was} #{upload.thumbnail_width_was}x#{upload.thumbnail_height_was}" - log "After: #{w}x#{h} #{ww}x#{hh}" + log "Before: #{upload.width_was}x#{upload.height_was} #{upload.thumbnail_width_was}x#{upload.thumbnail_height_was} (#{upload.filesize_was})" + log "After: #{w}x#{h} #{ww}x#{hh} (#{upload.filesize})" dimensions_count += 1 upload.save! @@ -330,15 +120,15 @@ def process_uploads next end - path = upload.local? ? source : (Discourse.store.download(upload) rescue nil)&.path + result = ShrinkUploadedImage.new( + upload: upload, + path: path, + max_pixels: MAX_IMAGE_PIXELS, + verbose: ENV["VERBOSE"], + interactive: ENV["INTERACTIVE"] + ).perform - unless path - log "No image path" - skipped += 1 - next - end - - if downsize_upload(upload, path) + if result downsized_count += 1 else skipped += 1 diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb index 65e4a1bf2fb..62272ecb9e3 100644 --- a/spec/fabricators/upload_fabricator.rb +++ b/spec/fabricators/upload_fabricator.rb @@ -66,6 +66,20 @@ Fabricator(:upload_s3, from: :upload) do end end +Fabricator(:s3_image_upload, from: :upload_s3) do + after_create do |upload| + file = Tempfile.new(['fabricated', '.png']) + `convert -size #{upload.width}x#{upload.height} xc:white "#{file.path}"` + + Discourse.store.store_upload(file, upload) + upload.sha1 = Upload.generate_digest(file.path) + + WebMock + .stub_request(:get, upload.url) + .to_return(status: 200, body: File.new(file.path)) + end +end + Fabricator(:secure_upload_s3, from: :upload_s3) do secure true sha1 { SecureRandom.hex(20) } diff --git a/spec/lib/shrink_uploaded_image_spec.rb b/spec/lib/shrink_uploaded_image_spec.rb new file mode 100644 index 00000000000..8c8e47165ab --- /dev/null +++ b/spec/lib/shrink_uploaded_image_spec.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe ShrinkUploadedImage do + context "when local uploads are enabled" do + let(:upload) { Fabricate(:image_upload, width: 200, height: 200) } + + it "resizes the image" do + filesize_before = upload.filesize + post = Fabricate(:post, raw: "") + post.link_post_uploads + + result = ShrinkUploadedImage.new( + upload: upload, + path: Discourse.store.path_for(upload), + max_pixels: 10_000 + ).perform + + expect(result).to be(true) + expect(upload.width).to eq(100) + expect(upload.height).to eq(100) + expect(upload.filesize).to be < filesize_before + end + + it "returns false if the image is not used by any models" do + result = ShrinkUploadedImage.new( + upload: upload, + path: Discourse.store.path_for(upload), + max_pixels: 10_000 + ).perform + + expect(result).to be(false) + end + + it "returns false if the image cannot be shrunk more" do + post = Fabricate(:post, raw: "") + post.link_post_uploads + ShrinkUploadedImage.new( + upload: upload, + path: Discourse.store.path_for(upload), + max_pixels: 10_000 + ).perform + + upload.reload + + result = ShrinkUploadedImage.new( + upload: upload, + path: Discourse.store.path_for(upload), + max_pixels: 10_000 + ).perform + + expect(result).to be(false) + end + + it "returns false when the upload is above the size limit" do + post = Fabricate(:post, raw: "") + post.link_post_uploads + SiteSetting.max_image_size_kb = 0.001 # 1 byte + + result = ShrinkUploadedImage.new( + upload: upload, + path: Discourse.store.path_for(upload), + max_pixels: 10_000 + ).perform + + expect(result).to be(false) + end + end + + context "when S3 uploads are enabled" do + let(:upload) { Fabricate(:s3_image_upload, width: 200, height: 200) } + + before do + SiteSetting.enable_s3_uploads = true + SiteSetting.s3_access_key_id = "fakeid7974664" + SiteSetting.s3_secret_access_key = "fakesecretid7974664" + + store = FileStore::S3Store.new + s3_helper = store.instance_variable_get(:@s3_helper) + client = Aws::S3::Client.new(stub_responses: true) + s3_helper.stubs(:s3_client).returns(client) + Discourse.stubs(:store).returns(store) + end + + it "resizes the image" do + filesize_before = upload.filesize + post = Fabricate(:post, raw: "") + post.link_post_uploads + + result = ShrinkUploadedImage.new( + upload: upload, + path: Discourse.store.download(upload).path, + max_pixels: 10_000 + ).perform + + expect(result).to be(true) + expect(upload.width).to eq(100) + expect(upload.height).to eq(100) + expect(upload.filesize).to be < filesize_before + end + end +end