diff --git a/script/downsize_uploads.rb b/script/downsize_uploads.rb
index 95d65c24946..dc2e7b2a628 100644
--- a/script/downsize_uploads.rb
+++ b/script/downsize_uploads.rb
@@ -2,80 +2,181 @@
require File.expand_path("../../config/environment", __FILE__)
-# no less than 1 megapixel
-max_image_pixels = [ARGV[0].to_i, 1_000_000].max
+# Supported ENV arguments:
+#
+# VERBOSE=1
+# Shows debug information.
+#
+# INTERACTIVE=1
+# Shows debug information and pauses for input on issues.
+#
+# WORKER_ID/WORKER_COUNT
+# When running the script on a single forum in multiple terminals.
+# For example, if you want 4 concurrent scripts use WORKER_COUNT=4
+# and WORKER_ID from 0 to 3
-puts "", "Downsizing images to no more than #{max_image_pixels} pixels"
+MIN_IMAGE_PIXELS = 500_000 # 0.5 megapixels
+DEFAULT_IMAGE_PIXELS = 1_000_000 # 1 megapixel
-dimensions_count = 0
-downsized_count = 0
+MAX_IMAGE_PIXELS = [
+ ARGV[0]&.to_i || DEFAULT_IMAGE_PIXELS,
+ MIN_IMAGE_PIXELS
+].max
-def downsize_upload(upload, path, max_image_pixels)
+ENV["VERBOSE"] = "1" if ENV["INTERACTIVE"]
+
+def log(*args)
+ puts(*args) if ENV["VERBOSE"]
+end
+
+def transform_post(post, upload_before, upload_after)
+ post.raw.gsub!(/upload:\/\/#{upload_before.base62_sha1}(\.#{upload_before.extension})?/i, upload_after.short_url)
+ post.raw.gsub!(Discourse.store.cdn_url(upload_before.url), Discourse.store.cdn_url(upload_after.url))
+ post.raw.gsub!(Discourse.store.url_for(upload_before), Discourse.store.url_for(upload_after))
+ post.raw.gsub!("#{Discourse.base_url}#{upload_before.short_path}", "#{Discourse.base_url}#{upload_after.short_path}")
+
+ path = SiteSetting.Upload.s3_upload_bucket.split("/", 2)[1]
+ post.raw.gsub!(/
\d+)x(?\d+).*?\" alt=\"(?.*?)\"\/?>/i) do
+ "![#{$~[:alt]}|#{$~[:width]}x#{$~[:height]}](#{upload_after.short_url})"
+ end
+
+ post.raw.gsub!(/!\[(.*?)\]\(\/uploads\/.+?\/#{upload_before.sha1}(\.#{upload_before.extension})?\)/i, "")
+end
+
+def downsize_upload(upload, path)
# Make sure the filesize is up to date
upload.filesize = File.size(path)
- OptimizedImage.downsize(path, path, "#{max_image_pixels}@", filename: upload.original_filename)
+ OptimizedImage.downsize(path, path, "#{MAX_IMAGE_PIXELS}@", filename: upload.original_filename)
sha1 = Upload.generate_digest(path)
if sha1 == upload.sha1
- puts "no sha1 change" if ENV["VERBOSE"]
+ log "No sha1 change"
return
end
- w, h = FastImage.size(path, timeout: 10, raise_on_failure: true)
+ w, h = FastImage.size(path, timeout: 15, raise_on_failure: true)
if !w || !h
- puts "invalid image dimensions after resizing" if ENV["VERBOSE"]
+ log "Invalid image dimensions after resizing"
return
end
# Neither #dup or #clone provide a complete copy
original_upload = Upload.find(upload.id)
ww, hh = ImageSizer.resize(w, h)
- new_file = true
- if existing_upload = Upload.find_by(sha1: sha1)
- upload = existing_upload
- new_file = false
- end
+ # A different upload record that matches the sha1 of the downsized image
+ existing_upload = Upload.find_by(sha1: sha1)
+ upload = existing_upload if existing_upload
- before = upload.filesize
- upload.filesize = File.size(path)
+ upload.attributes = {
+ sha1: sha1,
+ width: w,
+ height: h,
+ thumbnail_width: ww,
+ thumbnail_height: hh,
+ filesize: File.size(path)
+ }
- if upload.filesize > before
- puts "no filesize reduction" if ENV["VERBOSE"]
+ if upload.filesize > upload.filesize_was
+ log "No filesize reduction"
return
end
- upload.sha1 = sha1
- upload.width = w
- upload.height = h
- upload.thumbnail_width = ww
- upload.thumbnail_height = hh
-
- if new_file
+ unless existing_upload
url = Discourse.store.store_upload(File.new(path), upload)
unless url
- puts "couldn't store the upload" if ENV["VERBOSE"]
+ log "Couldn't store the upload"
return
end
upload.url = url
end
- if ENV["VERBOSE"]
- puts "base62: #{original_upload.base62_sha1} -> #{Upload.base62_sha1(sha1)}"
- puts "sha1: #{original_upload.sha1} -> #{sha1}"
- puts "is a new file: #{new_file}"
+ log "base62: #{original_upload.base62_sha1} -> #{Upload.base62_sha1(sha1)}"
+ log "sha: #{original_upload.sha1} -> #{sha1}"
+ log "(an exisiting upload)" if existing_upload
+
+ success = true
+ posts = Post.unscoped.joins(:post_uploads).where(post_uploads: { upload_id: original_upload.id }).uniq.sort_by(&:created_at)
+
+ posts.each do |post|
+ transform_post(post, original_upload, upload)
+
+ if post.custom_fields[Post::DOWNLOADED_IMAGES].present?
+ downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES])
+ end
+
+ if post.raw_changed?
+ log "Updating post"
+ elsif downloaded_images&.has_value?(original_upload.id)
+ log "A hotlinked, unreferenced image"
+ elsif post.raw.include?(upload.short_url)
+ log "Already processed"
+ elsif post.trashed?
+ log "A deleted post"
+ elsif !post.topic || post.topic.trashed?
+ log "A deleted topic"
+ elsif post.cooked.include?(original_upload.sha1)
+ if post.raw.include?("#{Discourse.base_url.sub(/^https?:\/\//i, "")}/t/")
+ log "Updating a topic onebox"
+ else
+ log "Updating an external onebox"
+ end
+ else
+ log "Could not find the upload URL"
+ success = false
+ end
+
+ log "#{Discourse.base_url}/p/#{post.id}"
+ end
+
+ if posts.empty?
+ log "Upload not used in any posts"
+
+ if User.where(uploaded_avatar_id: original_upload.id).count
+ log "Used as a User avatar"
+ elsif UserAvatar.where(gravatar_upload_id: original_upload.id).count
+ log "Used as a UserAvatar gravatar"
+ elsif UserAvatar.where(custom_upload_id: original_upload.id).count
+ log "Used as a UserAvatar custom upload"
+ elsif UserProfile.where(profile_background_upload_id: original_upload.id).count
+ log "Used as a UserProfile profile background"
+ elsif UserProfile.where(card_background_upload_id: original_upload.id).count
+ log "Used as a UserProfile card background"
+ elsif Category.where(uploaded_logo_id: original_upload.id).count
+ log "Used as a Category logo"
+ elsif Category.where(uploaded_background_id: original_upload.id).count
+ log "Used as a Category background"
+ elsif CustomEmoji.where(upload_id: original_upload.id).count
+ log "Used as a CustomEmoji"
+ elsif ThemeField.where(upload_id: original_upload.id).count
+ log "Used as a ThemeField"
+ else
+ success = false
+ end
+ end
+
+ unless success
+ if ENV["INTERACTIVE"]
+ print "Press any key to continue with the upload"
+ STDIN.beep
+ STDIN.getch
+ puts " k"
+ elsif !existing_upload && !Upload.where(url: upload.url).exist?
+ # We're bailing, so clean up the just uploaded file
+ Discourse.store.remove_upload(upload)
+
+ log "⏩ Skipping"
+ return
+ end
end
upload.save!
- if new_file
- upload.optimized_images.each(&:destroy!)
- Discourse.store.remove_upload(original_upload)
- else
+ if existing_upload
begin
PostUpload.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
rescue ActiveRecord::RecordNotUnique, PG::UniqueViolation
@@ -90,85 +191,162 @@ def downsize_upload(upload, path, max_image_pixels)
Category.where(uploaded_background_id: original_upload.id).update_all(uploaded_background_id: upload.id)
CustomEmoji.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
ThemeField.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
+ else
+ upload.optimized_images.each(&:destroy!)
end
- original_upload.posts.each do |post|
- post.raw.gsub!(/upload:\/\/#{original_upload.base62_sha1}(\.#{original_upload.extension})?/, upload.short_url)
- post.raw.gsub!(Discourse.store.cdn_url(original_upload.url), Discourse.store.cdn_url(upload.url))
+ posts.each do |post|
+ DistributedMutex.synchronize("process_post_#{post.id}") do
+ current_post = Post.unscoped.find(post.id)
- if post.raw_changed?
- puts "updating post #{post.id}" if ENV["VERBOSE"]
- post.save!
- else
- puts "Could find the upload path in post.raw (post_id: #{post.id})" if ENV["VERBOSE"]
+ # If the post became outdated, reapply changes
+ if current_post.updated_at != post.updated_at
+ transform_post(current_post, original_upload, upload)
+ post = current_post
+ end
+
+ if post.raw_changed?
+ post.update_columns(
+ raw: post.raw,
+ updated_at: Time.zone.now
+ )
+ end
+
+ if existing_upload && post.custom_fields[Post::DOWNLOADED_IMAGES].present?
+ downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES])
+
+ downloaded_images.transform_values! do |upload_id|
+ upload_id == original_upload.id ? upload.id : upload_id
+ end
+
+ post.custom_fields[Post::DOWNLOADED_IMAGES] = downloaded_images.to_json if downloaded_images.present?
+ post.save_custom_fields
+ end
+
+ post.rebake!
end
-
- post.rebake!
end
- original_upload.reload.destroy! unless new_file
-
- puts "" if ENV["VERBOSE"]
+ if existing_upload
+ original_upload.reload.destroy!
+ else
+ Discourse.store.remove_upload(original_upload)
+ end
true
end
-scope = Upload
- .where("LOWER(extension) IN ('jpg', 'jpeg', 'gif', 'png')")
- .where("COALESCE(width, 0) = 0 OR COALESCE(height, 0) = 0 OR COALESCE(thumbnail_width, 0) = 0 OR COALESCE(thumbnail_height, 0) = 0 OR width * height > ?", max_image_pixels)
-
-puts "Uploads to process: #{scope.count}"
-
-scope.find_each do |upload|
- print "\rFixed dimensions: %8d Downsized: %8d (upload id: #{upload.id})".freeze % [dimensions_count, downsized_count]
- puts "\n" if ENV["VERBOSE"]
-
- source = upload.local? ? Discourse.store.path_for(upload) : "https:#{upload.url}"
-
- unless source
- puts "no path or URL" if ENV["VERBOSE"]
- next
+def process_uploads
+ unless SiteSetting.Upload.enable_s3_uploads
+ puts "This script supports only S3 uploads"
+ return
end
- w, h = FastImage.size(source, timeout: 10)
+ puts "", "Downsizing images to no more than #{MAX_IMAGE_PIXELS} pixels"
- if !w || !h
- puts "invalid image dimensions" if ENV["VERBOSE"]
- next
+ dimensions_count = 0
+ downsized_count = 0
+
+ scope = Upload.where("LOWER(extension) IN ('jpg', 'jpeg', 'gif', 'png')")
+ scope = scope.where(<<-SQL, MAX_IMAGE_PIXELS)
+ COALESCE(width, 0) = 0 OR
+ COALESCE(height, 0) = 0 OR
+ COALESCE(thumbnail_width, 0) = 0 OR
+ COALESCE(thumbnail_height, 0) = 0 OR
+ width * height > ?
+ SQL
+
+ if ENV["WORKER_ID"] && ENV["WORKER_COUNT"]
+ scope = scope.where("id % ? = ?", ENV["WORKER_COUNT"], ENV["WORKER_ID"])
end
- ww, hh = ImageSizer.resize(w, h)
+ skipped = 0
+ total_count = scope.count
+ puts "Uploads to process: #{total_count}"
- if w == 0 || h == 0 || ww == 0 || hh == 0
- puts "invalid image dimensions" if ENV["VERBOSE"]
- next
- end
+ scope.find_each.with_index do |upload, index|
+ progress = (index * 100.0 / total_count).round(1)
- if upload.read_attribute(:width) != w || upload.read_attribute(:height) != h || upload.read_attribute(:thumbnail_width) != ww || upload.read_attribute(:thumbnail_height) != hh
- puts "Correcting the upload dimensions" if ENV["VERBOSE"]
- dimensions_count += 1
+ log "\n"
+ print "\r#{progress}% Fixed dimensions: #{dimensions_count} Downsized: #{downsized_count} Skipped: #{skipped} (upload id: #{upload.id})"
+ log "\n"
- upload.update!(
+ source = upload.local? ? Discourse.store.path_for(upload) : "https:#{upload.url}"
+
+ unless source
+ log "No path or URL"
+ skipped += 1
+ next
+ end
+
+ begin
+ w, h = FastImage.size(source, timeout: 15, raise_on_failure: true)
+ rescue FastImage::ImageFetchFailure
+ log "Retrying image resizing"
+ w, h = FastImage.size(source, timeout: 15)
+ rescue FastImage::UnknownImageType
+ log "Unknown image type"
+ skipped += 1
+ next
+ rescue FastImage::SizeNotFound
+ log "Size not found"
+ skipped += 1
+ next
+ end
+
+ if !w || !h
+ log "Invalid image dimensions"
+ skipped += 1
+ next
+ end
+
+ ww, hh = ImageSizer.resize(w, h)
+
+ if w == 0 || h == 0 || ww == 0 || hh == 0
+ log "Invalid image dimensions"
+ skipped += 1
+ next
+ end
+
+ upload.attributes = {
width: w,
height: h,
thumbnail_width: ww,
- thumbnail_height: hh,
- )
+ thumbnail_height: hh
+ }
+
+ if upload.changed?
+ log "Correcting the upload dimensions"
+ log "Before: #{upload.width_was}x#{upload.height_was} #{upload.thumbnail_width_was}x#{upload.thumbnail_height_was}"
+ log "After: #{w}x#{h} #{ww}x#{hh}"
+
+ dimensions_count += 1
+ upload.save!
+ end
+
+ if w * h < MAX_IMAGE_PIXELS
+ log "Image size within allowed range"
+ skipped += 1
+ next
+ end
+
+ path = upload.local? ? source : (Discourse.store.download(upload) rescue nil)&.path
+
+ unless path
+ log "No image path"
+ skipped += 1
+ next
+ end
+
+ if downsize_upload(upload, path)
+ downsized_count += 1
+ else
+ skipped += 1
+ end
end
- if w * h < max_image_pixels
- puts "image size within allowed range" if ENV["VERBOSE"]
- next
- end
-
- path = upload.local? ? source : (Discourse.store.download(upload) rescue nil)&.path
-
- unless path
- puts "no image path" if ENV["VERBOSE"]
- next
- end
-
- downsized_count += 1 if downsize_upload(upload, path, max_image_pixels)
+ STDIN.beep
+ puts "", "Done", Time.zone.now
end
-puts "", "Done"
+process_uploads