mirror of
https://github.com/discourse/discourse.git
synced 2025-01-18 14:32:44 +08:00
FIX: Improve image downsizing script (#9549)
Correctly handles more upload formats in posts, updates post custom fields, fixes more edge cases, adds debugging capabilities. (VERBOSE=1 and INTERACTIVE=1 flags) Includes these commits and some more: * DEV: Show the fixed image dimensions * FIX: Support more upload url formats * DEV: Remove the old upload after updating posts * FIX: Use the `process_post_#{id}` mutex * FIX: Avoid rebaking twice * DEV: Print out the link to the post * DEV: Process posts chronologically * DEV: Do a dry-run before saving, pause on any issue * FIX: Also process deleted posts * DEV: Make matchers case-insensitive * DEV: Pause on "detached" uploads, add more debug info * DEV: Print out time when finished * DEV: Add support for WORKER_ID/WORKER_COUNT * DEV: Fix the onebox in cooked text heuristic * DEV: Don't report already processed posts * DEV: Beep when done! * DEV: Ignore issues with deleted posts * DEV: Ignore issues with deleted topics * DEV: Multiline SQL * DEV: Use the bulk attribute assignment * DEV: Add ENV["INTERACTIVE"] mode * DEV: Handle post custom fields * DEV: Bail on non-S3 sites * DEV: Allow sizes smaller than 1 mpix
This commit is contained in:
parent
b61a291cf3
commit
eb462bfb3d
|
@ -2,80 +2,181 @@
|
|||
|
||||
require File.expand_path("../../config/environment", __FILE__)
|
||||
|
||||
# no less than 1 megapixel
|
||||
max_image_pixels = [ARGV[0].to_i, 1_000_000].max
|
||||
# Supported ENV arguments:
|
||||
#
|
||||
# VERBOSE=1
|
||||
# Shows debug information.
|
||||
#
|
||||
# INTERACTIVE=1
|
||||
# Shows debug information and pauses for input on issues.
|
||||
#
|
||||
# WORKER_ID/WORKER_COUNT
|
||||
# When running the script on a single forum in multiple terminals.
|
||||
# For example, if you want 4 concurrent scripts use WORKER_COUNT=4
|
||||
# and WORKER_ID from 0 to 3
|
||||
|
||||
puts "", "Downsizing images to no more than #{max_image_pixels} pixels"
|
||||
MIN_IMAGE_PIXELS = 500_000 # 0.5 megapixels
|
||||
DEFAULT_IMAGE_PIXELS = 1_000_000 # 1 megapixel
|
||||
|
||||
dimensions_count = 0
|
||||
downsized_count = 0
|
||||
MAX_IMAGE_PIXELS = [
|
||||
ARGV[0]&.to_i || DEFAULT_IMAGE_PIXELS,
|
||||
MIN_IMAGE_PIXELS
|
||||
].max
|
||||
|
||||
def downsize_upload(upload, path, max_image_pixels)
|
||||
ENV["VERBOSE"] = "1" if ENV["INTERACTIVE"]
|
||||
|
||||
def log(*args)
|
||||
puts(*args) if ENV["VERBOSE"]
|
||||
end
|
||||
|
||||
def transform_post(post, upload_before, upload_after)
|
||||
post.raw.gsub!(/upload:\/\/#{upload_before.base62_sha1}(\.#{upload_before.extension})?/i, upload_after.short_url)
|
||||
post.raw.gsub!(Discourse.store.cdn_url(upload_before.url), Discourse.store.cdn_url(upload_after.url))
|
||||
post.raw.gsub!(Discourse.store.url_for(upload_before), Discourse.store.url_for(upload_after))
|
||||
post.raw.gsub!("#{Discourse.base_url}#{upload_before.short_path}", "#{Discourse.base_url}#{upload_after.short_path}")
|
||||
|
||||
path = SiteSetting.Upload.s3_upload_bucket.split("/", 2)[1]
|
||||
post.raw.gsub!(/<img src=\"https:\/\/.+?\/#{path}\/uploads\/default\/optimized\/.+?\/#{upload_before.sha1}_\d_(?<width>\d+)x(?<height>\d+).*?\" alt=\"(?<alt>.*?)\"\/?>/i) do
|
||||
"![#{$~[:alt]}|#{$~[:width]}x#{$~[:height]}](#{upload_after.short_url})"
|
||||
end
|
||||
|
||||
post.raw.gsub!(/!\[(.*?)\]\(\/uploads\/.+?\/#{upload_before.sha1}(\.#{upload_before.extension})?\)/i, "![\\1](#{upload_after.short_url})")
|
||||
end
|
||||
|
||||
def downsize_upload(upload, path)
|
||||
# Make sure the filesize is up to date
|
||||
upload.filesize = File.size(path)
|
||||
|
||||
OptimizedImage.downsize(path, path, "#{max_image_pixels}@", filename: upload.original_filename)
|
||||
OptimizedImage.downsize(path, path, "#{MAX_IMAGE_PIXELS}@", filename: upload.original_filename)
|
||||
sha1 = Upload.generate_digest(path)
|
||||
|
||||
if sha1 == upload.sha1
|
||||
puts "no sha1 change" if ENV["VERBOSE"]
|
||||
log "No sha1 change"
|
||||
return
|
||||
end
|
||||
|
||||
w, h = FastImage.size(path, timeout: 10, raise_on_failure: true)
|
||||
w, h = FastImage.size(path, timeout: 15, raise_on_failure: true)
|
||||
|
||||
if !w || !h
|
||||
puts "invalid image dimensions after resizing" if ENV["VERBOSE"]
|
||||
log "Invalid image dimensions after resizing"
|
||||
return
|
||||
end
|
||||
|
||||
# Neither #dup or #clone provide a complete copy
|
||||
original_upload = Upload.find(upload.id)
|
||||
ww, hh = ImageSizer.resize(w, h)
|
||||
new_file = true
|
||||
|
||||
if existing_upload = Upload.find_by(sha1: sha1)
|
||||
upload = existing_upload
|
||||
new_file = false
|
||||
end
|
||||
# A different upload record that matches the sha1 of the downsized image
|
||||
existing_upload = Upload.find_by(sha1: sha1)
|
||||
upload = existing_upload if existing_upload
|
||||
|
||||
before = upload.filesize
|
||||
upload.filesize = File.size(path)
|
||||
upload.attributes = {
|
||||
sha1: sha1,
|
||||
width: w,
|
||||
height: h,
|
||||
thumbnail_width: ww,
|
||||
thumbnail_height: hh,
|
||||
filesize: File.size(path)
|
||||
}
|
||||
|
||||
if upload.filesize > before
|
||||
puts "no filesize reduction" if ENV["VERBOSE"]
|
||||
if upload.filesize > upload.filesize_was
|
||||
log "No filesize reduction"
|
||||
return
|
||||
end
|
||||
|
||||
upload.sha1 = sha1
|
||||
upload.width = w
|
||||
upload.height = h
|
||||
upload.thumbnail_width = ww
|
||||
upload.thumbnail_height = hh
|
||||
|
||||
if new_file
|
||||
unless existing_upload
|
||||
url = Discourse.store.store_upload(File.new(path), upload)
|
||||
|
||||
unless url
|
||||
puts "couldn't store the upload" if ENV["VERBOSE"]
|
||||
log "Couldn't store the upload"
|
||||
return
|
||||
end
|
||||
|
||||
upload.url = url
|
||||
end
|
||||
|
||||
if ENV["VERBOSE"]
|
||||
puts "base62: #{original_upload.base62_sha1} -> #{Upload.base62_sha1(sha1)}"
|
||||
puts "sha1: #{original_upload.sha1} -> #{sha1}"
|
||||
puts "is a new file: #{new_file}"
|
||||
log "base62: #{original_upload.base62_sha1} -> #{Upload.base62_sha1(sha1)}"
|
||||
log "sha: #{original_upload.sha1} -> #{sha1}"
|
||||
log "(an exisiting upload)" if existing_upload
|
||||
|
||||
success = true
|
||||
posts = Post.unscoped.joins(:post_uploads).where(post_uploads: { upload_id: original_upload.id }).uniq.sort_by(&:created_at)
|
||||
|
||||
posts.each do |post|
|
||||
transform_post(post, original_upload, upload)
|
||||
|
||||
if post.custom_fields[Post::DOWNLOADED_IMAGES].present?
|
||||
downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES])
|
||||
end
|
||||
|
||||
if post.raw_changed?
|
||||
log "Updating post"
|
||||
elsif downloaded_images&.has_value?(original_upload.id)
|
||||
log "A hotlinked, unreferenced image"
|
||||
elsif post.raw.include?(upload.short_url)
|
||||
log "Already processed"
|
||||
elsif post.trashed?
|
||||
log "A deleted post"
|
||||
elsif !post.topic || post.topic.trashed?
|
||||
log "A deleted topic"
|
||||
elsif post.cooked.include?(original_upload.sha1)
|
||||
if post.raw.include?("#{Discourse.base_url.sub(/^https?:\/\//i, "")}/t/")
|
||||
log "Updating a topic onebox"
|
||||
else
|
||||
log "Updating an external onebox"
|
||||
end
|
||||
else
|
||||
log "Could not find the upload URL"
|
||||
success = false
|
||||
end
|
||||
|
||||
log "#{Discourse.base_url}/p/#{post.id}"
|
||||
end
|
||||
|
||||
if posts.empty?
|
||||
log "Upload not used in any posts"
|
||||
|
||||
if User.where(uploaded_avatar_id: original_upload.id).count
|
||||
log "Used as a User avatar"
|
||||
elsif UserAvatar.where(gravatar_upload_id: original_upload.id).count
|
||||
log "Used as a UserAvatar gravatar"
|
||||
elsif UserAvatar.where(custom_upload_id: original_upload.id).count
|
||||
log "Used as a UserAvatar custom upload"
|
||||
elsif UserProfile.where(profile_background_upload_id: original_upload.id).count
|
||||
log "Used as a UserProfile profile background"
|
||||
elsif UserProfile.where(card_background_upload_id: original_upload.id).count
|
||||
log "Used as a UserProfile card background"
|
||||
elsif Category.where(uploaded_logo_id: original_upload.id).count
|
||||
log "Used as a Category logo"
|
||||
elsif Category.where(uploaded_background_id: original_upload.id).count
|
||||
log "Used as a Category background"
|
||||
elsif CustomEmoji.where(upload_id: original_upload.id).count
|
||||
log "Used as a CustomEmoji"
|
||||
elsif ThemeField.where(upload_id: original_upload.id).count
|
||||
log "Used as a ThemeField"
|
||||
else
|
||||
success = false
|
||||
end
|
||||
end
|
||||
|
||||
unless success
|
||||
if ENV["INTERACTIVE"]
|
||||
print "Press any key to continue with the upload"
|
||||
STDIN.beep
|
||||
STDIN.getch
|
||||
puts " k"
|
||||
elsif !existing_upload && !Upload.where(url: upload.url).exist?
|
||||
# We're bailing, so clean up the just uploaded file
|
||||
Discourse.store.remove_upload(upload)
|
||||
|
||||
log "⏩ Skipping"
|
||||
return
|
||||
end
|
||||
end
|
||||
|
||||
upload.save!
|
||||
|
||||
if new_file
|
||||
upload.optimized_images.each(&:destroy!)
|
||||
Discourse.store.remove_upload(original_upload)
|
||||
else
|
||||
if existing_upload
|
||||
begin
|
||||
PostUpload.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
|
||||
rescue ActiveRecord::RecordNotUnique, PG::UniqueViolation
|
||||
|
@ -90,85 +191,162 @@ def downsize_upload(upload, path, max_image_pixels)
|
|||
Category.where(uploaded_background_id: original_upload.id).update_all(uploaded_background_id: upload.id)
|
||||
CustomEmoji.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
|
||||
ThemeField.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
|
||||
else
|
||||
upload.optimized_images.each(&:destroy!)
|
||||
end
|
||||
|
||||
original_upload.posts.each do |post|
|
||||
post.raw.gsub!(/upload:\/\/#{original_upload.base62_sha1}(\.#{original_upload.extension})?/, upload.short_url)
|
||||
post.raw.gsub!(Discourse.store.cdn_url(original_upload.url), Discourse.store.cdn_url(upload.url))
|
||||
posts.each do |post|
|
||||
DistributedMutex.synchronize("process_post_#{post.id}") do
|
||||
current_post = Post.unscoped.find(post.id)
|
||||
|
||||
if post.raw_changed?
|
||||
puts "updating post #{post.id}" if ENV["VERBOSE"]
|
||||
post.save!
|
||||
else
|
||||
puts "Could find the upload path in post.raw (post_id: #{post.id})" if ENV["VERBOSE"]
|
||||
# If the post became outdated, reapply changes
|
||||
if current_post.updated_at != post.updated_at
|
||||
transform_post(current_post, original_upload, upload)
|
||||
post = current_post
|
||||
end
|
||||
|
||||
if post.raw_changed?
|
||||
post.update_columns(
|
||||
raw: post.raw,
|
||||
updated_at: Time.zone.now
|
||||
)
|
||||
end
|
||||
|
||||
if existing_upload && post.custom_fields[Post::DOWNLOADED_IMAGES].present?
|
||||
downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES])
|
||||
|
||||
downloaded_images.transform_values! do |upload_id|
|
||||
upload_id == original_upload.id ? upload.id : upload_id
|
||||
end
|
||||
|
||||
post.custom_fields[Post::DOWNLOADED_IMAGES] = downloaded_images.to_json if downloaded_images.present?
|
||||
post.save_custom_fields
|
||||
end
|
||||
|
||||
post.rebake!
|
||||
end
|
||||
|
||||
post.rebake!
|
||||
end
|
||||
|
||||
original_upload.reload.destroy! unless new_file
|
||||
|
||||
puts "" if ENV["VERBOSE"]
|
||||
if existing_upload
|
||||
original_upload.reload.destroy!
|
||||
else
|
||||
Discourse.store.remove_upload(original_upload)
|
||||
end
|
||||
|
||||
true
|
||||
end
|
||||
|
||||
scope = Upload
|
||||
.where("LOWER(extension) IN ('jpg', 'jpeg', 'gif', 'png')")
|
||||
.where("COALESCE(width, 0) = 0 OR COALESCE(height, 0) = 0 OR COALESCE(thumbnail_width, 0) = 0 OR COALESCE(thumbnail_height, 0) = 0 OR width * height > ?", max_image_pixels)
|
||||
|
||||
puts "Uploads to process: #{scope.count}"
|
||||
|
||||
scope.find_each do |upload|
|
||||
print "\rFixed dimensions: %8d Downsized: %8d (upload id: #{upload.id})".freeze % [dimensions_count, downsized_count]
|
||||
puts "\n" if ENV["VERBOSE"]
|
||||
|
||||
source = upload.local? ? Discourse.store.path_for(upload) : "https:#{upload.url}"
|
||||
|
||||
unless source
|
||||
puts "no path or URL" if ENV["VERBOSE"]
|
||||
next
|
||||
def process_uploads
|
||||
unless SiteSetting.Upload.enable_s3_uploads
|
||||
puts "This script supports only S3 uploads"
|
||||
return
|
||||
end
|
||||
|
||||
w, h = FastImage.size(source, timeout: 10)
|
||||
puts "", "Downsizing images to no more than #{MAX_IMAGE_PIXELS} pixels"
|
||||
|
||||
if !w || !h
|
||||
puts "invalid image dimensions" if ENV["VERBOSE"]
|
||||
next
|
||||
dimensions_count = 0
|
||||
downsized_count = 0
|
||||
|
||||
scope = Upload.where("LOWER(extension) IN ('jpg', 'jpeg', 'gif', 'png')")
|
||||
scope = scope.where(<<-SQL, MAX_IMAGE_PIXELS)
|
||||
COALESCE(width, 0) = 0 OR
|
||||
COALESCE(height, 0) = 0 OR
|
||||
COALESCE(thumbnail_width, 0) = 0 OR
|
||||
COALESCE(thumbnail_height, 0) = 0 OR
|
||||
width * height > ?
|
||||
SQL
|
||||
|
||||
if ENV["WORKER_ID"] && ENV["WORKER_COUNT"]
|
||||
scope = scope.where("id % ? = ?", ENV["WORKER_COUNT"], ENV["WORKER_ID"])
|
||||
end
|
||||
|
||||
ww, hh = ImageSizer.resize(w, h)
|
||||
skipped = 0
|
||||
total_count = scope.count
|
||||
puts "Uploads to process: #{total_count}"
|
||||
|
||||
if w == 0 || h == 0 || ww == 0 || hh == 0
|
||||
puts "invalid image dimensions" if ENV["VERBOSE"]
|
||||
next
|
||||
end
|
||||
scope.find_each.with_index do |upload, index|
|
||||
progress = (index * 100.0 / total_count).round(1)
|
||||
|
||||
if upload.read_attribute(:width) != w || upload.read_attribute(:height) != h || upload.read_attribute(:thumbnail_width) != ww || upload.read_attribute(:thumbnail_height) != hh
|
||||
puts "Correcting the upload dimensions" if ENV["VERBOSE"]
|
||||
dimensions_count += 1
|
||||
log "\n"
|
||||
print "\r#{progress}% Fixed dimensions: #{dimensions_count} Downsized: #{downsized_count} Skipped: #{skipped} (upload id: #{upload.id})"
|
||||
log "\n"
|
||||
|
||||
upload.update!(
|
||||
source = upload.local? ? Discourse.store.path_for(upload) : "https:#{upload.url}"
|
||||
|
||||
unless source
|
||||
log "No path or URL"
|
||||
skipped += 1
|
||||
next
|
||||
end
|
||||
|
||||
begin
|
||||
w, h = FastImage.size(source, timeout: 15, raise_on_failure: true)
|
||||
rescue FastImage::ImageFetchFailure
|
||||
log "Retrying image resizing"
|
||||
w, h = FastImage.size(source, timeout: 15)
|
||||
rescue FastImage::UnknownImageType
|
||||
log "Unknown image type"
|
||||
skipped += 1
|
||||
next
|
||||
rescue FastImage::SizeNotFound
|
||||
log "Size not found"
|
||||
skipped += 1
|
||||
next
|
||||
end
|
||||
|
||||
if !w || !h
|
||||
log "Invalid image dimensions"
|
||||
skipped += 1
|
||||
next
|
||||
end
|
||||
|
||||
ww, hh = ImageSizer.resize(w, h)
|
||||
|
||||
if w == 0 || h == 0 || ww == 0 || hh == 0
|
||||
log "Invalid image dimensions"
|
||||
skipped += 1
|
||||
next
|
||||
end
|
||||
|
||||
upload.attributes = {
|
||||
width: w,
|
||||
height: h,
|
||||
thumbnail_width: ww,
|
||||
thumbnail_height: hh,
|
||||
)
|
||||
thumbnail_height: hh
|
||||
}
|
||||
|
||||
if upload.changed?
|
||||
log "Correcting the upload dimensions"
|
||||
log "Before: #{upload.width_was}x#{upload.height_was} #{upload.thumbnail_width_was}x#{upload.thumbnail_height_was}"
|
||||
log "After: #{w}x#{h} #{ww}x#{hh}"
|
||||
|
||||
dimensions_count += 1
|
||||
upload.save!
|
||||
end
|
||||
|
||||
if w * h < MAX_IMAGE_PIXELS
|
||||
log "Image size within allowed range"
|
||||
skipped += 1
|
||||
next
|
||||
end
|
||||
|
||||
path = upload.local? ? source : (Discourse.store.download(upload) rescue nil)&.path
|
||||
|
||||
unless path
|
||||
log "No image path"
|
||||
skipped += 1
|
||||
next
|
||||
end
|
||||
|
||||
if downsize_upload(upload, path)
|
||||
downsized_count += 1
|
||||
else
|
||||
skipped += 1
|
||||
end
|
||||
end
|
||||
|
||||
if w * h < max_image_pixels
|
||||
puts "image size within allowed range" if ENV["VERBOSE"]
|
||||
next
|
||||
end
|
||||
|
||||
path = upload.local? ? source : (Discourse.store.download(upload) rescue nil)&.path
|
||||
|
||||
unless path
|
||||
puts "no image path" if ENV["VERBOSE"]
|
||||
next
|
||||
end
|
||||
|
||||
downsized_count += 1 if downsize_upload(upload, path, max_image_pixels)
|
||||
STDIN.beep
|
||||
puts "", "Done", Time.zone.now
|
||||
end
|
||||
|
||||
puts "", "Done"
|
||||
process_uploads
|
||||
|
|
Loading…
Reference in New Issue
Block a user