2013-06-15 17:29:20 +08:00
|
|
|
require "digest/sha1"
|
|
|
|
|
2016-04-12 02:42:40 +08:00
|
|
|
################################################################################
|
|
|
|
# gather #
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
# Entry point for gathering uploads: runs gather_uploads once on the current
# connection when the RAILS_DB environment variable is set, otherwise once
# for every connection.
task "uploads:gather" => :environment do
  require "db_helper"

  if ENV["RAILS_DB"]
    gather_uploads
  else
    gather_uploads_for_all_sites
  end
end
|
|
|
|
|
|
|
|
# Runs gather_uploads once per multisite connection.
def gather_uploads_for_all_sites
  RailsMultisite::ConnectionManagement.each_connection do |_db|
    gather_uploads
  end
end
|
|
|
|
|
2016-04-13 22:33:00 +08:00
|
|
|
# Tells whether +path+ points at a file that exists and is not empty.
#
# path - the filesystem path to check.
#
# Returns true when the file exists with a size greater than zero, false
# otherwise. Any StandardError raised while checking (for instance a nil
# path) is treated as "file not there" and also yields false.
def file_exists?(path)
  # File.exists? was removed in Ruby 3.2; the rescue below would silently
  # turn the resulting NoMethodError into `false` for every path.
  File.exist?(path) && File.size(path) > 0
rescue StandardError
  false
end
|
|
|
|
|
2016-04-12 22:00:25 +08:00
|
|
|
# Moves every upload of the current site whose url lives under another
# "/uploads/<db>/" directory into "/uploads/<current_db>/".
#
# For each such upload the file is hard-linked to its new location (unless a
# non-empty file is already there) and DbHelper.remap is called to rewrite the
# old url into the new one. Progress is printed as one character per upload:
# "." on success, "!" when anything went wrong for that upload.
def gather_uploads
  public_directory = "#{Rails.root}/public"
  current_db = RailsMultisite::ConnectionManagement.current_db

  puts "", "Gathering uploads for '#{current_db}'...", ""

  # The trailing slash keeps a site named "foo" from also skipping uploads
  # stored under "/uploads/foobar/".
  Upload.where("url ~ '^\/uploads\/'")
        .where("url !~ '^\/uploads\/#{current_db}\/'")
        .find_each do |upload|
    begin
      old_db = upload.url[/^\/uploads\/([^\/]+)\//, 1]
      from = upload.url.dup
      to = upload.url.sub("/uploads/#{old_db}/", "/uploads/#{current_db}/")
      source = "#{public_directory}#{from}"
      destination = "#{public_directory}#{to}"

      # create destination directory & link file unless it already exists
      # (FileUtils instead of shelling out: no quoting issues with odd
      # filenames and no dependency on GNU `cp --link`)
      unless file_exists?(destination)
        FileUtils.mkdir_p(File.dirname(destination))
        FileUtils.ln(source, destination)
      end

      # ensure file has been successfully copied over
      raise unless file_exists?(destination)

      # remap links in db
      DbHelper.remap(from, to)
    rescue
      # best effort: flag the failed upload and carry on with the next one
      putc "!"
    else
      putc "."
    end
  end

  puts "", "Done!"
end
|
|
|
|
|
2015-05-25 23:59:00 +08:00
|
|
|
################################################################################
|
|
|
|
# backfill_shas #
|
|
|
|
################################################################################
|
|
|
|
|
2013-06-15 17:29:20 +08:00
|
|
|
# For every connection, computes and stores the SHA1 of each upload that does
# not have one yet. Uploads that fail are reported and skipped.
task "uploads:backfill_shas" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    puts "Backfilling #{db}..."

    Upload.where(sha1: nil).find_each do |upload|
      begin
        file_path = Discourse.store.path_for(upload)
        upload.sha1 = Digest::SHA1.file(file_path).hexdigest
        upload.save!
        putc "."
      rescue => error
        puts "Skipping #{upload.original_filename} (#{upload.url}) #{error.message}"
      end
    end
  end

  puts "", "Done"
end
|
2014-06-24 21:35:15 +08:00
|
|
|
|
2015-05-25 23:59:00 +08:00
|
|
|
################################################################################
|
|
|
|
# migrate_from_s3 #
|
|
|
|
################################################################################
|
|
|
|
|
2014-06-24 21:35:15 +08:00
|
|
|
# Entry point for the S3 -> local migration: runs migrate_from_s3 once on the
# current connection when RAILS_DB is set, otherwise once for every connection.
task "uploads:migrate_from_s3" => :environment do
  require "db_helper"

  if ENV["RAILS_DB"]
    migrate_from_s3
  else
    migrate_all_from_s3
  end
end
|
2014-06-24 21:35:15 +08:00
|
|
|
|
2016-07-25 18:12:10 +08:00
|
|
|
# Tries to work out the original filename of the file served at +url+.
#
# url - protocol-relative url of the file ("http:" is prepended before opening).
# raw - raw text of the post, searched for an attachment link to +url+.
#
# Candidates are tried in this order: the filename given in the response's
# content-disposition header, the text of the matching attachment link in
# +raw+, and finally the basename of +url+.
#
# Returns the filename, or nil when anything raised along the way.
def guess_filename(url, raw)
  remote = URI.parse("http:#{url}").open("rb", read_timeout: 5, redirect: true, allow_redirections: :all)

  disposition = remote.meta && remote.meta["content-disposition"]
  from_header = disposition[/filename="([^"]+)"/, 1].presence if disposition

  from_header ||
    raw[/<a class="attachment" href="(?:https?:)?#{Regexp.escape(url)}">([^<]+)<\/a>/, 1].presence ||
    File.basename(url)
rescue
  nil
ensure
  remote.try(:close!) rescue nil
end
|
2015-03-19 01:23:55 +08:00
|
|
|
|
2016-07-25 18:12:10 +08:00
|
|
|
# Runs migrate_from_s3 once per multisite connection.
def migrate_all_from_s3
  RailsMultisite::ConnectionManagement.each_connection do |_db|
    migrate_from_s3
  end
end
|
2015-03-19 01:23:55 +08:00
|
|
|
|
2016-07-25 18:12:10 +08:00
|
|
|
# Migrates the uploads referenced in posts of the current site from S3 back
# to local storage.
#
# Refuses to run (prints a message and returns) while S3 uploads are still
# enabled or when no S3 bucket is configured. Otherwise every post whose raw
# text contains the S3 base url is scanned; each S3 upload url found is
# downloaded, re-created through Upload.create_for, and the post is rewritten
# to point at the new url, saved and rebaked. Failures on one url are printed
# and do not stop the migration.
def migrate_from_s3
  require "file_store/s3_store"

  # make sure S3 is disabled
  if SiteSetting.enable_s3_uploads
    puts "You must disable S3 uploads before running that task."
    return
  end

  # make sure S3 bucket is set
  if SiteSetting.s3_upload_bucket.blank?
    puts "The S3 upload bucket must be set before running that task."
    return
  end

  db = RailsMultisite::ConnectionManagement.current_db

  puts "Migrating uploads from S3 to local storage for '#{db}'..."

  s3_base_url = FileStore::S3Store.new.absolute_base_url

  Post.unscoped.find_each do |post|
    next unless post.raw[s3_base_url]

    post.raw.scan(/(#{Regexp.escape(s3_base_url)}\/(\d+)(\h{40})\.\w+)/).each do |url, id, sha|
      file = nil

      begin
        puts "POST ID: #{post.id}"
        puts "UPLOAD ID: #{id}"
        puts "UPLOAD SHA: #{sha}"
        puts "UPLOAD URL: #{url}"

        if filename = guess_filename(url, post.raw)
          puts "FILENAME: #{filename}"
          file = FileHelper.download("http:#{url}", 20.megabytes, "from_s3", true)
          if upload = Upload.create_for(post.user_id || -1, file, filename, File.size(file))
            post.raw = post.raw.gsub(/(https?:)?#{Regexp.escape(url)}/, upload.url)
            post.save
            post.rebake!
            puts "OK :)"
          else
            puts "KO :("
          end
          puts post.full_url, ""
        else
          puts "NO FILENAME :("
        end
      rescue => e
        puts "EXCEPTION: #{e.message}"
      ensure
        # release the downloaded temp file; it was previously never closed
        file.try(:close!) rescue nil
      end
    end
  end

  puts "Done!"
end
|
2014-09-30 00:31:53 +08:00
|
|
|
|
2015-05-25 23:59:00 +08:00
|
|
|
################################################################################
|
|
|
|
# migrate_to_s3 #
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
# Entry point for the local -> S3 migration: runs migrate_to_s3 once on the
# current connection when RAILS_DB is set, otherwise once for every connection.
task "uploads:migrate_to_s3" => :environment do
  require "file_store/s3_store"
  require "file_store/local_store"
  require "db_helper"

  if ENV["RAILS_DB"]
    migrate_to_s3
  else
    migrate_to_s3_all_sites
  end
end
|
|
|
|
|
|
|
|
# Runs migrate_to_s3 once per multisite connection.
def migrate_to_s3_all_sites
  RailsMultisite::ConnectionManagement.each_connection do |_db|
    migrate_to_s3
  end
end
|
|
|
|
|
|
|
|
# Migrates the locally stored uploads of the current site to S3.
#
# Refuses to run (prints a message and returns) unless S3 uploads are enabled.
# Every upload that has a sha1 and whose url does not already start with the
# S3 base url is pushed to S3, then DbHelper.remap rewrites the old url into
# the new one. Uploads with a blank url are destroyed. Progress is printed as
# one character per upload: "." on success, "X" when the local file is
# missing or the transfer failed.
def migrate_to_s3
  # make sure s3 is enabled
  if !SiteSetting.enable_s3_uploads
    puts "You must enable s3 uploads before running that task"
    return
  end

  db = RailsMultisite::ConnectionManagement.current_db

  puts "Migrating uploads to S3 (#{SiteSetting.s3_upload_bucket}) for '#{db}'..."

  # will throw an exception if the bucket is missing
  s3 = FileStore::S3Store.new
  local = FileStore::LocalStore.new

  # Migrate all uploads
  # (the LIKE pattern is passed as a bind parameter rather than interpolated
  # into the SQL string)
  Upload.where.not(sha1: nil)
        .where("url NOT LIKE ?", "#{s3.absolute_base_url}%")
        .find_each do |upload|
    # remove invalid uploads
    if upload.url.blank?
      upload.destroy!
      next
    end

    # store the old url
    from = upload.url
    # retrieve the path to the local file
    path = local.path_for(upload)

    # make sure the file exists locally
    # (File.exist? because File.exists? was removed in Ruby 3.2)
    if !path || !File.exist?(path)
      putc "X"
      next
    end

    begin
      # Block form guarantees the handle is closed; the previous
      # `file.try(:close!)` never closed it because File has no #close!.
      to = File.open(path) do |file|
        # argv form: the path is handed to `file` as-is, without going
        # through a shell, so spaces or metacharacters are harmless
        content_type = IO.popen(["file", "--mime-type", "-b", path], &:read).strip
        s3.store_upload(file, upload, content_type)
      end
    rescue
      putc "X"
      next
    end

    # remap the URL
    DbHelper.remap(from, to)

    putc "."
  end
end
|
|
|
|
|
|
|
|
################################################################################
|
|
|
|
# clean_up #
|
|
|
|
################################################################################
|
|
|
|
|
2014-09-30 00:31:53 +08:00
|
|
|
# Reconciles the database with the files on disk, for every connection that
# uses internal storage (connections on an external store are skipped).
#
# First pass (database vs file system): Upload and OptimizedImage records
# whose file is not on disk are destroyed.
# Second pass (file system vs database): the legacy "avatars" directory is
# removed, then files under the site's uploads directory that have no matching
# Upload / OptimizedImage url are deleted.
#
# Progress is printed as one character per item: "." kept, "#" removed.
# File.exist? is used throughout because File.exists? was removed in Ruby 3.2.
task "uploads:clean_up" => :environment do

  RailsMultisite::ConnectionManagement.each_connection do |db|
    puts "Cleaning up uploads and thumbnails for '#{db}'..."

    if Discourse.store.external?
      puts "This task only works for internal storages."
      next
    end

    public_directory = "#{Rails.root}/public"

    ##
    ## DATABASE vs FILE SYSTEM
    ##

    # uploads & avatars
    Upload.find_each do |upload|
      path = "#{public_directory}#{upload.url}"
      if File.exist?(path)
        putc "."
      else
        # best effort: a record that cannot be destroyed is left in place
        upload.destroy rescue nil
        putc "#"
      end
    end

    # optimized images
    OptimizedImage.find_each do |optimized_image|
      path = "#{public_directory}#{optimized_image.url}"
      if File.exist?(path)
        putc "."
      else
        optimized_image.destroy rescue nil
        putc "#"
      end
    end

    ##
    ## FILE SYSTEM vs DATABASE
    ##

    uploads_directory = "#{public_directory}/uploads/#{db}"

    # avatars (no avatar should be stored in that old directory)
    FileUtils.rm_rf("#{uploads_directory}/avatars") rescue nil

    # uploads
    Dir.glob("#{uploads_directory}/*/*.*").each do |f|
      url = "/uploads/#{db}/" << f.split("/uploads/#{db}/")[1]
      if Upload.where(url: url).exists?
        putc "."
      else
        FileUtils.rm(f) rescue nil
        putc "#"
      end
    end

    # optimized images
    Dir.glob("#{uploads_directory}/_optimized/*/*/*.*").each do |f|
      url = "/uploads/#{db}/_optimized/" << f.split("/uploads/#{db}/_optimized/")[1]
      if OptimizedImage.where(url: url).exists?
        putc "."
      else
        FileUtils.rm(f) rescue nil
        putc "#"
      end
    end

    puts

  end

end
|
2015-05-11 08:30:22 +08:00
|
|
|
|
2015-05-25 23:59:00 +08:00
|
|
|
################################################################################
|
|
|
|
# missing #
|
|
|
|
################################################################################
|
2015-05-11 08:30:22 +08:00
|
|
|
|
|
|
|
# list all missing uploads and optimized images
|
|
|
|
# Prints the path of every upload and optimized image whose local file is
# missing, unreadable or empty, for every connection that uses internal
# storage (connections on an external store are skipped).
task "uploads:missing" => :environment do

  public_directory = "#{Rails.root}/public"

  # true when the size of the file at `path` is zero or cannot be read
  unusable = lambda do |path|
    begin
      File.size(path) == 0
    rescue
      # something is messed up
      true
    end
  end

  RailsMultisite::ConnectionManagement.each_connection do |db|

    if Discourse.store.external?
      puts "This task only works for internal storages."
      next
    end

    Upload.find_each do |upload|
      # could be a remote image
      next unless upload.url =~ /^\/[^\/]/

      upload_path = "#{public_directory}#{upload.url}"
      puts upload_path if unusable.call(upload_path)
    end

    OptimizedImage.find_each do |optimized_image|
      # remote?
      next unless optimized_image.url =~ /^\/[^\/]/

      thumbnail_path = "#{public_directory}#{optimized_image.url}"
      puts thumbnail_path if unusable.call(thumbnail_path)
    end

  end

end
|
2015-05-11 18:59:50 +08:00
|
|
|
|
2015-05-25 23:59:00 +08:00
|
|
|
################################################################################
|
|
|
|
# regenerate_missing_optimized #
|
|
|
|
################################################################################
|
|
|
|
|
2015-05-11 18:59:50 +08:00
|
|
|
# regenerate missing optimized images
|
|
|
|
# Regenerates missing optimized images on the current connection.
task "uploads:regenerate_missing_optimized" => [:environment] do
  regenerate_missing_optimized
end
|
|
|
|
|
2016-08-19 15:52:09 +08:00
|
|
|
# Regenerates missing optimized images once per multisite connection.
task "uploads:regenerate_missing_optimized_all_sites" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |_db|
    regenerate_missing_optimized
  end
end
|
|
|
|
|
|
|
|
# Regenerates the optimized images of the current site whose file is missing
# or empty on disk.
#
# Does nothing (prints a message and returns) when the store is external.
# Optimized images attached to avatar uploads are handled first, then the
# remaining ones that have a url and positive dimensions. When the original
# upload is itself missing and has an origin, it is downloaded again before
# resizing. Progress is printed as one character per optimized image:
# "." already present, "#" regenerated, "X" original unavailable. The paths of
# unavailable originals are listed at the end.
def regenerate_missing_optimized
  db = RailsMultisite::ConnectionManagement.current_db

  puts "Regenerating missing optimized images for '#{db}'..."

  if Discourse.store.external?
    puts "This task only works for internal storages."
    return
  end

  public_directory = "#{Rails.root}/public"
  missing_uploads = Set.new

  # true when the file exists and is not empty
  # (File.exist? because File.exists? was removed in Ruby 3.2)
  present_on_disk = ->(path) { File.exist?(path) && File.size(path) > 0 }

  avatar_upload_ids = UserAvatar.all.pluck(:custom_upload_id, :gravatar_upload_id).flatten.compact

  default_scope = OptimizedImage.includes(:upload)

  # Hash conditions are used instead of "IN (?)" / "NOT IN (?)": with an empty
  # id list the string form renders "NOT IN (NULL)", which matches no row and
  # would silently skip every non-avatar image.
  [
    default_scope
      .where(upload_id: avatar_upload_ids),

    default_scope
      .where.not(upload_id: avatar_upload_ids)
      .where("LENGTH(COALESCE(url, '')) > 0")
      .where("width > 0 AND height > 0")
  ].each do |scope|
    scope.find_each do |optimized_image|
      upload = optimized_image.upload

      # only urls starting with a single "/" are processed
      next unless optimized_image.url =~ /^\/[^\/]/
      next unless upload.url =~ /^\/[^\/]/

      thumbnail = "#{public_directory}#{optimized_image.url}"
      original = "#{public_directory}#{upload.url}"

      if present_on_disk.call(thumbnail)
        putc "."
        next
      end

      # make sure the original image exists locally
      if !present_on_disk.call(original) && upload.origin.present?
        # try to fix it by redownloading it
        begin
          downloaded = FileHelper.download(upload.origin, SiteSetting.max_image_size_kb.kilobytes, "discourse-missing", true) rescue nil
          if downloaded && downloaded.size > 0
            FileUtils.mkdir_p(File.dirname(original))
            File.open(original, "wb") { |f| f.write(downloaded.read) }
          end
        ensure
          downloaded.try(:close!) if downloaded.respond_to?(:close!)
        end
      end

      if present_on_disk.call(original)
        FileUtils.mkdir_p(File.dirname(thumbnail))
        OptimizedImage.resize(original, thumbnail, optimized_image.width, optimized_image.height)
        putc "#"
      else
        missing_uploads << original
        putc "X"
      end
    end
  end

  puts "", "Done"

  if missing_uploads.size > 0
    puts "Missing uploads:"
    missing_uploads.sort.each { |u| puts u }
  end
end
|
2015-05-19 18:31:51 +08:00
|
|
|
|
2015-05-25 23:59:00 +08:00
|
|
|
################################################################################
|
2015-06-12 18:02:36 +08:00
|
|
|
# migrate_to_new_scheme #
|
2015-05-25 23:59:00 +08:00
|
|
|
################################################################################
|
|
|
|
|
2015-06-12 18:02:36 +08:00
|
|
|
# Switches the migrate_to_new_scheme site setting on.
task "uploads:start_migration" => [:environment] do
  SiteSetting.migrate_to_new_scheme = true

  puts "Migration started!"
end
|
|
|
|
|
2015-06-12 18:02:36 +08:00
|
|
|
# Switches the migrate_to_new_scheme site setting off.
task "uploads:stop_migration" => :environment do
  SiteSetting.migrate_to_new_scheme = false
  # message previously read "Migration stoped!"
  puts "Migration stopped!"
end
|