discourse/lib/tasks/posts.rake
Gerhard Schlager 241bf48497 DEV: Allow rebakes to generate optimized images at the same time
Previously only Sidekiq was allowed to generate more than one optimized image at the same time per machine. This adds an easy mechanism to allow the same in rake tasks and other tools.
2024-01-16 14:33:16 +01:00

757 lines
21 KiB
Ruby

# frozen_string_literal: true
require "file_store/local_store"
desc "Update each post with latest markdown"
task "posts:rebake" => :environment do
ENV["RAILS_DB"] ? rebake_posts : rebake_posts_all_sites
end
task "posts:rebake_uncooked_posts" => :environment do
# rebaking uncooked posts can very quickly saturate sidekiq
# this provides an insurance policy so you can safely run and stop
# this rake task without worrying about your sidekiq imploding
Jobs.run_immediately!
# don't lock per machine, we want to be able to run this from multiple consoles
OptimizedImage.lock_per_machine = false
ENV["RAILS_DB"] ? rebake_uncooked_posts : rebake_uncooked_posts_all_sites
end
def rebake_uncooked_posts_all_sites
RailsMultisite::ConnectionManagement.each_connection { |db| rebake_uncooked_posts }
end
def rebake_uncooked_posts
puts "Rebaking uncooked posts on #{RailsMultisite::ConnectionManagement.current_db}"
uncooked = Post.where("baked_version <> ? or baked_version IS NULL", Post::BAKED_VERSION)
rebaked = 0
total = uncooked.count
ids = uncooked.pluck(:id)
# work randomly so you can run this job from lots of consoles if needed
ids.shuffle!
ids.each do |id|
# may have been cooked in interim
post = uncooked.where(id: id).first
rebake_post(post) if post
print_status(rebaked += 1, total)
end
puts "", "#{rebaked} posts done!", ""
end
desc "Update each post with latest markdown and refresh oneboxes"
task "posts:refresh_oneboxes" => :environment do
if ENV["RAILS_DB"]
rebake_posts(invalidate_oneboxes: true)
else
rebake_posts_all_sites(invalidate_oneboxes: true)
end
end
desc "Rebake all posts with a quote using a letter_avatar"
task "posts:fix_letter_avatars" => :environment do
next unless SiteSetting.external_system_avatars_enabled
search =
Post.where("user_id <> -1").where(
"raw LIKE '%/letter\_avatar/%' OR cooked LIKE '%/letter\_avatar/%'",
)
rebaked = 0
total = search.count
search.find_each do |post|
rebake_post(post)
print_status(rebaked += 1, total)
end
puts "", "#{rebaked} posts done!", ""
end
desc "Rebake all posts matching string/regex and optionally delay the loop"
task "posts:rebake_match", %i[pattern type delay] => [:environment] do |_, args|
args.with_defaults(type: "string")
pattern = args[:pattern]
type = args[:type]&.downcase
delay = args[:delay]&.to_i
if !pattern
puts "ERROR: Expecting rake posts:rebake_match[pattern,type,delay]"
exit 1
elsif delay && delay < 1
puts "ERROR: delay parameter should be an integer and greater than 0"
exit 1
elsif type != "string" && type != "regex"
puts "ERROR: Expecting rake posts:rebake_match[pattern,type] where type is string or regex"
exit 1
end
search = Post.raw_match(pattern, type)
rebaked = 0
total = search.count
search.find_each do |post|
rebake_post(post)
print_status(rebaked += 1, total)
sleep(delay) if delay
end
puts "", "#{rebaked} posts done!", ""
end
def rebake_posts_all_sites(opts = {})
RailsMultisite::ConnectionManagement.each_connection { |db| rebake_posts(opts) }
end
def rebake_posts(opts = {})
puts "Rebaking post markdown for '#{RailsMultisite::ConnectionManagement.current_db}'"
begin
disable_system_edit_notifications = SiteSetting.disable_system_edit_notifications
SiteSetting.disable_system_edit_notifications = true
total = Post.count
rebaked = 0
batch = 1000
Post.update_all("baked_version = NULL")
(0..(total - 1).abs).step(batch) do |i|
Post
.order(id: :desc)
.offset(i)
.limit(batch)
.each do |post|
rebake_post(post, opts)
print_status(rebaked += 1, total)
end
end
ensure
SiteSetting.disable_system_edit_notifications = disable_system_edit_notifications
end
puts "", "#{rebaked} posts done!", "-" * 50
end
def rebake_post(post, opts = {})
opts[:priority] = :ultra_low if !opts[:priority]
post.rebake!(**opts)
rescue => e
puts "",
"Failed to rebake (topic_id: #{post.topic_id}, post_id: #{post.id})",
e,
e.backtrace.join("\n")
end
def print_status(current, max)
print "\r%9d / %d (%5.1f%%)" % [current, max, ((current.to_f / max.to_f) * 100).round(1)]
end
desc "normalize all markdown so <pre><code> is not used and instead backticks"
task "posts:normalize_code" => :environment do
lang = ENV["CODE_LANG"] || ""
require "import/normalize"
puts "Normalizing"
i = 0
Post
.where("raw like '%<pre>%<code>%'")
.each do |p|
normalized = Import::Normalize.normalize_code_blocks(p.raw, lang)
if normalized != p.raw
p.revise(Discourse.system_user, raw: normalized)
putc "."
i += 1
end
end
puts
puts "#{i} posts normalized!"
end
def remap_posts(find, type, ignore_case, replace = "")
ignore_case = ignore_case == "true"
i = 0
Post
.raw_match(find, type)
.find_each do |p|
regex =
case type
when "string"
Regexp.new(Regexp.escape(find), ignore_case)
when "regex"
Regexp.new(find, ignore_case)
end
new_raw = p.raw.gsub(regex, replace)
if new_raw != p.raw
begin
p.revise(Discourse.system_user, { raw: new_raw }, bypass_bump: true, skip_revision: true)
putc "."
i += 1
rescue StandardError
puts "\nFailed to remap post (topic_id: #{p.topic_id}, post_id: #{p.id})\n"
end
end
end
i
end
desc "Remap all posts matching specific string"
task "posts:remap", %i[find replace type ignore_case] => [:environment] do |_, args|
require "highline/import"
args.with_defaults(type: "string", ignore_case: "false")
find = args[:find]
replace = args[:replace]
type = args[:type]&.downcase
ignore_case = args[:ignore_case]&.downcase
if !find
puts "ERROR: Expecting rake posts:remap['find','replace']"
exit 1
elsif !replace
puts "ERROR: Expecting rake posts:remap['find','replace']. Want to delete a word/string instead? Try rake posts:delete_word['word-to-delete']"
exit 1
elsif type != "string" && type != "regex"
puts "ERROR: Expecting rake posts:remap['find','replace',type] where type is string or regex"
exit 1
elsif ignore_case != "true" && ignore_case != "false"
puts "ERROR: Expecting rake posts:remap['find','replace',type,ignore_case] where ignore_case is true or false"
exit 1
else
confirm_replace =
ask(
"Are you sure you want to replace all #{type} occurrences of '#{find}' with '#{replace}'? (Y/n)",
)
exit 1 unless (confirm_replace == "" || confirm_replace.downcase == "y")
end
puts "Remapping"
total = remap_posts(find, type, ignore_case, replace)
puts "", "#{total} posts remapped!", ""
end
desc "Delete occurrence of a word/string"
task "posts:delete_word", %i[find type ignore_case] => [:environment] do |_, args|
require "highline/import"
args.with_defaults(type: "string", ignore_case: "false")
find = args[:find]
type = args[:type]&.downcase
ignore_case = args[:ignore_case]&.downcase
if !find
puts "ERROR: Expecting rake posts:delete_word['word-to-delete']"
exit 1
elsif type != "string" && type != "regex"
puts "ERROR: Expecting rake posts:delete_word[pattern, type] where type is string or regex"
exit 1
elsif ignore_case != "true" && ignore_case != "false"
puts "ERROR: Expecting rake posts:delete_word[pattern, type,ignore_case] where ignore_case is true or false"
exit 1
else
confirm_delete =
ask("Are you sure you want to remove all #{type} occurrences of '#{find}'? (Y/n)")
exit 1 unless (confirm_delete == "" || confirm_delete.downcase == "y")
end
puts "Processing"
total = remap_posts(find, type, ignore_case)
puts "", "#{total} posts updated!", ""
end
desc "Delete all likes"
task "posts:delete_all_likes" => :environment do
post_actions = PostAction.where(post_action_type_id: PostActionType.types[:like])
likes_deleted = 0
total = post_actions.count
post_actions.each do |post_action|
begin
post_action.remove_act!(Discourse.system_user)
print_status(likes_deleted += 1, total)
rescue StandardError
# skip
end
end
UserStat.update_all(likes_given: 0, likes_received: 0) # clear user likes stats
DirectoryItem.update_all(likes_given: 0, likes_received: 0) # clear user directory likes stats
puts "", "#{likes_deleted} likes deleted!", ""
end
desc "Refreshes each post that was received via email"
task "posts:refresh_emails", [:topic_id] => [:environment] do |_, args|
posts = Post.where.not(raw_email: nil).where(via_email: true)
posts = posts.where(topic_id: args[:topic_id]) if args[:topic_id]
updated = 0
total = posts.count
posts.find_each do |post|
begin
receiver = Email::Receiver.new(post.raw_email)
body, elided = receiver.select_body
body = receiver.add_attachments(body || "", post.user)
body << Email::Receiver.elided_html(elided) if elided.present?
post.revise(
Discourse.system_user,
{ raw: body, cook_method: Post.cook_methods[:regular] },
skip_revision: true,
skip_validations: true,
bypass_bump: true,
)
rescue StandardError
puts "Failed to refresh post (topic_id: #{post.topic_id}, post_id: #{post.id})"
end
updated += 1
print_status(updated, total)
end
puts "", "Done. #{updated} posts updated.", ""
end
desc "Reorders all posts based on their creation_date"
task "posts:reorder_posts", [:topic_id] => [:environment] do |_, args|
Post.transaction do
builder = DB.build <<~SQL
WITH ordered_posts AS (
SELECT
id,
ROW_NUMBER() OVER (
PARTITION BY
topic_id
ORDER BY
created_at,
post_number
) AS new_post_number
FROM
posts
/*where*/
)
UPDATE
posts AS p
SET
sort_order = o.new_post_number,
post_number = p.post_number * -1
FROM
ordered_posts AS o
WHERE
p.id = o.id AND
p.post_number <> o.new_post_number
SQL
builder.where("topic_id = ?", args[:topic_id]) if args[:topic_id]
builder.exec
[
%w[notifications post_number],
%w[post_timings post_number],
%w[posts reply_to_post_number],
%w[topic_users last_read_post_number],
%w[topic_users last_emailed_post_number],
].each do |table, column|
builder = DB.build <<~SQL
UPDATE
#{table} AS x
SET
#{column} = p.sort_order * -1
FROM
posts AS p
/*where*/
SQL
builder.where("p.topic_id = ?", args[:topic_id]) if args[:topic_id]
builder.where("p.post_number < 0")
builder.where("x.topic_id = p.topic_id")
builder.where("x.#{column} = ABS(p.post_number)")
builder.exec
DB.exec <<~SQL
UPDATE
#{table}
SET
#{column} = #{column} * -1
WHERE
#{column} < 0
SQL
end
builder = DB.build <<~SQL
UPDATE
posts
SET
post_number = sort_order
/*where*/
SQL
builder.where("topic_id = ?", args[:topic_id]) if args[:topic_id]
builder.where("post_number < 0")
builder.exec
end
puts "", "Done.", ""
end
def missing_uploads
puts "Looking for missing uploads on: #{RailsMultisite::ConnectionManagement.current_db}"
old_scheme_upload_count = 0
count_missing = 0
missing =
Post.find_missing_uploads(include_local_upload: true) do |post, src, path, sha1|
next if sha1.present?
puts "Fixing missing uploads: " if count_missing == 0
count_missing += 1
upload_id = nil
# recovering old scheme upload.
local_store = FileStore::LocalStore.new
public_path = "#{local_store.public_dir}#{path}"
file_path = nil
if File.file?(public_path)
file_path = public_path
else
tombstone_path = public_path.sub("/uploads/", "/uploads/tombstone/")
file_path = tombstone_path if File.file?(tombstone_path)
end
if file_path.present?
if (
upload =
UploadCreator.new(File.open(file_path), File.basename(path)).create_for(
Discourse.system_user.id,
)
).persisted?
upload_id = upload.id
post.reload
new_raw = post.raw.dup
new_raw = new_raw.gsub(path, upload.url)
PostRevisor.new(post, Topic.with_deleted.find_by(id: post.topic_id)).revise!(
Discourse.system_user,
{ raw: new_raw },
skip_validations: true,
force_new_version: true,
bypass_bump: true,
)
print "🆗"
else
print ""
end
else
print "🚫"
old_scheme_upload_count += 1
end
upload_id
end
puts "", "#{missing[:count]} post uploads are missing.", ""
if missing[:count] > 0
puts "#{missing[:uploads].count} uploads are missing."
if old_scheme_upload_count > 0
puts "#{old_scheme_upload_count} of #{missing[:uploads].count} are old scheme uploads."
end
puts "#{missing[:post_uploads].count} of #{Post.count} posts are affected.", ""
if ENV["GIVE_UP"] == "1"
missing[:post_uploads].each do |id, uploads|
post = Post.with_deleted.find_by(id: id)
if post
puts "#{post.full_url} giving up on #{uploads.length} upload(s)"
PostCustomField.create!(post_id: post.id, name: Post::MISSING_UPLOADS_IGNORED, value: "t")
else
puts "could not find post #{id}"
end
end
end
if ENV["VERBOSE"] == "1"
puts "missing uploads!"
missing[:uploads].each { |path| puts "#{path}" }
if missing[:post_uploads].count > 0
puts
puts "Posts with missing uploads"
missing[:post_uploads].each do |id, uploads|
post = Post.with_deleted.find_by(id: id)
if post
puts "#{post.full_url} missing #{uploads.join(", ")}"
else
puts "could not find post #{id}"
end
end
end
end
end
missing[:count] == 0
end
desc "Finds missing post upload records from cooked HTML content"
task "posts:missing_uploads" => :environment do |_, args|
if ENV["RAILS_DB"]
missing_uploads
else
RailsMultisite::ConnectionManagement.each_connection { missing_uploads }
end
end
def recover_uploads_from_index(path)
lookup = []
db = RailsMultisite::ConnectionManagement.current_db
cdn_path = SiteSetting.cdn_path("/uploads/#{db}").sub(/https?:/, "")
Post
.where("cooked LIKE ?", "%#{cdn_path}%")
.each do |post|
regex = Regexp.new("((https?:)?#{Regexp.escape(cdn_path)}[^,;\\]\\>\\t\\n\\s)\"\']+)")
uploads = []
post.raw.scan(regex).each { |match| uploads << match[0] }
if uploads.length > 0
lookup << [post.id, uploads]
else
print "."
post.rebake!
end
end
PostCustomField
.where(name: Post::MISSING_UPLOADS)
.pluck(:post_id, :value)
.each do |post_id, uploads|
uploads = JSON.parse(uploads)
raw = Post.where(id: post_id).pick(:raw)
uploads.map! do |upload|
orig = upload
if raw.scan(upload).length == 0
upload = upload.sub(SiteSetting.Upload.s3_cdn_url, SiteSetting.Upload.s3_base_url)
end
if raw.scan(upload).length == 0
upload = upload.sub(SiteSetting.Upload.s3_base_url, Discourse.base_url)
end
upload = upload.sub(Discourse.base_url + "/", "/") if raw.scan(upload).length == 0
if raw.scan(upload).length == 0
# last resort, try for sha
sha = upload.split("/")[-1]
sha = sha.split(".")[0]
if sha.length == 40 && raw.scan(sha).length == 1
raw.match(Regexp.new("([^\"'<\\s\\n]+#{sha}[^\"'>\\s\\n]+)"))
upload = $1
end
end
if raw.scan(upload).length == 0
puts "can not find #{orig} in\n\n#{raw}"
upload = nil
end
upload
end
uploads.compact!
lookup << [post_id, uploads] if uploads.length > 0
end
lookup.each do |post_id, uploads|
post = Post.find(post_id)
changed = false
uploads.each do |url|
if (n = post.raw.scan(url).length) != 1
puts "Skipping #{url} in #{post.full_url} cause it appears #{n} times"
next
end
name = File.basename(url).split("_")[0].split(".")[0]
puts "Searching for #{url} (#{name}) in index"
if name.length != 40
puts "Skipping #{url} in #{post.full_url} cause it appears to have a short file name"
next
end
found =
begin
`cat #{path} | grep #{name} | grep original`.split("\n")[0]
rescue StandardError
nil
end
if found.blank?
puts "Skipping #{url} in #{post.full_url} cause it missing from index"
next
end
found = File.expand_path(File.join(File.dirname(path), found))
if !File.exist?(found)
puts "Skipping #{url} in #{post.full_url} cause it missing from disk"
next
end
File.open(found) do |f|
begin
upload = UploadCreator.new(f, "upload").create_for(post.user_id)
if upload && upload.url
post.raw = post.raw.sub(url, upload.url)
changed = true
else
puts "Skipping #{url} in #{post.full_url} unable to create upload (unknown error)"
next
end
rescue Discourse::InvalidAccess
puts "Skipping #{url} in #{post.full_url} unable to create upload (bad format)"
next
end
end
end
if changed
puts "Recovered uploads on #{post.full_url}"
post.save!(validate: false)
post.rebake!
end
end
end
desc "Attempts to recover missing uploads from an index file"
task "posts:recover_uploads_from_index" => :environment do |_, args|
path = File.expand_path(Rails.root + "public/uploads/all_the_files")
if File.exist?(path)
puts "Found existing index file at #{path}"
else
puts "Can not find index #{path} generating index this could take a while..."
`cd #{File.dirname(path)} && find -type f > #{path}`
end
if RailsMultisite::ConnectionManagement.current_db != "default"
recover_uploads_from_index(path)
else
RailsMultisite::ConnectionManagement.each_connection { recover_uploads_from_index(path) }
end
end
desc "invalidate broken images"
task "posts:invalidate_broken_images" => :environment do
puts "Invalidating broken images.."
posts = Post.where("raw like '%<img%'")
rebaked = 0
total = posts.count
posts.find_each do |p|
rebake_post(p, invalidate_broken_images: true)
print_status(rebaked += 1, total)
end
puts
puts "", "#{rebaked} posts rebaked!"
end
desc "Coverts full upload URLs in `Post#raw` to short upload url"
task "posts:inline_uploads" => :environment do |_, args|
if ENV["RAILS_DB"]
correct_inline_uploads
else
RailsMultisite::ConnectionManagement.each_connection do |db|
puts "Correcting #{db}..."
puts
correct_inline_uploads
end
end
end
def correct_inline_uploads
dry_run = (ENV["DRY_RUN"].nil? ? true : ENV["DRY_RUN"] != "false")
verbose = ENV["VERBOSE"]
scope =
Post
.joins(:upload_references)
.distinct("posts.id")
.where(
"raw LIKE ?",
"%/uploads/#{RailsMultisite::ConnectionManagement.current_db}/original/%",
)
affected_posts_count = scope.count
fixed_count = 0
not_corrected_post_ids = []
failed_to_correct_post_ids = []
scope.find_each do |post|
if post.raw !~ Upload::URL_REGEX
affected_posts_count -= 1
next
end
begin
new_raw = InlineUploads.process(post.raw)
if post.raw != new_raw
if !dry_run
PostRevisor.new(post, Topic.with_deleted.find_by(id: post.topic_id)).revise!(
Discourse.system_user,
{ raw: new_raw },
skip_validations: true,
force_new_version: true,
bypass_bump: true,
)
end
if verbose
require "diffy"
Diffy::Diff.default_format = :color
puts "Cooked diff for Post #{post.id}"
puts Diffy::Diff.new(PrettyText.cook(post.raw), PrettyText.cook(new_raw), context: 1)
puts
elsif dry_run
putc "#"
else
putc "."
end
fixed_count += 1
else
putc "X"
not_corrected_post_ids << post.id
end
rescue StandardError
putc "!"
failed_to_correct_post_ids << post.id
end
end
puts
puts "#{fixed_count} out of #{affected_posts_count} affected posts corrected"
if not_corrected_post_ids.present?
puts "Ids of posts that were not corrected: #{not_corrected_post_ids}"
end
if failed_to_correct_post_ids.present?
puts "Ids of posts that encountered failures: #{failed_to_correct_post_ids}"
end
puts "Task was ran in dry run mode. Set `DRY_RUN=false` to revise affected posts" if dry_run
end