mirror of
https://github.com/discourse/discourse.git
synced 2024-11-25 09:42:07 +08:00
List and restore missing post uploads from S3 inventory.
This commit is contained in:
parent
bfcbfd7864
commit
e8fafbc123
|
@ -890,12 +890,12 @@ class Post < ActiveRecord::Base
|
|||
|
||||
def link_post_uploads(fragments: nil)
|
||||
upload_ids = []
|
||||
fragments ||= Nokogiri::HTML::fragment(self.cooked)
|
||||
|
||||
fragments.css("a/@href", "img/@src").each do |media|
|
||||
if upload = Upload.get_from_url(media.value)
|
||||
upload_ids << upload.id
|
||||
end
|
||||
each_upload_url(fragments: fragments) do |src, _, sha1|
|
||||
upload = nil
|
||||
upload = Upload.find_by(sha1: sha1) if sha1.present?
|
||||
upload ||= Upload.get_from_url(src)
|
||||
upload_ids << upload.id if upload.present?
|
||||
end
|
||||
|
||||
upload_ids |= Upload.where(id: downloaded_images.values).pluck(:id)
|
||||
|
@ -916,6 +916,84 @@ class Post < ActiveRecord::Base
|
|||
{}
|
||||
end
|
||||
|
||||
# Yields every upload-looking URL referenced by this post's cooked HTML.
#
# The distinct `href` values of links and `src` values of images are collected,
# filtered down to those matching one of the upload path patterns, and then
# yielded one by one.
#
# fragments            - already parsed cooked HTML; parsed from `self.cooked`
#                        when nil.
# include_local_upload - when true, URLs starting with a single "/" are yielded
#                        even if `Discourse.store.has_been_uploaded?` rejects
#                        them.
#
# Yields (src, path, sha1):
#   src  - the URL; protocol-relative URLs ("//host/...") get an explicit
#          scheme chosen from `SiteSetting.force_https`.
#   path - the path component of the unescaped URL.
#   sha1 - whatever `OptimizedImage.extract_sha1` / `Upload.extract_sha1`
#          returns for that path.
def each_upload_url(fragments: nil, include_local_upload: true)
  # Escape the database name so regex metacharacters in it are matched literally.
  current_db = Regexp.escape(RailsMultisite::ConnectionManagement.current_db.to_s)
  upload_patterns = [
    /\/uploads\/#{current_db}\//,
    /\/original\//,
    /\/optimized\//
  ]

  fragments ||= Nokogiri::HTML::fragment(self.cooked)
  links = fragments.css("a/@href", "img/@src").map(&:value).uniq

  links.each do |src|
    next if src.blank? || upload_patterns.none? { |pattern| src.match?(pattern) }

    src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
    next unless Discourse.store.has_been_uploaded?(src) || (include_local_upload && src.match?(/\A\/[^\/]/))

    path =
      begin
        # URI.unescape is obsolete (removed in Ruby 3.0); it delegated to this call.
        URI(URI::DEFAULT_PARSER.unescape(src))&.path
      rescue URI::Error
        # URLs that cannot be parsed are skipped below.
        nil
      end

    next if path.blank?

    sha1 =
      if path.include?("optimized")
        OptimizedImage.extract_sha1(path)
      else
        Upload.extract_sha1(path)
      end

    yield(src, path, sha1)
  end
end
|
||||
|
||||
# Scans every post that has uploads and reports upload URLs in cooked HTML that
# have no matching upload linked to the post.
#
# Existing MISSING_UPLOADS post custom fields are deleted first. For each URL
# whose SHA1 is not already linked to a post in the current batch:
#   * an Upload with the same SHA1 is looked up; failing that,
#   * the optional block is called with (post, src, path, sha1) and is expected
#     to return an upload id (or nil when it cannot recover the upload).
# When an upload id is found a PostUpload row is created (unless it exists);
# otherwise the URL is recorded as missing. One MISSING_UPLOADS custom field is
# created per affected post, holding the JSON array of its missing URLs.
#
# include_local_upload - forwarded to Post#each_upload_url.
#
# Returns a Hash:
#   :uploads      - Array of distinct missing URLs
#   :post_uploads - Hash of post id => Array of missing URLs (non-empty only)
#   :count        - total number of missing URLs across all posts
def self.find_missing_uploads(include_local_upload: true)
  PostCustomField.where(name: Post::MISSING_UPLOADS).delete_all
  missing_uploads = []
  missing_post_uploads = {}

  Post.have_uploads.select(:id, :cooked).find_in_batches do |posts|
    ids = posts.map(&:id)
    sha1s = Upload.joins(:post_uploads).where("post_uploads.post_id >= ? AND post_uploads.post_id <= ?", ids.min, ids.max).pluck(:sha1)

    posts.each do |post|
      # The caller's choice must reach each_upload_url, otherwise
      # `include_local_upload: false` is silently ignored.
      post.each_upload_url(include_local_upload: include_local_upload) do |src, path, sha1|
        next if sha1.present? && sha1s.include?(sha1)

        missing_post_uploads[post.id] ||= []

        # Already known to be missing: record it for this post without retrying.
        if missing_uploads.include?(src)
          missing_post_uploads[post.id] << src
          next
        end

        upload_id = nil
        upload_id = Upload.where(sha1: sha1).pluck(:id).first if sha1.present?
        upload_id ||= yield(post, src, path, sha1) if block_given?

        if upload_id.present?
          attributes = { post_id: post.id, upload_id: upload_id }
          PostUpload.create!(attributes) unless PostUpload.exists?(attributes)
        else
          missing_uploads << src
          missing_post_uploads[post.id] << src
        end
      end
    end
  end

  # Posts whose uploads were all recovered end up with an empty list; drop them.
  missing_post_uploads = missing_post_uploads.reject { |_, uploads| uploads.empty? }

  count = 0
  missing_post_uploads.each do |post_id, uploads|
    PostCustomField.create!(post_id: post_id, name: Post::MISSING_UPLOADS, value: uploads.to_json)
    count += uploads.count
  end

  { uploads: missing_uploads, post_uploads: missing_post_uploads, count: count }
end
|
||||
|
||||
private
|
||||
|
||||
def parse_quote_into_arguments(quote)
|
||||
|
|
|
@ -36,8 +36,6 @@ class S3Inventory
|
|||
|
||||
ActiveRecord::Base.transaction do
|
||||
begin
|
||||
table_name = "#{type}_inventory"
|
||||
connection = ActiveRecord::Base.connection.raw_connection
|
||||
connection.exec("CREATE TEMP TABLE #{table_name}(key text UNIQUE, etag text, PRIMARY KEY(etag, key))")
|
||||
connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
|
||||
files.each do |file|
|
||||
|
@ -54,6 +52,8 @@ class S3Inventory
|
|||
WHERE #{model.table_name}.etag IS NULL
|
||||
AND url ILIKE '%' || #{table_name}.key")
|
||||
|
||||
list_missing_post_uploads if type == "original"
|
||||
|
||||
uploads = (model == Upload) ? model.by_users.where("created_at < ?", inventory_date) : model
|
||||
missing_uploads = uploads.joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag").where("#{table_name}.etag is NULL")
|
||||
|
||||
|
@ -73,6 +73,35 @@ class S3Inventory
|
|||
end
|
||||
end
|
||||
|
||||
# Lists post uploads that are missing and tries to restore them from the
# inventory table.
#
# For every missing upload with a known SHA1, the inventory table is searched
# for a key under "original/" containing that SHA1. When a row is found, an
# Upload record owned by the system user is created from the S3 object's
# metadata and its id is handed back to Post.find_missing_uploads. The final
# count of missing post uploads is stored in the "missing_post_uploads" stat
# and logged.
def list_missing_post_uploads
  log "Listing missing post uploads..."

  missing = Post.find_missing_uploads(include_local_upload: false) do |_post, _src, _path, sha1|
    next if sha1.blank?

    # The SHA1 originates from post HTML, so bind it as a parameter instead of
    # interpolating it into the SQL text.
    result = connection.exec_params(
      "SELECT * FROM #{table_name} WHERE key LIKE $1",
      ["%original/%/#{sha1}%"]
    )

    # No inventory entry means the upload cannot be restored from here.
    row = result.first
    next if row.nil?

    key = row["key"]
    data = s3_helper.object(key).data

    Upload.create!(
      user_id: Discourse.system_user.id,
      original_filename: "",
      filesize: data.content_length,
      url: File.join(Discourse.store.absolute_base_url, key),
      sha1: sha1,
      etag: row["etag"]
    ).id
  end

  Discourse.stats.set("missing_post_uploads", missing[:count])
  log "#{missing[:count]} post uploads are missing."
end
|
||||
|
||||
def download_inventory_files_to_tmp_directory
|
||||
files.each do |file|
|
||||
log "Downloading inventory file '#{file[:key]}' to tmp directory..."
|
||||
|
@ -128,6 +157,14 @@ class S3Inventory
|
|||
|
||||
private
|
||||
|
||||
# Raw connection taken from ActiveRecord, fetched on first use and then reused
# for the lifetime of this object.
def connection
  return @connection if @connection

  @connection = ActiveRecord::Base.connection.raw_connection
end
|
||||
|
||||
# Name of the inventory table for the current inventory `type`.
def table_name
  inventory_table = "#{type}_inventory"
  inventory_table
end
|
||||
|
||||
def files
|
||||
@files ||= begin
|
||||
symlink_file = unsorted_files.sort_by { |file| -file.last_modified.to_i }.first
|
||||
|
|
|
@ -390,56 +390,15 @@ task 'posts:reorder_posts', [:topic_id] => [:environment] do |_, args|
|
|||
puts "", "Done.", ""
|
||||
end
|
||||
|
||||
# Post custom fields stored under the Post::MISSING_UPLOADS name.
def get_missing_uploads
  field_name = Post::MISSING_UPLOADS
  PostCustomField.where(name: field_name)
end
|
||||
|
||||
desc 'Finds missing post upload records from cooked HTML content'
|
||||
task 'posts:missing_uploads' => :environment do
|
||||
get_missing_uploads.delete_all
|
||||
|
||||
upload_patterns = [
|
||||
/\/uploads\/#{RailsMultisite::ConnectionManagement.current_db}\//,
|
||||
/\/original\//,
|
||||
/\/optimized\//
|
||||
]
|
||||
missing_uploads = []
|
||||
old_scheme_upload_count = 0
|
||||
count = 0
|
||||
|
||||
Post.have_uploads.select(:id, :cooked).find_in_batches do |posts|
|
||||
ids = posts.pluck(:id)
|
||||
sha1s = Upload.joins(:post_uploads).where("post_uploads.post_id >= ? AND post_uploads.post_id <= ?", ids.min, ids.max).pluck(:sha1)
|
||||
missing = Post.find_missing_uploads do |post, src, path, sha1|
|
||||
next if sha1.present?
|
||||
|
||||
posts.each do |post|
|
||||
missing_post_uploads = []
|
||||
links = Nokogiri::HTML::fragment(post.cooked).css("a/@href", "img/@src").map { |media| media.value }.uniq
|
||||
|
||||
links.each do |src|
|
||||
next if src.blank? || upload_patterns.none? { |pattern| src =~ pattern }
|
||||
|
||||
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
|
||||
next unless Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i
|
||||
|
||||
path = begin
|
||||
URI(URI.unescape(src))&.path
|
||||
rescue URI::Error
|
||||
end
|
||||
|
||||
next if path.blank?
|
||||
|
||||
sha1 =
|
||||
if path.include? "optimized"
|
||||
OptimizedImage.extract_sha1(path)
|
||||
else
|
||||
Upload.extract_sha1(path)
|
||||
end
|
||||
|
||||
if sha1.blank? || sha1s.exclude?(sha1)
|
||||
upload_id = nil
|
||||
|
||||
if missing_uploads.exclude?(src)
|
||||
if sha1.blank?
|
||||
# recovering old scheme upload.
|
||||
local_store = FileStore::LocalStore.new
|
||||
public_path = "#{local_store.public_dir}#{path}"
|
||||
|
@ -476,37 +435,15 @@ task 'posts:missing_uploads' => :environment do
|
|||
else
|
||||
old_scheme_upload_count += 1
|
||||
end
|
||||
else
|
||||
upload_id = Upload.where(sha1: sha1).pluck(:id).first
|
||||
|
||||
upload_id
|
||||
end
|
||||
|
||||
if upload_id.present?
|
||||
attributes = { post_id: post.id, upload_id: upload_id }
|
||||
PostUpload.create!(attributes) unless PostUpload.exists?(attributes)
|
||||
else
|
||||
missing_uploads << src
|
||||
end
|
||||
end
|
||||
puts "", "#{missing[:count]} post uploads are missing.", ""
|
||||
|
||||
missing_post_uploads << src if upload_id.blank?
|
||||
end
|
||||
end
|
||||
|
||||
if missing_post_uploads.present?
|
||||
PostCustomField.create!(post_id: post.id, name: Post::MISSING_UPLOADS, value: missing_post_uploads.to_json)
|
||||
count += missing_post_uploads.count
|
||||
putc "x"
|
||||
else
|
||||
putc "."
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
puts "", "#{count} post uploads are missing.", ""
|
||||
|
||||
if count > 0
|
||||
puts "#{missing_uploads.count} uploads are missing."
|
||||
puts "#{old_scheme_upload_count} of #{missing_uploads.count} are old scheme uploads." if old_scheme_upload_count > 0
|
||||
puts "#{get_missing_uploads.count} of #{Post.count} posts are affected.", ""
|
||||
if missing[:count] > 0
|
||||
puts "#{missing[:uploads].count} uploads are missing."
|
||||
puts "#{old_scheme_upload_count} of #{missing[:uploads].count} are old scheme uploads." if old_scheme_upload_count > 0
|
||||
puts "#{missing[:post_uploads].count} of #{Post.count} posts are affected.", ""
|
||||
end
|
||||
end
|
||||
|
|
|
@ -75,7 +75,7 @@ describe "S3Inventory" do
|
|||
inventory.backfill_etags_and_list_missing
|
||||
end
|
||||
|
||||
expect(output).to eq("#{upload.url}\n1 of 4 uploads are missing\n")
|
||||
expect(output).to eq("Listing missing post uploads...\n0 post uploads are missing.\n#{upload.url}\n1 of 4 uploads are missing\n")
|
||||
expect(Discourse.stats.get("missing_s3_uploads")).to eq(1)
|
||||
end
|
||||
|
||||
|
|
|
@ -1261,19 +1261,23 @@ describe Post do
|
|||
)
|
||||
end
|
||||
|
||||
let(:base_url) { "#{Discourse.base_url_no_prefix}#{Discourse.base_uri}" }
|
||||
let(:video_url) { "#{base_url}#{video_upload.url}" }
|
||||
let(:audio_url) { "#{base_url}#{audio_upload.url}" }
|
||||
|
||||
let(:raw) do
|
||||
<<~RAW
|
||||
<a href="#{attachment_upload.url}">Link</a>
|
||||
<img src="#{image_upload.url}">
|
||||
|
||||
<video width="100%" height="100%" controls>
|
||||
<source src="http://myforum.com#{video_upload.url}">
|
||||
<a href="http://myforum.com#{video_upload.url}">http://myforum.com#{video_upload.url}</a>
|
||||
<source src="#{video_url}">
|
||||
<a href="#{video_url}">#{video_url}</a>
|
||||
</video>
|
||||
|
||||
<audio controls>
|
||||
<source src="http://myforum.com#{audio_upload.url}">
|
||||
<a href="http://myforum.com#{audio_upload.url}">http://myforum.com#{audio_upload.url}</a>
|
||||
<source src="#{audio_url}">
|
||||
<a href="#{audio_url}">#{audio_url}</a>
|
||||
</audio>
|
||||
RAW
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue
Block a user