diff --git a/app/models/upload.rb b/app/models/upload.rb index 947481cb2fc..dccd90a1d86 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -168,6 +168,10 @@ class Upload < ActiveRecord::Base Digest::SHA1.file(path).hexdigest end + def self.extract_upload_url(url) + url.match(/(\/original\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/) + end + def self.get_from_url(url) return if url.blank? @@ -177,7 +181,7 @@ class Upload < ActiveRecord::Base end return if uri&.path.blank? - data = uri.path.match(/(\/original\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/) + data = extract_upload_url(uri.path) return if data.blank? sha1 = data[2] upload = nil diff --git a/lib/upload_recovery.rb b/lib/upload_recovery.rb index d2a584fa882..75b6e3e0421 100644 --- a/lib/upload_recovery.rb +++ b/lib/upload_recovery.rb @@ -8,18 +8,36 @@ class UploadRecovery begin analyzer = PostAnalyzer.new(post.raw, post.topic_id) - analyzer.cooked_stripped.css("img").each do |img| - if dom_class = img["class"] - if (Post.white_listed_image_classes & dom_class.split).count > 0 - next + analyzer.cooked_stripped.css("img", "a").each do |media| + if media.name == "img" + if dom_class = media["class"] + if (Post.white_listed_image_classes & dom_class.split).count > 0 + next + end end - end - if img["data-orig-src"] - if @dry_run - puts "#{post.full_url} #{img["data-orig-src"]}" - else - recover_post_upload(post, img["data-orig-src"]) + orig_src = media["data-orig-src"] + + if orig_src + if @dry_run + puts "#{post.full_url} #{orig_src}" + else + recover_post_upload(post, Upload.sha1_from_short_url(orig_src)) + end + end + elsif media.name == "a" + href = media["href"] + + if data = Upload.extract_upload_url(href) + sha1 = data[2] + + unless upload = Upload.get_from_url(href) + if @dry_run + puts "#{post.full_url} #{href}" + else + recover_post_upload(post, sha1) + end + end end end end @@ -32,8 +50,7 @@ class UploadRecovery private - def recover_post_upload(post, short_url) - sha1 = Upload.sha1_from_short_url(short_url) + def recover_post_upload(post, sha1) return unless sha1.present? attributes = { diff --git a/spec/fixtures/pdf/small.pdf b/spec/fixtures/pdf/small.pdf new file mode 100644 index 00000000000..9ec444cfa40 --- /dev/null +++ b/spec/fixtures/pdf/small.pdf @@ -0,0 +1,5 @@ +%PDF-1. +1 0 obj<>endobj +2 0 obj<>endobj +3 0 obj<>endobj +trailer <> \ No newline at end of file diff --git a/spec/lib/upload_recovery_spec.rb b/spec/lib/upload_recovery_spec.rb index 7a5929d8ffa..b28e04c192e 100644 --- a/spec/lib/upload_recovery_spec.rb +++ b/spec/lib/upload_recovery_spec.rb @@ -6,14 +6,24 @@ RSpec.describe UploadRecovery do let(:upload) do UploadCreator.new( - file_from_fixtures("logo.png"), + file_from_fixtures("smallest.png"), "logo.png" ).create_for(user.id) end + let(:upload2) do + UploadCreator.new( + file_from_fixtures("small.pdf", "pdf"), + "some.pdf" + ).create_for(user.id) + end + let(:post) do Fabricate(:post, - raw: "![logo.png](#{upload.short_url})", + raw: <<~SQL, + ![logo.png](#{upload.short_url}) + some.pdf + SQL user: user ).tap(&:link_post_uploads) end @@ -21,17 +31,20 @@ RSpec.describe UploadRecovery do let(:upload_recovery) { UploadRecovery.new } before do + SiteSetting.authorized_extensions = 'png|pdf' SiteSetting.queue_jobs = false end describe '#recover' do after do - public_path = "#{Discourse.store.public_dir}#{upload.url}" + [upload, upload2].each do |u| + public_path = "#{Discourse.store.public_dir}#{u.url}" - [ - public_path, - public_path.sub("uploads", "uploads/tombstone") - ].each { |path| File.delete(path) if File.exists?(path) } + [ + public_path, + public_path.sub("uploads", "uploads/tombstone") + ].each { |path| File.delete(path) if File.exists?(path) } + end end describe 'when given an invalid sha1' do @@ -54,17 +67,18 @@ RSpec.describe UploadRecovery do upload_recovery.recover(Post.where("updated_at >= ?", 1.day.ago)) end - it 'should recover the upload' do + it 'should recover uploads and attachments' do stub_request(:get, "http://test.localhost#{upload.url}") .to_return(status: 200) expect do upload.destroy! - end.to change { post.reload.uploads.count }.from(1).to(0) + upload2.destroy! + end.to change { post.reload.uploads.count }.from(2).to(0) expect do upload_recovery.recover - end.to change { post.reload.uploads.count }.from(0).to(1) + end.to change { post.reload.uploads.count }.from(0).to(2) end end end