PERF: Split queries when cleaning uploads.

This reduces the number of scans that the db has to do in the query
to fetch orphan uploads. Futheremore, we were not batching our
records which bloats memory.
This commit is contained in:
Guo Xiang Tan 2016-07-01 15:22:30 +08:00
parent 4657d22eb0
commit bd07658a37
No known key found for this signature in database
GPG Key ID: 19C321C8952B0F72
7 changed files with 111 additions and 31 deletions

View File

@ -1,25 +1,35 @@
module Jobs
class CleanUpUploads < Jobs::Scheduled
every 1.hour
def execute(args)
return unless SiteSetting.clean_up_uploads?
ignore_urls = []
ignore_urls |= UserProfile.uniq.select(:profile_background).where("profile_background IS NOT NULL AND profile_background != ''").pluck(:profile_background)
ignore_urls |= UserProfile.uniq.select(:card_background).where("card_background IS NOT NULL AND card_background != ''").pluck(:card_background)
ignore_urls |= Category.uniq.select(:logo_url).where("logo_url IS NOT NULL AND logo_url != ''").pluck(:logo_url)
ignore_urls |= Category.uniq.select(:background_url).where("background_url IS NOT NULL AND background_url != ''").pluck(:background_url)
ids = []
ids |= PostUpload.uniq.select(:upload_id).pluck(:upload_id)
ids |= User.uniq.select(:uploaded_avatar_id).where("uploaded_avatar_id IS NOT NULL").pluck(:uploaded_avatar_id)
ids |= UserAvatar.uniq.select(:gravatar_upload_id).where("gravatar_upload_id IS NOT NULL").pluck(:gravatar_upload_id)
grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max
Upload.where("created_at < ?", grace_period.hour.ago)
.where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours")
.where("id NOT IN (SELECT upload_id FROM post_uploads WHERE upload_id IS NOT NULL)")
.where("id NOT IN (SELECT uploaded_avatar_id FROM users WHERE uploaded_avatar_id IS NOT NULL)")
.where("id NOT IN (SELECT gravatar_upload_id FROM user_avatars WHERE gravatar_upload_id IS NOT NULL)")
.where("url NOT IN (SELECT profile_background FROM user_profiles WHERE LENGTH(COALESCE(profile_background, '')) > 0)")
.where("url NOT IN (SELECT card_background FROM user_profiles WHERE LENGTH(COALESCE(card_background, '')) > 0)")
.where("url NOT IN (SELECT logo_url FROM categories WHERE LENGTH(COALESCE(logo_url, '')) > 0)")
.where("url NOT IN (SELECT background_url FROM categories WHERE LENGTH(COALESCE(background_url, '')) > 0)")
.destroy_all
result = Upload.where("created_at < ?", grace_period.hour.ago)
.where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours")
if !ids.empty?
result = result.where("id NOT IN (?)", ids)
end
if !ignore_urls.empty?
result = result.where("url NOT IN (?)", ignore_urls)
end
result.find_each { |upload| upload.destroy }
end
end
end

View File

@ -120,9 +120,9 @@ describe CookedPostProcessor do
it "generates overlay information" do
cpp.post_process_images
expect(cpp.html).to match_html '<p><div class="lightbox-wrapper"><a data-download-href="/uploads/default/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98" href="/uploads/default/1/1234567890123456.jpg" class="lightbox" title="logo.png"><img src="/uploads/default/optimized/1X/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_1_690x788.png" width="690" height="788"><div class="meta">
<span class="filename">logo.png</span><span class="informations">1750x2000 1.21 KB</span><span class="expand"></span>
</div></a></div></p>'
expect(cpp.html).to match_html "<p><div class=\"lightbox-wrapper\"><a data-download-href=\"/uploads/default/#{upload.sha1}\" href=\"/uploads/default/1/1234567890123456.jpg\" class=\"lightbox\" title=\"logo.png\"><img src=\"/uploads/default/optimized/1X/#{upload.sha1}_1_690x788.png\" width=\"690\" height=\"788\"><div class=\"meta\">
<span class=\"filename\">logo.png</span><span class=\"informations\">1750x2000 1.21 KB</span><span class=\"expand\"></span>
</div></a></div></p>"
expect(cpp).to be_dirty
end
@ -153,9 +153,9 @@ describe CookedPostProcessor do
it "generates overlay information" do
cpp.post_process_images
expect(cpp.html).to match_html '<p><div class="lightbox-wrapper"><a data-download-href="/subfolder/uploads/default/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98" href="/subfolder/uploads/default/1/1234567890123456.jpg" class="lightbox" title="logo.png"><img src="/subfolder/uploads/default/optimized/1X/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_1_690x788.png" width="690" height="788"><div class="meta">
<span class="filename">logo.png</span><span class="informations">1750x2000 1.21 KB</span><span class="expand"></span>
</div></a></div></p>'
expect(cpp.html).to match_html "<p><div class=\"lightbox-wrapper\"><a data-download-href=\"/subfolder/uploads/default/#{upload.sha1}\" href=\"/subfolder/uploads/default/1/1234567890123456.jpg\" class=\"lightbox\" title=\"logo.png\"><img src=\"/subfolder/uploads/default/optimized/1X/#{upload.sha1}_1_690x788.png\" width=\"690\" height=\"788\"><div class=\"meta\">
<span class=\"filename\">logo.png</span><span class=\"informations\">1750x2000 1.21 KB</span><span class=\"expand\"></span>
</div></a></div></p>"
expect(cpp).to be_dirty
end
@ -181,9 +181,9 @@ describe CookedPostProcessor do
it "generates overlay information" do
cpp.post_process_images
expect(cpp.html).to match_html '<p><div class="lightbox-wrapper"><a data-download-href="/uploads/default/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98" href="/uploads/default/1/1234567890123456.jpg" class="lightbox" title="WAT"><img src="/uploads/default/optimized/1X/e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_1_690x788.png" title="WAT" width="690" height="788"><div class="meta">
<span class="filename">WAT</span><span class="informations">1750x2000 1.21 KB</span><span class="expand"></span>
</div></a></div></p>'
expect(cpp.html).to match_html "<p><div class=\"lightbox-wrapper\"><a data-download-href=\"/uploads/default/#{upload.sha1}\" href=\"/uploads/default/1/1234567890123456.jpg\" class=\"lightbox\" title=\"WAT\"><img src=\"/uploads/default/optimized/1X/#{upload.sha1}_1_690x788.png\" title=\"WAT\" width=\"690\" height=\"788\"><div class=\"meta\">
<span class=\"filename\">WAT</span><span class=\"informations\">1750x2000 1.21 KB</span><span class=\"expand\"></span>
</div></a></div></p>"
expect(cpp).to be_dirty
end

View File

@ -14,7 +14,7 @@ describe FileStore::LocalStore do
it "returns a relative url" do
store.expects(:copy_file)
expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/)
expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+#{upload.sha1}\.png/)
end
end
@ -23,7 +23,7 @@ describe FileStore::LocalStore do
it "returns a relative url" do
store.expects(:copy_file)
expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/)
expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/)
end
end

View File

@ -23,7 +23,7 @@ describe FileStore::S3Store do
it "returns an absolute schemaless url" do
s3_helper.expects(:upload)
expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/)
expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+#{upload.sha1}\.png/)
end
end
@ -32,7 +32,7 @@ describe FileStore::S3Store do
it "returns an absolute schemaless url" do
s3_helper.expects(:upload)
expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/)
expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/)
end
end

View File

@ -1,11 +1,11 @@
Fabricator(:upload) do
user
sha1 "e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98"
sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } }
original_filename "logo.png"
filesize 1234
width 100
height 200
url "/uploads/default/1/1234567890123456.png"
url { sequence(:url) { |n| "/uploads/default/#{n}/1234567890123456.png" } }
end
Fabricator(:attachment, from: :upload) do

View File

@ -0,0 +1,3 @@
Fabricator(:user_avatar) do
user
end

View File

@ -3,16 +3,14 @@ require 'rails_helper'
require_dependency 'jobs/scheduled/clean_up_uploads'
describe Jobs::CleanUpUploads do
before do
Upload.destroy_all
SiteSetting.clean_up_uploads = true
SiteSetting.clean_orphan_uploads_grace_period_hours = 1
@upload = Fabricate(:upload, created_at: 2.hours.ago)
end
it "deletes orphan uploads" do
Fabricate(:upload, created_at: 2.hours.ago)
expect(Upload.count).to be(1)
Jobs::CleanUpUploads.new.execute(nil)
@ -20,4 +18,73 @@ describe Jobs::CleanUpUploads do
expect(Upload.count).to be(0)
end
it "does not delete profile background uploads" do
profile_background_upload = Fabricate(:upload, created_at: 2.hours.ago)
UserProfile.last.update_attributes!(profile_background: profile_background_upload.url)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: profile_background_upload.id)).to eq(profile_background_upload)
end
it "does not delete card background uploads" do
card_background_upload = Fabricate(:upload, created_at: 2.hours.ago)
UserProfile.last.update_attributes!(card_background: card_background_upload.url)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: card_background_upload.id)).to eq(card_background_upload)
end
it "does not delete category logo uploads" do
category_logo_upload = Fabricate(:upload, created_at: 2.hours.ago)
category = Fabricate(:category, logo_url: category_logo_upload.url)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: category_logo_upload.id)).to eq(category_logo_upload)
end
it "does not delete category background url uploads" do
category_background_url = Fabricate(:upload, created_at: 2.hours.ago)
category = Fabricate(:category, background_url: category_background_url.url)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: category_background_url.id)).to eq(category_background_url)
end
it "does not delete post uploads" do
upload = Fabricate(:upload, created_at: 2.hours.ago)
post = Fabricate(:post, uploads: [upload])
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: upload.id)).to eq(upload)
end
it "does not delete user uploaded avatar" do
upload = Fabricate(:upload, created_at: 2.hours.ago)
user = Fabricate(:user, uploaded_avatar: upload)
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: upload.id)).to eq(upload)
end
it "does not delete user gravatar" do
upload = Fabricate(:upload, created_at: 2.hours.ago)
user = Fabricate(:user, user_avatar: Fabricate(:user_avatar, gravatar_upload: upload))
Jobs::CleanUpUploads.new.execute(nil)
expect(Upload.find_by(id: @upload.id)).to eq(nil)
expect(Upload.find_by(id: upload.id)).to eq(upload)
end
end