discourse/app/jobs/scheduled/clean_up_uploads.rb
Bianca Nenciu 9db8f00b3d
FEATURE: Create upload_references table (#16146)
This table holds associations between uploads and other models. This can be used to prevent removing uploads that are still in use.

* DEV: Create upload_references
* DEV: Use UploadReference instead of PostUpload
* DEV: Use UploadReference for SiteSetting
* DEV: Use UploadReference for Badge
* DEV: Use UploadReference for Category
* DEV: Use UploadReference for CustomEmoji
* DEV: Use UploadReference for Group
* DEV: Use UploadReference for ThemeField
* DEV: Use UploadReference for ThemeSetting
* DEV: Use UploadReference for User
* DEV: Use UploadReference for UserAvatar
* DEV: Use UploadReference for UserExport
* DEV: Use UploadReference for UserProfile
* DEV: Add method to extract uploads from raw text
* DEV: Use UploadReference for Draft
* DEV: Use UploadReference for ReviewableQueuedPost
* DEV: Use UploadReference for UserProfile's bio_raw
* DEV: Do not copy user uploads to upload references
* DEV: Copy post uploads again after deploy
* DEV: Use created_at and updated_at from uploads table
* FIX: Check if upload site setting is empty
* DEV: Copy user uploads to upload references
* DEV: Make upload extraction less strict
2022-06-09 09:24:30 +10:00

80 lines
2.6 KiB
Ruby

# frozen_string_literal: true
module Jobs
class CleanUpUploads < ::Jobs::Scheduled
every 1.hour
def execute(args)
grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max
# always remove invalid upload records
Upload
.by_users
.where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours")
.where("created_at < ?", grace_period.hour.ago)
.where(url: "")
.find_each(&:destroy!)
return unless SiteSetting.clean_up_uploads?
if c = last_cleanup
return if (Time.zone.now.to_i - c) < (grace_period / 2).hours
end
base_url = Discourse.store.internal? ? Discourse.store.relative_base_url : Discourse.store.absolute_base_url
s3_hostname = URI.parse(base_url).hostname
s3_cdn_hostname = URI.parse(SiteSetting.Upload.s3_cdn_url || "").hostname
result = Upload.by_users
Upload.unused_callbacks&.each { |handler| result = handler.call(result) }
result = result
.where("uploads.retain_hours IS NULL OR uploads.created_at < current_timestamp - interval '1 hour' * uploads.retain_hours")
.where("uploads.created_at < ?", grace_period.hour.ago)
.where("uploads.access_control_post_id IS NULL")
.joins("LEFT JOIN upload_references ON upload_references.upload_id = uploads.id")
.where("upload_references.upload_id IS NULL")
.with_no_non_post_relations
result.find_each do |upload|
next if Upload.in_use_callbacks&.any? { |callback| callback.call(upload) }
if upload.sha1.present?
# TODO: Remove this check after UploadReferences records were created
encoded_sha = Base62.encode(upload.sha1.hex)
next if ReviewableQueuedPost.pending.where("payload->>'raw' LIKE '%#{upload.sha1}%' OR payload->>'raw' LIKE '%#{encoded_sha}%'").exists?
next if Draft.where("data LIKE '%#{upload.sha1}%' OR data LIKE '%#{encoded_sha}%'").exists?
next if UserProfile.where("bio_raw LIKE '%#{upload.sha1}%' OR bio_raw LIKE '%#{encoded_sha}%'").exists?
upload.destroy
else
upload.delete
end
end
ExternalUploadStub.cleanup!
self.last_cleanup = Time.zone.now.to_i
end
def last_cleanup=(v)
Discourse.redis.setex(last_cleanup_key, 7.days.to_i, v.to_s)
end
def last_cleanup
v = Discourse.redis.get(last_cleanup_key)
v ? v.to_i : v
end
def reset_last_cleanup!
Discourse.redis.del(last_cleanup_key)
end
protected
def last_cleanup_key
"LAST_UPLOAD_CLEANUP"
end
end
end