DEV: Change upload verified column to be integer (#10643)

Per review https://review.discourse.org/t/dev-add-verified-to-uploads-and-fill-in-s3-inventory-10406/14180

Change the verified column for Upload to a verified_status integer column, to avoid having NULL as a weird implicit status.
This commit is contained in:
Martin Brennan 2020-09-17 13:35:29 +10:00 committed by GitHub
parent e313aa5a6e
commit 80268357e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 87 additions and 21 deletions

View File

@ -3,6 +3,10 @@
require "digest/sha1" require "digest/sha1"
class Upload < ActiveRecord::Base class Upload < ActiveRecord::Base
self.ignored_columns = [
"verified" # TODO(2020-12-10): remove
]
include ActionView::Helpers::NumberHelper include ActionView::Helpers::NumberHelper
include HasUrl include HasUrl
@ -51,6 +55,14 @@ class Upload < ActiveRecord::Base
scope :by_users, -> { where("uploads.id > ?", SEEDED_ID_THRESHOLD) } scope :by_users, -> { where("uploads.id > ?", SEEDED_ID_THRESHOLD) }
def self.verification_statuses
@verification_statuses ||= Enum.new(
unchecked: 1,
verified: 2,
invalid_etag: 3
)
end
def to_s def to_s
self.url self.url
end end
@ -449,6 +461,7 @@ end
# access_control_post_id :bigint # access_control_post_id :bigint
# original_sha1 :string # original_sha1 :string
# verified :boolean # verified :boolean
# verification_status :integer default(1), not null
# #
# Indexes # Indexes
# #

View File

@ -0,0 +1,21 @@
# frozen_string_literal: true
class ChangeUploadsVerifiedToInteger < ActiveRecord::Migration[6.0]
def up
add_column :uploads, :verification_status, :integer, null: false, default: 1
Migration::ColumnDropper.mark_readonly(:uploads, :verified)
DB.exec(
<<~SQL
UPDATE uploads SET verification_status = CASE WHEN
verified THEN 2
WHEN NOT verified THEN 3
END
SQL
)
end
def down
remove_column :uploads, :verification_status
end
end

View File

@ -0,0 +1,16 @@
# frozen_string_literal: true
class AddIndexToUploadsVerificationStatus < ActiveRecord::Migration[6.0]
disable_ddl_transaction!
def up
execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
idx_uploads_on_verification_status ON uploads USING btree (verification_status)
SQL
end
def down
execute "DROP INDEX IF EXISTS idx_uploads_on_verification_status"
end
end

View File

@ -71,22 +71,34 @@ class S3Inventory
.joins("LEFT JOIN #{table_name} inventory2 ON inventory2.url = #{model.table_name}.url") .joins("LEFT JOIN #{table_name} inventory2 ON inventory2.url = #{model.table_name}.url")
.where("inventory2.etag IS NOT NULL").pluck(:id) .where("inventory2.etag IS NOT NULL").pluck(:id)
if model == Upload
# marking as verified/not verified # marking as verified/not verified
DB.exec(<<~SQL, inventory_date if model == Upload
sql_params = {
inventory_date: inventory_date,
unchecked: Upload.verification_statuses[:unchecked],
invalid_etag: Upload.verification_statuses[:invalid_etag],
verified: Upload.verification_statuses[:verified]
}
DB.exec(<<~SQL, sql_params)
UPDATE #{model.table_name} UPDATE #{model.table_name}
SET verified = CASE when table_name_alias.etag IS NULL THEN false ELSE true END SET verification_status = CASE WHEN table_name_alias.etag IS NULL
THEN :invalid_etag
ELSE :verified
END
FROM #{model.table_name} AS model_table FROM #{model.table_name} AS model_table
LEFT JOIN #{table_name} AS table_name_alias ON model_table.etag = table_name_alias.etag LEFT JOIN #{table_name} AS table_name_alias ON
model_table.etag = table_name_alias.etag
WHERE model_table.id = #{model.table_name}.id WHERE model_table.id = #{model.table_name}.id
AND model_table.updated_at < ? AND model_table.updated_at < :inventory_date
AND ( AND (
model_table.verified IS NULL OR model_table.verification_status = :unchecked OR
model_table.verified <> CASE when table_name_alias.etag IS NULL THEN false ELSE true END model_table.verification_status <> CASE WHEN table_name_alias.etag IS NULL
THEN :invalid_etag
ELSE :verified
END
) )
AND model_table.id > #{model::SEEDED_ID_THRESHOLD} AND model_table.id > #{model::SEEDED_ID_THRESHOLD}
SQL SQL
)
end end
if (missing_count = missing_uploads.count) > 0 if (missing_count = missing_uploads.count) > 0

View File

@ -1001,7 +1001,7 @@ def analyze_missing_s3
SELECT post_id, url, sha1, extension, uploads.id SELECT post_id, url, sha1, extension, uploads.id
FROM post_uploads pu FROM post_uploads pu
RIGHT JOIN uploads on uploads.id = pu.upload_id RIGHT JOIN uploads on uploads.id = pu.upload_id
WHERE NOT verified WHERE verification_status = :invalid_etag
ORDER BY created_at ORDER BY created_at
SQL SQL
@ -1009,7 +1009,7 @@ def analyze_missing_s3
other = [] other = []
all = [] all = []
DB.query(sql).each do |r| DB.query(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |r|
all << r all << r
if r.post_id if r.post_id
lookup[r.post_id] ||= [] lookup[r.post_id] ||= []
@ -1029,7 +1029,7 @@ def analyze_missing_s3
puts puts
end end
missing_uploads = Upload.where(verified: false) missing_uploads = Upload.where(verification_status: Upload.verification_statuses[:invalid_etag])
puts "Total missing uploads: #{missing_uploads.count}, newest is #{missing_uploads.maximum(:created_at)}" puts "Total missing uploads: #{missing_uploads.count}, newest is #{missing_uploads.maximum(:created_at)}"
puts "Total problem posts: #{lookup.keys.count} with #{lookup.values.sum { |a| a.length } } missing uploads" puts "Total problem posts: #{lookup.keys.count} with #{lookup.values.sum { |a| a.length } } missing uploads"
puts "Other missing uploads count: #{other.count}" puts "Other missing uploads count: #{other.count}"
@ -1067,7 +1067,9 @@ def analyze_missing_s3
end end
def delete_missing_s3 def delete_missing_s3
missing = Upload.where(verified: false).order(:created_at) missing = Upload.where(
verification_status: Upload.verification_statuses[:invalid_etag]
).order(:created_at)
count = missing.count count = missing.count
if count > 0 if count > 0
puts "The following uploads will be deleted from the database" puts "The following uploads will be deleted from the database"
@ -1110,7 +1112,9 @@ def fix_missing_s3
Jobs.run_immediately! Jobs.run_immediately!
puts "Attempting to download missing uploads and recreate" puts "Attempting to download missing uploads and recreate"
ids = Upload.where(verified: false).pluck(:id) ids = Upload.where(
verification_status: Upload.verification_statuses[:invalid_etag]
).pluck(:id)
ids.each do |id| ids.each do |id|
upload = Upload.find(id) upload = Upload.find(id)
@ -1165,11 +1169,11 @@ def fix_missing_s3
SELECT post_id SELECT post_id
FROM post_uploads pu FROM post_uploads pu
JOIN uploads on uploads.id = pu.upload_id JOIN uploads on uploads.id = pu.upload_id
WHERE NOT verified WHERE verification_status = :invalid_etag
ORDER BY post_id DESC ORDER BY post_id DESC
SQL SQL
DB.query_single(sql).each do |post_id| DB.query_single(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |post_id|
post = Post.find_by(id: post_id) post = Post.find_by(id: post_id)
if post if post
post.rebake! post.rebake!

View File

@ -107,15 +107,15 @@ describe "S3Inventory" do
end end
it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
expect(Upload.where(verified: nil).count).to eq(12) expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(12)
output = capture_stdout do output = capture_stdout do
inventory.backfill_etags_and_list_missing inventory.backfill_etags_and_list_missing
end end
verified = Upload.pluck(:verified) verification_status = Upload.pluck(:verification_status)
expect(Upload.where(verified: true).count).to eq(3) expect(Upload.where(verification_status: Upload.verification_statuses[:verified]).count).to eq(3)
expect(Upload.where(verified: false).count).to eq(2) expect(Upload.where(verification_status: Upload.verification_statuses[:invalid_etag]).count).to eq(2)
expect(Upload.where(verified: nil).count).to eq(7) expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(7)
end end
it "does not affect the updated_at date of uploads" do it "does not affect the updated_at date of uploads" do