2019-05-03 06:17:27 +08:00
# frozen_string_literal: true
2014-07-01 08:09:55 +08:00
module Jobs
2019-10-02 12:01:53 +08:00
class ReindexSearch < :: Jobs :: Scheduled
2018-02-20 11:41:00 +08:00
# Scheduling declaration: a two-hour interval. `every` is not defined in this
# class - presumably the interval macro inherited from ::Jobs::Scheduled.
every 2.hours
2014-07-01 08:09:55 +08:00
# Entry point for a run of the job.
#
# args - options hash; only args[:verbose] is read. When it is truthy the
#        rebuild/clean helpers print progress with puts.
#
# All rebuild steps run first (each with its default limit and indexer),
# then the two cleanup steps. The cleanup cutoff is computed once, up front,
# as "one day ago".
def execute(args)
  @verbose = args[:verbose]
  @cleanup_grace_period = 1.day.ago

  rebuild_steps = %i[
    rebuild_categories
    rebuild_tags
    rebuild_topics
    rebuild_posts
    rebuild_users
  ]
  rebuild_steps.each { |step| send(step) }

  clean_topics
  clean_posts
end
2021-01-25 18:23:36 +08:00
# Force-reindexes the categories reported by load_problem_category_ids.
#
# limit   - maximum number of category ids to fetch (default 500).
# indexer - object receiving `index(record, force: true)` for each category
#           (default SearchIndexer).
#
# Returns the array of ids that was loaded.
def rebuild_categories(limit: 500, indexer: SearchIndexer)
  stale_ids = load_problem_category_ids(limit)
  puts " rebuilding #{stale_ids.size} categories " if @verbose

  stale_ids.each do |category_id|
    record = Category.find_by(id: category_id)
    # find_by yields nil when the id no longer resolves; skip those.
    next unless record

    indexer.index(record, force: true)
  end
end
2021-01-25 18:23:36 +08:00
# Force-reindexes the tags reported by load_problem_tag_ids.
#
# limit   - maximum number of tag ids to fetch (default 1_000).
# indexer - object receiving `index(record, force: true)` for each tag
#           (default SearchIndexer).
#
# Returns the array of ids that was loaded.
def rebuild_tags(limit: 1_000, indexer: SearchIndexer)
  stale_ids = load_problem_tag_ids(limit)
  puts " rebuilding #{stale_ids.size} tags " if @verbose

  stale_ids.each do |tag_id|
    record = Tag.find_by(id: tag_id)
    # find_by yields nil when the id no longer resolves; skip those.
    next unless record

    indexer.index(record, force: true)
  end
end
2021-01-25 18:23:36 +08:00
# Force-reindexes the topics reported by load_problem_topic_ids.
#
# limit   - maximum number of topic ids to fetch (default 10_000).
# indexer - object receiving `index(record, force: true)` for each topic
#           (default SearchIndexer).
#
# Returns the array of ids that was loaded.
def rebuild_topics(limit: 10_000, indexer: SearchIndexer)
  stale_ids = load_problem_topic_ids(limit)
  puts " rebuilding #{stale_ids.size} topics " if @verbose

  stale_ids.each do |topic_id|
    record = Topic.find_by(id: topic_id)
    # find_by yields nil when the id no longer resolves; skip those.
    next unless record

    indexer.index(record, force: true)
  end
end
2021-01-25 18:23:36 +08:00
# Force-reindexes the posts reported by load_problem_post_ids.
#
# limit   - maximum number of post ids to fetch (default 20_000).
# indexer - object receiving `index(record, force: true)` for each post
#           (default SearchIndexer).
#
# Returns the array of ids that was loaded.
def rebuild_posts(limit: 20_000, indexer: SearchIndexer)
  stale_ids = load_problem_post_ids(limit)
  puts " rebuilding #{stale_ids.size} posts " if @verbose

  stale_ids.each do |post_id|
    record = Post.find_by(id: post_id)
    # find_by yields nil when the id no longer resolves; skip those.
    next unless record

    indexer.index(record, force: true)
  end
end
2021-01-25 18:23:36 +08:00
# Force-reindexes the users reported by load_problem_user_ids.
#
# limit   - maximum number of user ids to fetch (default 5_000).
# indexer - object receiving `index(record, force: true)` for each user
#           (default SearchIndexer).
#
# Returns the array of ids that was loaded.
def rebuild_users(limit: 5_000, indexer: SearchIndexer)
  stale_ids = load_problem_user_ids(limit)
  puts " rebuilding #{stale_ids.size} users " if @verbose

  stale_ids.each do |user_id|
    record = User.find_by(id: user_id)
    # find_by yields nil when the id no longer resolves; skip those.
    next unless record

    indexer.index(record, force: true)
  end
end
2021-01-25 18:23:36 +08:00
# Deletes topic_search_data rows whose topic row is gone, or whose topic was
# soft-deleted at or before the cleanup cutoff.
#
# Reads @cleanup_grace_period (assigned in #execute) as the :deleted_at bind
# value, and @verbose to decide whether to print a progress line.
#
# Returns whatever DB.exec returns.
def clean_topics
  puts " cleaning up topic search data " if @verbose

  # remove search data from deleted topics
  #
  # LEFT JOIN keeps search rows that have no topic at all, so the
  # `topics.id IS NULL` branch catches orphans. `deleted_at` is qualified
  # with its table so the reference cannot become ambiguous, and the
  # comparison is written as the single operator `<=`.
  DB.exec(<<~SQL, deleted_at: @cleanup_grace_period)
    DELETE FROM topic_search_data
    WHERE topic_id IN (
      SELECT topic_id
      FROM topic_search_data
      LEFT JOIN topics ON topic_id = topics.id
      WHERE topics.id IS NULL
         OR (topics.deleted_at IS NOT NULL AND topics.deleted_at <= :deleted_at)
    )
  SQL
end
2021-01-25 18:23:36 +08:00
# Deletes post_search_data rows that should no longer be searchable:
#   * the post row is gone,
#   * the post has an empty raw body,
#   * the post was soft-deleted at or before the cleanup cutoff, or
#   * the post's topic was soft-deleted at or before the cleanup cutoff.
#
# Reads @cleanup_grace_period (assigned in #execute) as the :deleted_at bind
# value, and @verbose to decide whether to print a progress line.
#
# Returns whatever DB.exec returns.
def clean_posts
  puts " cleaning up post search data " if @verbose

  # remove search data from deleted/empty posts
  #
  # Both joins must be LEFT JOINs. With an inner join on topics, any search
  # row whose post is missing has a NULL posts.topic_id, fails the join
  # condition, and is dropped before the WHERE clause runs - which made the
  # `posts.id IS NULL` branch unreachable and left orphaned rows behind.
  DB.exec(<<~SQL, deleted_at: @cleanup_grace_period)
    DELETE FROM post_search_data
    WHERE post_id IN (
      SELECT post_id
      FROM post_search_data
      LEFT JOIN posts ON post_id = posts.id
      LEFT JOIN topics ON posts.topic_id = topics.id
      WHERE posts.id IS NULL
         OR posts.raw = ''
         OR (posts.deleted_at IS NOT NULL AND posts.deleted_at <= :deleted_at)
         OR (topics.deleted_at IS NOT NULL AND topics.deleted_at <= :deleted_at)
    )
  SQL
end
2017-08-16 20:18:59 +08:00
# Returns up to `limit` category ids, lowest id first, for categories whose
# category_search_data row is absent (or has a NULL locale), has a locale
# different from SiteSetting.default_locale, or has a version different from
# SearchIndexer::CATEGORY_INDEX_VERSION.
#
# limit - maximum number of ids to return.
def load_problem_category_ids(limit)
  # SQL fragments carry no surrounding whitespace: `order` only accepts a
  # plain string when it matches ActiveRecord's column-name pattern exactly;
  # a padded string is treated as raw SQL (deprecated in Rails 6.0, raises
  # ActiveRecord::UnknownAttributeReference from 6.1).
  Category
    .joins("LEFT JOIN category_search_data ON category_id = categories.id")
    .where(
      "category_search_data.locale IS NULL OR category_search_data.locale != ? OR category_search_data.version != ?",
      SiteSetting.default_locale,
      SearchIndexer::CATEGORY_INDEX_VERSION
    )
    .order("categories.id ASC")
    .limit(limit)
    .pluck(:id)
end
# Returns up to `limit` tag ids, lowest id first, for tags whose
# tag_search_data row is absent (or has a NULL locale), has a locale
# different from SiteSetting.default_locale, or has a version different from
# SearchIndexer::TAG_INDEX_VERSION.
#
# limit - maximum number of ids to return.
def load_problem_tag_ids(limit)
  # SQL fragments carry no surrounding whitespace: `order` only accepts a
  # plain string when it matches ActiveRecord's column-name pattern exactly;
  # a padded string is treated as raw SQL (deprecated in Rails 6.0, raises
  # ActiveRecord::UnknownAttributeReference from 6.1).
  Tag
    .joins("LEFT JOIN tag_search_data ON tag_id = tags.id")
    .where(
      "tag_search_data.locale IS NULL OR tag_search_data.locale != ? OR tag_search_data.version != ?",
      SiteSetting.default_locale,
      SearchIndexer::TAG_INDEX_VERSION
    )
    .order("tags.id ASC")
    .limit(limit)
    .pluck(:id)
end
2017-08-16 20:18:59 +08:00
# Returns up to `limit` topic ids, highest id first, for topics whose
# topic_search_data row is absent (or has a NULL locale), has a locale
# different from SiteSetting.default_locale, or has a version different from
# SearchIndexer::TOPIC_INDEX_VERSION.
#
# limit - maximum number of ids to return.
def load_problem_topic_ids(limit)
  # SQL fragments carry no surrounding whitespace: `order` only accepts a
  # plain string when it matches ActiveRecord's column-name pattern exactly;
  # a padded string is treated as raw SQL (deprecated in Rails 6.0, raises
  # ActiveRecord::UnknownAttributeReference from 6.1).
  Topic
    .joins("LEFT JOIN topic_search_data ON topic_id = topics.id")
    .where(
      "topic_search_data.locale IS NULL OR topic_search_data.locale != ? OR topic_search_data.version != ?",
      SiteSetting.default_locale,
      SearchIndexer::TOPIC_INDEX_VERSION
    )
    .order("topics.id DESC")
    .limit(limit)
    .pluck(:id)
end
2021-01-25 18:23:36 +08:00
# Returns up to `limit` post ids, highest id first, for posts that have a
# non-empty raw body, belong to a topic whose deleted_at is NULL, and whose
# post_search_data row is absent (or has a NULL locale), has a locale
# different from SiteSetting.default_locale, or has a version different from
# SearchIndexer::POST_INDEX_VERSION.
#
# limit - maximum number of ids to return.
def load_problem_post_ids(limit)
  # SQL fragments carry no surrounding whitespace: `order` only accepts a
  # plain string when it matches ActiveRecord's column-name pattern exactly;
  # a padded string is treated as raw SQL (deprecated in Rails 6.0, raises
  # ActiveRecord::UnknownAttributeReference from 6.1).
  Post
    .joins(:topic)
    .joins("LEFT JOIN post_search_data ON post_id = posts.id")
    .where("posts.raw != ''")
    .where("topics.deleted_at IS NULL")
    .where(
      "post_search_data.locale IS NULL OR post_search_data.locale != ? OR post_search_data.version != ?",
      SiteSetting.default_locale,
      SearchIndexer::POST_INDEX_VERSION
    )
    .order("posts.id DESC")
    .limit(limit)
    .pluck(:id)
end
2017-08-25 23:52:18 +08:00
2021-01-25 18:23:36 +08:00
# Returns up to `limit` user ids, lowest id first, for users whose
# user_search_data row is absent (or has a NULL locale), has a locale
# different from SiteSetting.default_locale, or has a version different from
# SearchIndexer::USER_INDEX_VERSION.
#
# limit - maximum number of ids to return.
def load_problem_user_ids(limit)
  # SQL fragments carry no surrounding whitespace: `order` only accepts a
  # plain string when it matches ActiveRecord's column-name pattern exactly;
  # a padded string is treated as raw SQL (deprecated in Rails 6.0, raises
  # ActiveRecord::UnknownAttributeReference from 6.1).
  User
    .joins("LEFT JOIN user_search_data ON user_id = users.id")
    .where(
      "user_search_data.locale IS NULL OR user_search_data.locale != ? OR user_search_data.version != ?",
      SiteSetting.default_locale,
      SearchIndexer::USER_INDEX_VERSION
    )
    .order("users.id ASC")
    .limit(limit)
    .pluck(:id)
end
2021-01-25 18:23:36 +08:00
2014-07-01 08:09:55 +08:00
end
end