mirror of
https://github.com/discourse/discourse.git
synced 2025-01-22 14:00:46 +08:00
432 lines
13 KiB
Ruby
432 lines
13 KiB
Ruby
# Use http://tatiyants.com/pev/#/plans/new if you want to optimize a query
|
|
|
|
task "import:ensure_consistency" => :environment do
|
|
log "Starting..."
|
|
|
|
insert_post_timings
|
|
insert_post_replies
|
|
insert_topic_users
|
|
insert_topic_views
|
|
insert_user_actions
|
|
insert_user_options
|
|
insert_user_stats
|
|
insert_user_visits
|
|
insert_draft_sequences
|
|
|
|
update_user_stats
|
|
update_posts
|
|
update_topics
|
|
update_categories
|
|
update_users
|
|
update_groups
|
|
|
|
log "Done!"
|
|
end
|
|
|
|
MS_SPEND_CREATING_POST ||= 5000
|
|
|
|
def insert_post_timings
|
|
log "Inserting post timings..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO post_timings (topic_id, post_number, user_id, msecs)
|
|
SELECT topic_id, post_number, user_id, #{MS_SPEND_CREATING_POST}
|
|
FROM posts
|
|
WHERE user_id > 0
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_post_replies
|
|
log "Inserting post replies..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO post_replies (post_id, reply_id, created_at, updated_at)
|
|
SELECT p2.id, p.id, p.created_at, p.created_at
|
|
FROM posts p
|
|
INNER JOIN posts p2 ON p2.post_number = p.reply_to_post_number AND p2.topic_id = p.topic_id
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_topic_users
|
|
log "Inserting topic users..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO topic_users (user_id, topic_id, posted, last_read_post_number, highest_seen_post_number, first_visited_at, last_visited_at, total_msecs_viewed)
|
|
SELECT user_id, topic_id, 't' , MAX(post_number), MAX(post_number), MIN(created_at), MAX(created_at), COUNT(id) * #{MS_SPEND_CREATING_POST}
|
|
FROM posts
|
|
WHERE user_id > 0
|
|
GROUP BY user_id, topic_id
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_topic_views
|
|
log "Inserting topic views..."
|
|
|
|
exec_sql <<-SQL
|
|
WITH X AS (
|
|
SELECT topic_id, user_id, DATE(p.created_at) posted_at
|
|
FROM posts p
|
|
JOIN users u ON u.id = p.user_id
|
|
WHERE user_id > 0
|
|
GROUP BY topic_id, user_id, DATE(p.created_at)
|
|
)
|
|
INSERT INTO topic_views (topic_id, user_id, viewed_at, ip_address)
|
|
SELECT X.topic_id, X.user_id, X.posted_at, ip_address
|
|
FROM X
|
|
JOIN users u ON u.id = X.user_id
|
|
WHERE ip_address IS NOT NULL
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_user_actions
|
|
log "Inserting user actions for NEW_TOPIC = 4..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO user_actions (action_type, user_id, target_topic_id, target_post_id, acting_user_id, created_at, updated_at)
|
|
SELECT 4, p.user_id, topic_id, p.id, p.user_id, p.created_at, p.created_at
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE post_number = 1
|
|
AND archetype <> 'private_message'
|
|
AND p.deleted_at IS NULL
|
|
AND t.deleted_at IS NULL
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
|
|
log "Inserting user actions for REPLY = 5..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO user_actions (action_type, user_id, target_topic_id, target_post_id, acting_user_id, created_at, updated_at)
|
|
SELECT 5, p.user_id, topic_id, p.id, p.user_id, p.created_at, p.created_at
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE post_number > 1
|
|
AND archetype <> 'private_message'
|
|
AND p.deleted_at IS NULL
|
|
AND t.deleted_at IS NULL
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
|
|
log "Inserting user actions for RESPONSE = 6..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO user_actions (action_type, user_id, target_topic_id, target_post_id, acting_user_id, created_at, updated_at)
|
|
SELECT 6, p.user_id, p.topic_id, p.id, p2.user_id, p.created_at, p.created_at
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
INNER JOIN posts p2 ON p2.post_number = p.reply_to_post_number
|
|
WHERE p.post_number > 1
|
|
AND archetype <> 'private_message'
|
|
AND p.deleted_at IS NULL
|
|
AND t.deleted_at IS NULL
|
|
AND p2.topic_id = p.topic_id
|
|
AND p2.user_id <> p.user_id
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
|
|
# TODO:
|
|
# - NEW_PRIVATE_MESSAGE
|
|
# - GOT_PRIVATE_MESSAGE
|
|
end
|
|
|
|
def insert_user_options
|
|
log "Inserting user options..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO user_options (
|
|
user_id,
|
|
email_always,
|
|
mailing_list_mode,
|
|
mailing_list_mode_frequency,
|
|
email_direct,
|
|
email_private_messages,
|
|
email_previous_replies,
|
|
email_in_reply_to,
|
|
email_digests,
|
|
digest_after_minutes,
|
|
include_tl0_in_digests,
|
|
automatically_unpin_topics,
|
|
enable_quoting,
|
|
external_links_in_new_tab,
|
|
dynamic_favicon,
|
|
disable_jump_reply,
|
|
new_topic_duration_minutes,
|
|
auto_track_topics_after_msecs,
|
|
notification_level_when_replying,
|
|
like_notification_frequency
|
|
)
|
|
SELECT u.id
|
|
, #{SiteSetting.default_email_always}
|
|
, #{SiteSetting.default_email_mailing_list_mode}
|
|
, #{SiteSetting.default_email_mailing_list_mode_frequency}
|
|
, #{SiteSetting.default_email_direct}
|
|
, #{SiteSetting.default_email_private_messages}
|
|
, #{SiteSetting.default_email_previous_replies}
|
|
, #{SiteSetting.default_email_in_reply_to}
|
|
, #{SiteSetting.default_email_digest_frequency.to_i > 0}
|
|
, #{SiteSetting.default_email_digest_frequency}
|
|
, #{SiteSetting.default_include_tl0_in_digests}
|
|
, #{SiteSetting.default_topics_automatic_unpin}
|
|
, #{SiteSetting.default_other_enable_quoting}
|
|
, #{SiteSetting.default_other_external_links_in_new_tab}
|
|
, #{SiteSetting.default_other_dynamic_favicon}
|
|
, #{SiteSetting.default_other_disable_jump_reply}
|
|
, #{SiteSetting.default_other_new_topic_duration_minutes}
|
|
, #{SiteSetting.default_other_auto_track_topics_after_msecs}
|
|
, #{SiteSetting.default_other_notification_level_when_replying}
|
|
, #{SiteSetting.default_other_like_notification_frequency}
|
|
FROM users u
|
|
LEFT JOIN user_options uo ON uo.user_id = u.id
|
|
WHERE uo.user_id IS NULL
|
|
SQL
|
|
end
|
|
|
|
def insert_user_stats
|
|
log "Inserting user stats..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO user_stats (user_id, new_since)
|
|
SELECT id, created_at
|
|
FROM users
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_user_visits
|
|
log "Inserting user visits..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO user_visits (user_id, visited_at, posts_read)
|
|
SELECT user_id, DATE(created_at), COUNT(*)
|
|
FROM posts
|
|
WHERE user_id > 0
|
|
GROUP BY user_id, DATE(created_at)
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_draft_sequences
|
|
log "Inserting draft sequences..."
|
|
|
|
exec_sql <<-SQL
|
|
INSERT INTO draft_sequences (user_id, draft_key, sequence)
|
|
SELECT user_id, CONCAT('#{Draft::EXISTING_TOPIC}', id), 1
|
|
FROM topics
|
|
WHERE user_id > 0
|
|
AND archetype = 'regular'
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def update_user_stats
|
|
log "Updating user stats..."
|
|
|
|
exec_sql <<-SQL
|
|
WITH X AS (
|
|
SELECT p.user_id
|
|
, COUNT(p.id) posts
|
|
, COUNT(DISTINCT p.topic_id) topics
|
|
, MIN(p.created_at) min_created_at
|
|
, COALESCE(COUNT(DISTINCT DATE(p.created_at)), 0) days
|
|
, COALESCE((
|
|
SELECT COUNT(*)
|
|
FROM topics
|
|
WHERE id IN (
|
|
SELECT topic_id
|
|
FROM posts p2
|
|
JOIN topics t2 ON t2.id = p2.topic_id
|
|
WHERE p2.deleted_at IS NULL
|
|
AND p2.post_type = 1
|
|
AND NOT COALESCE(p2.hidden, 't')
|
|
AND p2.user_id <> t2.user_id
|
|
AND p2.user_id = p.user_id
|
|
)
|
|
), 0) topic_replies
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE p.deleted_at IS NULL
|
|
AND NOT COALESCE(p.hidden, 't')
|
|
AND p.post_type = 1
|
|
AND t.deleted_at IS NULL
|
|
AND COALESCE(t.visible, 't')
|
|
AND t.archetype <> 'private_message'
|
|
AND p.user_id > 0
|
|
GROUP BY p.user_id
|
|
)
|
|
UPDATE user_stats
|
|
SET post_count = X.posts
|
|
, posts_read_count = X.posts
|
|
, time_read = X.posts * 5
|
|
, topic_count = X.topics
|
|
, topics_entered = X.topics
|
|
, first_post_created_at = X.min_created_at
|
|
, days_visited = X.days
|
|
, topic_reply_count = X.topic_replies
|
|
FROM X
|
|
WHERE user_stats.user_id = X.user_id
|
|
AND (post_count <> X.posts
|
|
OR posts_read_count <> X.posts
|
|
OR time_read <> X.posts * 5
|
|
OR topic_count <> X.topics
|
|
OR topics_entered <> X.topics
|
|
OR COALESCE(first_post_created_at, '1970-01-01') <> X.min_created_at
|
|
OR days_visited <> X.days
|
|
OR topic_reply_count <> X.topic_replies)
|
|
SQL
|
|
end
|
|
|
|
def update_posts
|
|
log "Updating posts..."
|
|
|
|
exec_sql <<-SQL
|
|
WITH Y AS (
|
|
SELECT post_id, COUNT(*) replies FROM post_replies GROUP BY post_id
|
|
)
|
|
UPDATE posts
|
|
SET reply_count = Y.replies
|
|
FROM Y
|
|
WHERE posts.id = Y.post_id
|
|
AND reply_count <> Y.replies
|
|
SQL
|
|
|
|
|
|
# -- TODO: ensure this is how this works!
|
|
# WITH X AS (
|
|
# SELECT pr.post_id, p.user_id
|
|
# FROM post_replies pr
|
|
# JOIN posts p ON p.id = pr.reply_id
|
|
# )
|
|
# UPDATE posts
|
|
# SET reply_to_user_id = X.user_id
|
|
# FROM X
|
|
# WHERE id = X.post_id
|
|
# AND COALESCE(reply_to_user_id, -9999) <> X.user_id
|
|
end
|
|
|
|
def update_topics
|
|
log "Updating topics..."
|
|
|
|
exec_sql <<-SQL
|
|
WITH X AS (
|
|
SELECT topic_id
|
|
, COUNT(*) posts
|
|
, MAX(created_at) last_post_date
|
|
, COALESCE(SUM(word_count), 0) words
|
|
, COALESCE(SUM(reply_count), 0) replies
|
|
, ( SELECT user_id
|
|
FROM posts
|
|
WHERE NOT hidden
|
|
AND deleted_at IS NULL
|
|
AND topic_id = p.topic_id
|
|
ORDER BY post_number DESC
|
|
LIMIT 1) last_poster
|
|
FROM posts p
|
|
WHERE NOT hidden
|
|
AND deleted_at IS NULL
|
|
GROUP BY topic_id
|
|
)
|
|
UPDATE topics
|
|
SET posts_count = X.posts
|
|
, last_posted_at = X.last_post_date
|
|
, bumped_at = X.last_post_date
|
|
, word_count = X.words
|
|
, reply_count = X.replies
|
|
, last_post_user_id = X.last_poster
|
|
FROM X
|
|
WHERE id = X.topic_id
|
|
AND (posts_count <> X.posts
|
|
OR COALESCE(last_posted_at, '1970-01-01') <> X.last_post_date
|
|
OR bumped_at <> X.last_post_date
|
|
OR COALESCE(word_count, -1) <> X.words
|
|
OR COALESCE(reply_count, -1) <> X.replies
|
|
OR COALESCE(last_post_user_id, -9999) <> X.last_poster)
|
|
SQL
|
|
end
|
|
|
|
def update_categories
|
|
log "Updating categories..."
|
|
|
|
exec_sql <<-SQL
|
|
WITH X AS (
|
|
SELECT category_id
|
|
, MAX(p.id) post_id
|
|
, MAX(t.id) topic_id
|
|
, COUNT(p.id) posts
|
|
, COUNT(DISTINCT p.topic_id) topics
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE p.deleted_at IS NULL
|
|
AND t.deleted_at IS NULL
|
|
AND NOT p.hidden
|
|
AND t.visible
|
|
GROUP BY category_id
|
|
)
|
|
UPDATE categories
|
|
SET latest_post_id = X.post_id
|
|
, latest_topic_id = X.topic_id
|
|
, post_count = X.posts
|
|
, topic_count = X.topics
|
|
FROM X
|
|
WHERE id = X.category_id
|
|
AND (COALESCE(latest_post_id, -1) <> X.post_id
|
|
OR COALESCE(latest_topic_id, -1) <> X.topic_id
|
|
OR post_count <> X.posts
|
|
OR topic_count <> X.topics)
|
|
SQL
|
|
end
|
|
|
|
def update_users
|
|
log "Updating users..."
|
|
|
|
exec_sql <<-SQL
|
|
WITH X AS (
|
|
SELECT user_id
|
|
, MIN(created_at) min_created_at
|
|
, MAX(created_at) max_created_at
|
|
FROM posts
|
|
WHERE deleted_at IS NULL
|
|
GROUP BY user_id
|
|
)
|
|
UPDATE users
|
|
SET first_seen_at = X.min_created_at
|
|
, last_seen_at = X.max_created_at
|
|
, last_posted_at = X.max_created_at
|
|
FROM X
|
|
WHERE id = X.user_id
|
|
AND (COALESCE(first_seen_at, '1970-01-01') <> X.min_created_at
|
|
OR COALESCE(last_seen_at, '1970-01-01') <> X.max_created_at
|
|
OR COALESCE(last_posted_at, '1970-01-01') <> X.max_created_at)
|
|
SQL
|
|
end
|
|
|
|
def update_groups
|
|
log "Updating groups..."
|
|
|
|
exec_sql <<-SQL
|
|
WITH X AS (
|
|
SELECT group_id, COUNT(*) count
|
|
FROM group_users
|
|
GROUP BY group_id
|
|
)
|
|
UPDATE groups
|
|
SET user_count = X.count
|
|
FROM X
|
|
WHERE id = X.group_id
|
|
AND user_count <> X.count
|
|
SQL
|
|
end
|
|
|
|
def log(message)
|
|
puts "[#{DateTime.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
|
|
end
|
|
|
|
def exec_sql(sql)
|
|
ActiveRecord::Base.transaction do
|
|
ActiveRecord::Base.exec_sql(sql)
|
|
end
|
|
end
|