mirror of
https://github.com/discourse/discourse.git
synced 2024-11-25 09:42:07 +08:00
DEV: Import script should insert more data into user_stats
table (#26551)
This SQL tries to insert as much data as possible into the `user_stats` table by either calculating or by approximating stats based on existing. It also fixes an error in the calculation of `reply_count`which mistakenly contained all posts, not just replies. This change also disables some steps in the `import:ensure_consistency` rake task by setting the `SKIP_USER_STATS` env variable. Otherwise, the rake task will overwrite the calculated data in the `user_stats` table with inaccurate data. I'm not changing or removing the logic from the rake task yet because other bulk import scripts seem to depend on it.
This commit is contained in:
parent
4f2b44ba9f
commit
4d045bfc61
|
@ -12,12 +12,12 @@ task "import:ensure_consistency" => :environment do
|
||||||
insert_user_actions
|
insert_user_actions
|
||||||
insert_user_options
|
insert_user_options
|
||||||
insert_user_profiles
|
insert_user_profiles
|
||||||
insert_user_stats
|
insert_user_stats unless ENV["SKIP_USER_STATS"]
|
||||||
insert_user_visits
|
insert_user_visits
|
||||||
insert_draft_sequences
|
insert_draft_sequences
|
||||||
insert_automatic_group_users
|
insert_automatic_group_users
|
||||||
|
|
||||||
update_user_stats
|
update_user_stats unless ENV["SKIP_USER_STATS"]
|
||||||
update_posts
|
update_posts
|
||||||
update_topics
|
update_topics
|
||||||
update_categories
|
update_categories
|
||||||
|
|
|
@ -31,6 +31,7 @@ class BulkImport::Generic < BulkImport::Base
|
||||||
|
|
||||||
# Now that the migration is complete, do some more work:
|
# Now that the migration is complete, do some more work:
|
||||||
|
|
||||||
|
ENV["SKIP_USER_STATS"] = "1"
|
||||||
Discourse::Application.load_tasks
|
Discourse::Application.load_tasks
|
||||||
|
|
||||||
puts "running 'import:ensure_consistency' rake task."
|
puts "running 'import:ensure_consistency' rake task."
|
||||||
|
@ -1258,35 +1259,91 @@ class BulkImport::Generic < BulkImport::Base
|
||||||
|
|
||||||
DB.exec(<<~SQL)
|
DB.exec(<<~SQL)
|
||||||
WITH
|
WITH
|
||||||
post_stats AS (
|
visible_posts AS (
|
||||||
SELECT p.user_id, COUNT(p.id) AS post_count, MIN(p.created_at) AS first_post_created_at,
|
SELECT p.id, p.post_number, p.user_id, p.created_at, p.like_count, p.topic_id
|
||||||
SUM(like_count) AS likes_received
|
FROM posts p
|
||||||
FROM posts p
|
JOIN topics t ON p.topic_id = t.id
|
||||||
GROUP BY p.user_id
|
WHERE t.archetype = 'regular'
|
||||||
),
|
AND t.deleted_at IS NULL
|
||||||
|
AND t.visible
|
||||||
|
AND p.deleted_at IS NULL
|
||||||
|
AND p.post_type = 1 /* regular_post_type */
|
||||||
|
AND NOT p.hidden
|
||||||
|
),
|
||||||
topic_stats AS (
|
topic_stats AS (
|
||||||
SELECT t.user_id, COUNT(t.id) AS topic_count FROM topics t GROUP BY t.user_id
|
SELECT t.user_id, COUNT(t.id) AS topic_count
|
||||||
),
|
FROM topics t
|
||||||
|
WHERE t.archetype = 'regular'
|
||||||
|
AND t.deleted_at IS NULL
|
||||||
|
AND t.visible
|
||||||
|
GROUP BY t.user_id
|
||||||
|
),
|
||||||
|
post_stats AS (
|
||||||
|
SELECT p.user_id, MIN(p.created_at) AS first_post_created_at, SUM(p.like_count) AS likes_received
|
||||||
|
FROM visible_posts p
|
||||||
|
GROUP BY p.user_id
|
||||||
|
),
|
||||||
|
reply_stats AS (
|
||||||
|
SELECT p.user_id, COUNT(p.id) AS reply_count
|
||||||
|
FROM visible_posts p
|
||||||
|
WHERE p.post_number > 1
|
||||||
|
GROUP BY p.user_id
|
||||||
|
),
|
||||||
like_stats AS (
|
like_stats AS (
|
||||||
SELECT pa.user_id, COUNT(*) AS likes_given
|
SELECT pa.user_id, COUNT(*) AS likes_given
|
||||||
FROM post_actions pa
|
FROM post_actions pa
|
||||||
WHERE pa.post_action_type_id = 2
|
JOIN visible_posts p ON pa.post_id = p.id
|
||||||
GROUP BY pa.user_id
|
WHERE pa.post_action_type_id = 2 /* like */
|
||||||
)
|
AND pa.deleted_at IS NULL
|
||||||
|
GROUP BY pa.user_id
|
||||||
|
),
|
||||||
|
badge_stats AS (
|
||||||
|
SELECT ub.user_id, COUNT(DISTINCT ub.badge_id) AS distinct_badge_count
|
||||||
|
FROM user_badges ub
|
||||||
|
JOIN badges b ON ub.badge_id = b.id AND b.enabled
|
||||||
|
GROUP BY ub.user_id
|
||||||
|
),
|
||||||
|
post_action_stats AS ( -- posts created by user and likes given by user
|
||||||
|
SELECT p.user_id, p.id AS post_id, p.created_at::DATE, p.topic_id, p.post_number
|
||||||
|
FROM visible_posts p
|
||||||
|
UNION
|
||||||
|
SELECT pa.user_id, pa.post_id, pa.created_at::DATE, p.topic_id, p.post_number
|
||||||
|
FROM post_actions pa
|
||||||
|
JOIN visible_posts p ON pa.post_id = p.id
|
||||||
|
WHERE pa.post_action_type_id = 2
|
||||||
|
),
|
||||||
|
topic_reading_stats AS (
|
||||||
|
SELECT user_id, COUNT(DISTINCT topic_id) AS topics_entered,
|
||||||
|
COUNT(DISTINCT created_at) AS days_visited
|
||||||
|
FROM post_action_stats
|
||||||
|
GROUP BY user_id
|
||||||
|
),
|
||||||
|
posts_reading_stats AS (
|
||||||
|
SELECT user_id, SUM(max_post_number) AS posts_read_count
|
||||||
|
FROM (
|
||||||
|
SELECT user_id, MAX(post_number) AS max_post_number
|
||||||
|
FROM post_action_stats
|
||||||
|
GROUP BY user_id, topic_id
|
||||||
|
) x
|
||||||
|
GROUP BY user_id
|
||||||
|
)
|
||||||
INSERT
|
INSERT
|
||||||
INTO user_stats (user_id, new_since, post_count, topic_count, first_post_created_at, likes_received, likes_given)
|
INTO user_stats (user_id, new_since, post_count, topic_count, first_post_created_at, likes_received,
|
||||||
SELECT u.id, u.created_at AS new_since, COALESCE(ps.post_count, 0) AS post_count,
|
likes_given, distinct_badge_count, days_visited, topics_entered, posts_read_count, time_read)
|
||||||
|
SELECT u.id AS user_id, u.created_at AS new_since, COALESCE(rs.reply_count, 0) AS reply_count,
|
||||||
COALESCE(ts.topic_count, 0) AS topic_count, ps.first_post_created_at,
|
COALESCE(ts.topic_count, 0) AS topic_count, ps.first_post_created_at,
|
||||||
COALESCE(ps.likes_received, 0) AS likes_received, COALESCE(ls.likes_given, 0) AS likes_given
|
COALESCE(ps.likes_received, 0) AS likes_received, COALESCE(ls.likes_given, 0) AS likes_given,
|
||||||
|
COALESCE(bs.distinct_badge_count, 0) AS distinct_badge_count, COALESCE(trs.days_visited, 1) AS days_visited,
|
||||||
|
COALESCE(trs.topics_entered, 0) AS topics_entered, COALESCE(prs.posts_read_count, 0) AS posts_read_count,
|
||||||
|
COALESCE(prs.posts_read_count, 0) * 30 AS time_read -- assume 30 seconds / post
|
||||||
FROM users u
|
FROM users u
|
||||||
LEFT JOIN post_stats ps ON u.id = ps.user_id
|
|
||||||
LEFT JOIN topic_stats ts ON u.id = ts.user_id
|
LEFT JOIN topic_stats ts ON u.id = ts.user_id
|
||||||
|
LEFT JOIN post_stats ps ON u.id = ps.user_id
|
||||||
|
LEFT JOIN reply_stats rs ON u.id = rs.user_id
|
||||||
LEFT JOIN like_stats ls ON u.id = ls.user_id
|
LEFT JOIN like_stats ls ON u.id = ls.user_id
|
||||||
WHERE NOT EXISTS (
|
LEFT JOIN badge_stats bs ON u.id = bs.user_id
|
||||||
SELECT 1
|
LEFT JOIN topic_reading_stats trs ON u.id = trs.user_id
|
||||||
FROM user_stats us
|
LEFT JOIN posts_reading_stats prs ON u.id = prs.user_id
|
||||||
WHERE us.user_id = u.id
|
|
||||||
)
|
|
||||||
ON CONFLICT DO NOTHING
|
ON CONFLICT DO NOTHING
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user