PERF: improve performance of unread queries

Figuring out what unread topics a user has is a very expensive
operation over time.

Users can easily accumulate 10s of thousands of tracking state rows
(1 for every topic they ever visit)

When figuring out what a user has that is unread we need to join
the tracking state records to the topic table. This can very quickly
lead to cases where you need to scan through the entire topic table.

This commit optimises it so we always keep track of the "first" date
a user has unread topics. Then we can easily filter out all earlier
topics from the join.

We use pg functions, instead of nested queries here to assist the
planner.
This commit is contained in:
Sam 2017-05-25 15:07:12 -04:00
parent 6eb6c25816
commit 29fac1ac18
12 changed files with 226 additions and 38 deletions

View File

@ -605,7 +605,7 @@ class TopicsController < ApplicationController
topic_ids = params[:topic_ids].map {|t| t.to_i}
elsif params[:filter] == 'unread'
tq = TopicQuery.new(current_user)
topics = TopicQuery.unread_filter(tq.joined_topic_user, staff: guardian.is_staff?).listable_topics
topics = TopicQuery.unread_filter(tq.joined_topic_user, current_user.id, staff: guardian.is_staff?).listable_topics
topics = topics.where('category_id = ?', params[:category_id]) if params[:category_id]
topic_ids = topics.pluck(:id)
else

View File

@ -0,0 +1,9 @@
module Jobs
class UpdateFirstTopicUnreadAt < Jobs::Scheduled
every 1.day
def execute(args)
UserStat.update_first_topic_unread_at!
end
end
end

View File

@ -541,6 +541,10 @@ SQL
def self.reset_highest(topic_id)
result = exec_sql "UPDATE topics
SET
last_unread_at = (
SELECT MAX(created_at) FROM posts
WHERE topic_id = :topic_id
),
highest_staff_post_number = (
SELECT COALESCE(MAX(post_number), 0) FROM posts
WHERE topic_id = :topic_id AND

View File

@ -186,7 +186,7 @@ class TopicTrackingState
if opts && opts[:skip_unread]
"1=0"
else
TopicQuery.unread_filter(Topic, staff: opts && opts[:staff]).where_values.join(" AND ")
TopicQuery.unread_filter(Topic, -999, staff: opts && opts[:staff]).where_values.join(" AND ").sub("-999", ":user_id")
end
new =

View File

@ -46,11 +46,6 @@ class TopicUser < ActiveRecord::Base
notification_level: notification_level,
notifications_reason_id: reason
)
MessageBus.publish("/topic/#{topic_id}", {
notification_level_change: notification_level,
notifications_reason_id: reason
}, user_ids: [user_id])
end
end
@ -139,23 +134,39 @@ SQL
end
if attrs[:notification_level]
MessageBus.publish(
"/topic/#{topic_id}",
{ notification_level_change: attrs[:notification_level] },
user_ids: [user_id]
)
DiscourseEvent.trigger(:topic_notification_level_changed,
attrs[:notification_level],
user_id,
topic_id
)
notification_level_change(user_id, topic_id, attrs[:notification_level], attrs[:notifications_reason_id])
end
rescue ActiveRecord::RecordNotUnique
# In case of a race condition to insert, do nothing
end
def notification_level_change(user_id, topic_id, notification_level, reason_id)
message = { notification_level_change: notification_level }
message[:notifications_reason_id] = reason_id if reason_id
MessageBus.publish("/topic/#{topic_id}", message, user_ids: [user_id])
if notification_level && notification_level >= notification_levels[:tracking]
sql = <<SQL
UPDATE user_stats us
SET first_topic_unread_at = t.last_unread_at
FROM topics t
WHERE t.id = :topic_id AND
t.last_unread_at < us.first_topic_unread_at AND
us.user_id = :user_id
SQL
exec_sql(sql, topic_id: topic_id, user_id: user_id)
end
DiscourseEvent.trigger(:topic_notification_level_changed,
notification_level,
user_id,
topic_id
)
end
def create_missing_record(user_id, topic_id, attrs)
now = DateTime.now
@ -297,7 +308,7 @@ SQL
end
if before != after
MessageBus.publish("/topic/#{topic_id}", { notification_level_change: after }, user_ids: [user.id])
notification_level_change(user.id, topic_id, after, nil)
end
end
@ -328,7 +339,7 @@ SQL
end
end
MessageBus.publish("/topic/#{topic_id}", { notification_level_change: args[:new_status] }, user_ids: [user.id])
notification_level_change(user.id, topic_id, args[:new_status], nil)
end
end

View File

@ -81,6 +81,30 @@ class UserStat < ActiveRecord::Base
update_columns(reset_bounce_score_after: nil, bounce_score: 0)
end
def self.update_first_topic_unread_at!
exec_sql <<SQL
UPDATE user_stats us
SET first_topic_unread_at = COALESCE(X.first_unread_at, 'epoch')
FROM
(
SELECT u.id user_id, MIN(last_unread_at) first_unread_at
FROM users u
JOIN topic_users tu ON tu.user_id = u.id
JOIN topics t ON t.id = tu.topic_id
WHERE notification_level > 1 AND last_read_post_number < CASE WHEN moderator OR admin
THEN t.highest_staff_post_number
ELSE t.highest_post_number
END
AND t.deleted_at IS NULL AND t.archetype <> 'private_message'
GROUP BY u.id
) X
WHERE X.user_id = us.user_id AND X.first_unread_at <> first_topic_unread_at
SQL
nil
end
protected
def trigger_badges

View File

@ -0,0 +1,48 @@
class AddUnreadTrackingColumns < ActiveRecord::Migration
def up
add_column :user_stats, :first_topic_unread_at, :datetime, null: false, default: "epoch"
add_column :topics, :last_unread_at, :datetime, null: false, default: "epoch"
execute <<SQL
UPDATE topics SET last_unread_at = (
SELECT MAX(created_at)
FROM posts
WHERE topics.id = posts.topic_id
)
SQL
execute <<SQL
UPDATE user_stats SET first_topic_unread_at = COALESCE((
SELECT MIN(last_unread_at) FROM topics
JOIN users u ON u.id = user_stats.user_id
JOIN topic_users tu ON tu.user_id = user_stats.user_id AND topics.id = tu.topic_id
WHERE notification_level > 1 AND last_read_post_number < CASE WHEN moderator OR admin
THEN topics.highest_staff_post_number
ELSE topics.highest_post_number
END
AND topics.deleted_at IS NULL AND topics.archetype <> 'private_message'
), 'epoch')
SQL
add_index :topics, [:last_unread_at]
# we need this function for performance reasons
execute <<SQL
CREATE FUNCTION first_unread_topic_for(user_id int)
RETURNS timestamp AS
$$
SELECT COALESCE(first_topic_unread_at, 'epoch'::timestamp)
FROM users u
JOIN user_stats ON user_id = u.id
WHERE u.id = $1
$$
LANGUAGE SQL STABLE
SQL
end
def down
execute "DROP FUNCTION first_unread_topic_for(int)"
remove_column :user_stats, :first_topic_unread_at
remove_column :topics, :last_unread_at
end
end

View File

@ -377,15 +377,16 @@ class PostCreator
end
def update_topic_stats
return if @post.post_type == Post.types[:whisper]
attrs = {last_unread_at: @post.created_at}
if @post.post_type != Post.types[:whisper]
attrs[:last_posted_at] = @post.created_at
attrs[:last_post_user_id] = @post.user_id
attrs[:word_count] = (@topic.word_count || 0) + @post.word_count
attrs[:excerpt] = @post.excerpt(220, strip_links: true) if new_topic?
attrs[:bumped_at] = @post.created_at unless @post.no_bump
end
attrs = {
last_posted_at: @post.created_at,
last_post_user_id: @post.user_id,
word_count: (@topic.word_count || 0) + @post.word_count,
}
attrs[:excerpt] = @post.excerpt(220, strip_links: true) if new_topic?
attrs[:bumped_at] = @post.created_at unless @post.no_bump
@topic.update_attributes(attrs)
end

View File

@ -280,10 +280,21 @@ class TopicQuery
.where("COALESCE(tu.notification_level, :tracking) >= :tracking", tracking: TopicUser.notification_levels[:tracking])
end
def self.unread_filter(list, opts)
def self.unread_filter(list, user_id, opts)
# PERF note
# We use the function first_unread_topic_for here instead of joining
# the table to assist the PostgreSQL query planner
#
# We want the query planner to have the actual value of the first_unread_topic so
# it can pick an appropriate plan. If it does not have this upfront it will just assume
# that the value will be 1/3 of the way through the topic table which makes it use terrible
# indexes for the plan.
#
col_name = opts[:staff] ? "highest_staff_post_number" : "highest_post_number"
list.where("tu.last_read_post_number < topics.#{col_name}")
list
.where("tu.last_read_post_number < topics.#{col_name}")
.where("topics.last_unread_at >= first_unread_topic_for(?)", user_id)
.where("COALESCE(tu.notification_level, :regular) >= :tracking",
regular: TopicUser.notification_levels[:regular], tracking: TopicUser.notification_levels[:tracking])
end
@ -361,7 +372,10 @@ class TopicQuery
end
def unread_results(options={})
result = TopicQuery.unread_filter(default_results(options.reverse_merge(:unordered => true)), staff: @user.try(:staff?))
result = TopicQuery.unread_filter(
default_results(options.reverse_merge(:unordered => true)),
@user&.id,
staff: @user&.staff?)
.order('CASE WHEN topics.user_id = tu.user_id THEN 1 ELSE 2 END')
self.class.results_filter_callbacks.each do |filter_callback|
@ -724,7 +738,10 @@ class TopicQuery
end
def unread_messages(params)
TopicQuery.unread_filter(messages_for_groups_or_user(params[:my_group_ids]), staff: @user.try(:staff?))
TopicQuery.unread_filter(
messages_for_groups_or_user(params[:my_group_ids]),
@user&.id,
staff: @user&.staff?)
.limit(params[:count])
end

View File

@ -395,6 +395,9 @@ describe PostCreator do
expect(whisper_reply).to be_present
expect(whisper_reply.post_type).to eq(Post.types[:whisper])
# date is not precise enough in db
whisper_reply.reload
first.reload
# does not leak into the OP
@ -408,7 +411,13 @@ describe PostCreator do
expect(topic.posts_count).to eq(1)
expect(topic.highest_staff_post_number).to eq(3)
topic.update_columns(highest_staff_post_number:0, highest_post_number:0, posts_count: 0, last_posted_at: 1.year.ago)
expect(topic.last_unread_at).to eq(whisper_reply.created_at)
topic.update_columns(highest_staff_post_number:0,
highest_post_number:0,
posts_count: 0,
last_unread_at: 1.year.ago,
last_posted_at: 1.year.ago)
Topic.reset_highest(topic.id)
@ -417,6 +426,7 @@ describe PostCreator do
expect(topic.posts_count).to eq(1)
expect(topic.last_posted_at).to eq(first.created_at)
expect(topic.highest_staff_post_number).to eq(3)
expect(topic.last_unread_at).to eq(whisper_reply.created_at)
end
end

View File

@ -1,6 +1,17 @@
require 'rails_helper'
describe TopicUser do
let :watching do
TopicUser.notification_levels[:watching]
end
let :regular do
TopicUser.notification_levels[:regular]
end
let :tracking do
TopicUser.notification_levels[:tracking]
end
describe "#unwatch_categories!" do
it "correctly unwatches categories" do
@ -10,9 +21,6 @@ describe TopicUser do
tracked_topic = Fabricate(:topic)
user = op_topic.user
watching = TopicUser.notification_levels[:watching]
regular = TopicUser.notification_levels[:regular]
tracking = TopicUser.notification_levels[:tracking]
TopicUser.change(user.id, op_topic, notification_level: watching)
TopicUser.change(user.id, another_topic, notification_level: watching)
@ -30,6 +38,36 @@ describe TopicUser do
end
describe "first unread" do
it "correctly update first unread as needed" do
time1 = 3.days.ago
time2 = 2.days.ago
time3 = 1.days.ago
topic1 = Fabricate(:topic, last_unread_at: time1)
topic2 = Fabricate(:topic, last_unread_at: time2)
topic3 = Fabricate(:topic, last_unread_at: time3)
user = Fabricate(:user)
user.user_stat.update_columns(first_topic_unread_at: Time.zone.now)
TopicUser.change(user.id, topic3, notification_level: tracking)
user.user_stat.reload
expect(user.user_stat.first_topic_unread_at).to be_within(1.second).of(time3)
TopicUser.change(user.id, topic2, notification_level: watching)
user.user_stat.reload
expect(user.user_stat.first_topic_unread_at).to be_within(1.second).of(time2)
TopicUser.change(user.id, topic1, notification_level: regular)
user.user_stat.reload
expect(user.user_stat.first_topic_unread_at).to be_within(1.second).of(time2)
end
end
describe '#notification_levels' do
context "verify enum sequence" do
before do

View File

@ -2,8 +2,6 @@ require 'rails_helper'
describe UserStat do
it { is_expected.to belong_to :user }
it "is created automatically when a user is created" do
user = Fabricate(:evil_trout)
expect(user.user_stat).to be_present
@ -12,6 +10,34 @@ describe UserStat do
expect(user.user_stat.new_since).to be_present
end
context "#update_first_topic_unread_at" do
it "updates date correctly for staff" do
now = Time.zone.now
admin = Fabricate(:admin)
topic = Fabricate(:topic,
highest_staff_post_number: 7,
highest_post_number: 1,
last_unread_at: now
)
UserStat.update_first_topic_unread_at!
admin.reload
expect(admin.user_stat.first_topic_unread_at).to_not be_within(5.years).of(now)
TopicUser.change(admin.id, topic.id, last_read_post_number: 1,
notification_level: NotificationLevels.all[:tracking])
UserStat.update_first_topic_unread_at!
admin.reload
expect(admin.user_stat.first_topic_unread_at).to be_within(1.second).of(now)
end
end
context '#update_view_counts' do
let(:user) { Fabricate(:user) }