PERF: calculate scores for topics/posts faster

Only look at topics that changed in last day for 15 minuted schedule
Do a full recalc weekly, just in case
This commit is contained in:
Sam 2014-02-27 11:45:20 +11:00
parent e4909c017c
commit 9551f4aeea
7 changed files with 139 additions and 36 deletions

View File

@ -9,8 +9,8 @@ module Jobs
def execute(args)
# Update the average times
Post.calculate_avg_time
Topic.calculate_avg_time
Post.calculate_avg_time(1.day.ago)
Topic.calculate_avg_time(1.day.ago)
# Feature topics in categories
CategoryFeaturedTopic.feature_topics
@ -19,8 +19,7 @@ module Jobs
UserStat.update_view_counts
# Update the scores of posts
ScoreCalculator.new.calculate
ScoreCalculator.new.calculate(1.day.ago)
# Automatically close stuff that we missed
Topic.auto_close

View File

@ -0,0 +1,16 @@
require_dependency 'score_calculator'
module Jobs
# This job will run on a regular basis to update statistics and denormalized data.
# If it does not run, the site will not function properly.
class Weekly < Jobs::Scheduled
every 1.week
def execute(args)
Post.calculate_avg_time
Topic.calculate_avg_time
ScoreCalculator.new.calculate
end
end
end

View File

@ -311,9 +311,9 @@ class Post < ActiveRecord::Base
# This calculates the geometric mean of the post timings and stores it along with
# each post.
def self.calculate_avg_time
def self.calculate_avg_time(min_topic_age=nil)
retry_lock_error do
exec_sql("UPDATE posts
builder = SqlBuilder.new("UPDATE posts
SET avg_time = (x.gmean / 1000)
FROM (SELECT post_timings.topic_id,
post_timings.post_number,
@ -324,9 +324,18 @@ class Post < ActiveRecord::Base
AND p2.topic_id = post_timings.topic_id
AND p2.user_id <> post_timings.user_id
GROUP BY post_timings.topic_id, post_timings.post_number) AS x
WHERE x.topic_id = posts.topic_id
/*where*/")
builder.where("x.topic_id = posts.topic_id
AND x.post_number = posts.post_number
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)")
if min_topic_age
builder.where("posts.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)",
bumped_at: min_topic_age)
end
builder.exec
end
end

View File

@ -362,15 +362,25 @@ class Topic < ActiveRecord::Base
end
# This calculates the geometric mean of the posts and stores it with the topic
def self.calculate_avg_time
exec_sql("UPDATE topics
def self.calculate_avg_time(min_topic_age=nil)
builder = SqlBuilder.new("UPDATE topics
SET avg_time = x.gmean
FROM (SELECT topic_id,
round(exp(avg(ln(avg_time)))) AS gmean
FROM posts
WHERE avg_time > 0 AND avg_time IS NOT NULL
GROUP BY topic_id) AS x
WHERE x.topic_id = topics.id AND (topics.avg_time <> x.gmean OR topics.avg_time IS NULL)")
/*where*/")
builder.where("x.topic_id = topics.id AND
(topics.avg_time <> x.gmean OR topics.avg_time IS NULL)")
if min_topic_age
builder.where("topics.bumped_at > :bumped_at",
bumped_at: min_topic_age)
end
builder.exec
end
def changed_to_category(cat)

View File

@ -16,22 +16,60 @@ class ScoreCalculator
end
# Calculate the score for all posts based on the weightings
def calculate
def calculate(min_topic_age=nil)
# First update the scores of the posts
exec_sql(post_score_sql, @weightings)
update_posts_score(min_topic_age)
# Update the percent rankings of the posts
exec_sql("UPDATE posts SET percent_rank = x.percent_rank
update_posts_rank(min_topic_age)
update_topics_rank(min_topic_age)
update_topics_percent_rank(min_topic_age)
end
private
def update_posts_score(min_topic_age)
components = []
@weightings.keys.each { |k| components << "COALESCE(#{k.to_s}, 0) * :#{k.to_s}" }
components = components.join(" + ")
builder = SqlBuilder.new(
"UPDATE posts SET score = x.score
FROM (SELECT id, #{components} as score FROM posts) AS x
/*where*/"
)
builder.where("x.id = posts.id
AND (posts.score IS NULL OR x.score <> posts.score)", @weightings)
filter_topics(builder, min_topic_age)
builder.exec
end
def update_posts_rank(min_topic_age)
builder = SqlBuilder.new("UPDATE posts SET percent_rank = x.percent_rank
FROM (SELECT id, percent_rank()
OVER (PARTITION BY topic_id ORDER BY SCORE DESC) as percent_rank
FROM posts) AS x
WHERE x.id = posts.id AND
/*where*/")
builder.where("x.id = posts.id AND
(posts.percent_rank IS NULL OR x.percent_rank <> posts.percent_rank)")
# Update the topics
exec_sql "UPDATE topics AS t
filter_topics(builder, min_topic_age)
builder.exec
end
def update_topics_rank(min_topic_age)
builder = SqlBuilder.new("UPDATE topics AS t
SET has_summary = (t.like_count >= :likes_required AND
t.posts_count >= :posts_required AND
x.max_score >= :score_required),
@ -41,7 +79,9 @@ class ScoreCalculator
AVG(p.score) AS avg_score
FROM posts AS p
GROUP BY p.topic_id) AS x
WHERE x.topic_id = t.id AND
/*where*/")
builder.where("x.topic_id = t.id AND
(
(t.score <> x.avg_score OR t.score IS NULL) OR
(t.has_summary IS NULL OR t.has_summary <> (
@ -53,31 +93,45 @@ class ScoreCalculator
",
likes_required: SiteSetting.summary_likes_required,
posts_required: SiteSetting.summary_posts_required,
score_required: SiteSetting.summary_score_threshold
score_required: SiteSetting.summary_score_threshold)
# Update percentage rank of topics
exec_sql("UPDATE topics SET percent_rank = x.percent_rank
if min_topic_age
builder.where("t.bumped_at > :bumped_at ",
bumped_at: min_topic_age)
end
builder.exec
end
def update_topics_percent_rank(min_topic_age)
builder = SqlBuilder.new("UPDATE topics SET percent_rank = x.percent_rank
FROM (SELECT id, percent_rank()
OVER (ORDER BY SCORE DESC) as percent_rank
FROM topics) AS x
WHERE x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)")
/*where*/")
builder.where("x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)")
if min_topic_age
builder.where("topics.bumped_at > :bumped_at ",
bumped_at: min_topic_age)
end
builder.exec
end
private
def exec_sql(sql, params=nil)
ActiveRecord::Base.exec_sql(sql, params)
def filter_topics(builder, min_topic_age)
if min_topic_age
builder.where('posts.topic_id IN
(SELECT id FROM topics WHERE bumped_at > :bumped_at)',
bumped_at: min_topic_age)
end
# Generate a SQL statement to update the scores of all posts
def post_score_sql
components = []
@weightings.keys.each { |k| components << "COALESCE(#{k.to_s}, 0) * :#{k.to_s}" }
components = components.join(" + ")
builder
end
"UPDATE posts SET score = x.score
FROM (SELECT id, #{components} as score FROM posts) AS x
WHERE x.id = posts.id AND (posts.score IS NULL OR x.score <> posts.score)"
end
end

View File

@ -787,4 +787,12 @@ describe Post do
end
end
describe "calculate_avg_time" do
it "should not crash" do
Post.calculate_avg_time
Post.calculate_avg_time(1.day.ago)
end
end
end

View File

@ -1318,4 +1318,11 @@ describe Topic do
end
end
describe "calculate_avg_time" do
it "does not explode" do
Topic.calculate_avg_time
Topic.calculate_avg_time(1.day.ago)
end
end
end