PERF: remove avg_time calculations and regular jobs from posts and topics

After careful analysis of large data-sets it became apparent that avg_time
had no impact whatsoever on "best of" topic scoring. Calculating avg_time
was a very costly operation especially on large databases.

We have some longer term plans of introducing other weighting that is read
time based into our scoring for "best of" and "top" topics, but in the
interim to stop a large amount of work that is not achieving any value we
are removing the jobs.

Column removal will follow once we decide on a new replacement metric.
This commit is contained in:
Sam Saffron 2019-05-06 15:58:49 +10:00
parent ba3cf7109c
commit f8eddd40ad
12 changed files with 0 additions and 198 deletions

View File

@ -1,14 +0,0 @@
module Jobs
class CalculateAvgTime < Jobs::Scheduled
every 1.day
# PERF: these calculations can become exceedingly expnsive
# they run a huge gemoetric mean and are hard to optimise
# defer to only run once a day
def execute(args)
# Update the average times
Post.calculate_avg_time(2.days.ago)
Topic.calculate_avg_time(2.days.ago)
end
end
end

View File

@ -8,8 +8,6 @@ module Jobs
every 1.week
def execute(args)
Post.calculate_avg_time
Topic.calculate_avg_time
ScoreCalculator.new.calculate
MiniScheduler::Stat.purge_old
Draft.cleanup!

View File

@ -674,37 +674,6 @@ class Post < ActiveRecord::Base
end
# This calculates the geometric mean of the post timings and stores it along with
# each post.
def self.calculate_avg_time(min_topic_age = nil)
retry_lock_error do
builder = DB.build("UPDATE posts
SET avg_time = (x.gmean / 1000)
FROM (SELECT post_timings.topic_id,
post_timings.post_number,
round(exp(avg(CASE WHEN msecs > 0 THEN ln(msecs) ELSE 0 END))) AS gmean
FROM post_timings
INNER JOIN posts AS p2
ON p2.post_number = post_timings.post_number
AND p2.topic_id = post_timings.topic_id
AND p2.user_id <> post_timings.user_id
/*where2*/
GROUP BY post_timings.topic_id, post_timings.post_number) AS x
/*where*/")
builder.where("x.topic_id = posts.topic_id
AND x.post_number = posts.post_number
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)")
if min_topic_age
builder.where2("p2.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)",
bumped_at: min_topic_age)
end
builder.exec
end
end
before_save do
self.last_editor_id ||= user_id

View File

@ -671,31 +671,6 @@ class Topic < ActiveRecord::Base
SQL
end
# This calculates the geometric mean of the posts and stores it with the topic
def self.calculate_avg_time(min_topic_age = nil)
builder = DB.build <<~SQL
UPDATE topics
SET avg_time = x.gmean
FROM (SELECT topic_id,
round(exp(avg(ln(avg_time)))) AS gmean
FROM posts
WHERE avg_time > 0 AND avg_time IS NOT NULL
GROUP BY topic_id) AS x
/*where*/
SQL
builder.where <<~SQL
x.topic_id = topics.id AND
(topics.avg_time <> x.gmean OR topics.avg_time IS NULL)
SQL
if min_topic_age
builder.where("topics.bumped_at > :bumped_at", bumped_at: min_topic_age)
end
builder.exec
end
def changed_to_category(new_category)
return true if new_category.blank? || Category.exists?(topic_id: id)
return false if new_category.id == SiteSetting.uncategorized_category_id && !SiteSetting.allow_uncategorized_topics

View File

@ -23,7 +23,6 @@ class PostSerializer < BasicPostSerializer
:reply_count,
:reply_to_post_number,
:quote_count,
:avg_time,
:incoming_link_count,
:reads,
:score,

View File

@ -6,7 +6,6 @@ class ScoreCalculator
like_score: 15,
incoming_link_count: 5,
bookmark_count: 2,
avg_time: 0.05,
reads: 0.2
}
end

View File

@ -1023,14 +1023,6 @@ describe Post do
end
end
describe "calculate_avg_time" do
it "should not crash" do
Post.calculate_avg_time
Post.calculate_avg_time(1.day.ago)
end
end
describe "has_host_spam" do
let(:raw) { "hello from my site http://www.example.net http://#{GlobalSetting.hostname} http://#{RailsMultisite::ConnectionManagement.current_hostname}" }

View File

@ -162,69 +162,6 @@ describe PostTiming do
end
describe 'avg times' do
describe 'posts' do
it 'has no avg_time by default' do
expect(@post.avg_time).to be_blank
end
it "doesn't change when we calculate the avg time for the post because there's no timings" do
Post.calculate_avg_time
@post.reload
expect(@post.avg_time).to be_blank
end
end
describe 'topics' do
it 'has no avg_time by default' do
expect(@topic.avg_time).to be_blank
end
it "doesn't change when we calculate the avg time for the post because there's no timings" do
Topic.calculate_avg_time
@topic.reload
expect(@topic.avg_time).to be_blank
end
end
describe "it doesn't create an avg time for the same user" do
it 'something' do
PostTiming.record_timing(@timing_attrs.merge(user_id: @post.user_id))
Post.calculate_avg_time
@post.reload
expect(@post.avg_time).to be_blank
end
end
describe 'with a timing for another user' do
before do
PostTiming.record_timing(@timing_attrs)
Post.calculate_avg_time
@post.reload
end
it 'has a post avg_time from the timing' do
expect(@post.avg_time).to be_present
end
describe 'forum topics' do
before do
Topic.calculate_avg_time
@topic.reload
end
it 'has an avg_time from the timing' do
expect(@topic.avg_time).to be_present
end
end
end
end
end
end

View File

@ -2030,13 +2030,6 @@ describe Topic do
end
describe "calculate_avg_time" do
it "does not explode" do
Topic.calculate_avg_time
Topic.calculate_avg_time(1.day.ago)
end
end
describe "expandable_first_post?" do
let(:topic) { Fabricate.build(:topic) }

View File

@ -15,7 +15,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: 25,
incoming_link_count: 314,
reads: 475,
score: 1702.25,
@ -68,7 +67,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -118,7 +116,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,

View File

@ -838,7 +838,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: 24,
incoming_link_count: 4422,
reads: 327,
score: 21978.4,
@ -928,7 +927,6 @@ export default {
reply_count: 2,
reply_to_post_number: null,
quote_count: 0,
avg_time: 36,
incoming_link_count: 4680,
reads: 491,
score: 23815.8,
@ -1019,7 +1017,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: 36,
incoming_link_count: 1483,
reads: 274,
score: 7985.4,
@ -1110,7 +1107,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: 39,
incoming_link_count: 1241,
reads: 149,
score: 6161.35,
@ -1200,7 +1196,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: 22,
incoming_link_count: 386,
reads: 163,
score: 1953.7,

View File

@ -20,7 +20,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: 25,
incoming_link_count: 314,
reads: 475,
score: 1702.25,
@ -164,7 +163,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: 27,
incoming_link_count: 16,
reads: 460,
score: 308.35,
@ -268,7 +266,6 @@ export default {
reply_count: 3,
reply_to_post_number: null,
quote_count: 0,
avg_time: 33,
incoming_link_count: 5,
reads: 449,
score: 191.45,
@ -382,7 +379,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: 20,
incoming_link_count: 15,
reads: 401,
score: 291.2,
@ -486,7 +482,6 @@ export default {
reply_count: 2,
reply_to_post_number: 3,
quote_count: 1,
avg_time: 22,
incoming_link_count: 10,
reads: 386,
score: 213.3,
@ -592,7 +587,6 @@ export default {
reply_count: 1,
reply_to_post_number: 5,
quote_count: 0,
avg_time: 17,
incoming_link_count: 4,
reads: 329,
score: 106.65,
@ -708,7 +702,6 @@ export default {
reply_count: 1,
reply_to_post_number: 6,
quote_count: 1,
avg_time: 16,
incoming_link_count: 0,
reads: 326,
score: 86.0,
@ -812,7 +805,6 @@ export default {
reply_count: 1,
reply_to_post_number: 7,
quote_count: 0,
avg_time: 11,
incoming_link_count: 2,
reads: 296,
score: 89.75,
@ -922,7 +914,6 @@ export default {
reply_count: 1,
reply_to_post_number: 8,
quote_count: 1,
avg_time: 10,
incoming_link_count: 0,
reads: 293,
score: 79.1,
@ -1017,7 +1008,6 @@ export default {
reply_count: 1,
reply_to_post_number: 9,
quote_count: 0,
avg_time: 7,
incoming_link_count: 0,
reads: 275,
score: 75.35,
@ -1117,7 +1107,6 @@ export default {
reply_count: 1,
reply_to_post_number: 10,
quote_count: 0,
avg_time: 7,
incoming_link_count: 1,
reads: 273,
score: 79.95,
@ -1217,7 +1206,6 @@ export default {
reply_count: 1,
reply_to_post_number: 11,
quote_count: 1,
avg_time: 7,
incoming_link_count: 2,
reads: 273,
score: 84.95,
@ -1312,7 +1300,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: 7,
incoming_link_count: 11,
reads: 290,
score: 158.35,
@ -1407,7 +1394,6 @@ export default {
reply_count: 1,
reply_to_post_number: 12,
quote_count: 1,
avg_time: 9,
incoming_link_count: 0,
reads: 283,
score: 137.05,
@ -1511,7 +1497,6 @@ export default {
reply_count: 1,
reply_to_post_number: 14,
quote_count: 1,
avg_time: 7,
incoming_link_count: 0,
reads: 260,
score: 72.35,
@ -1606,7 +1591,6 @@ export default {
reply_count: 1,
reply_to_post_number: 15,
quote_count: 1,
avg_time: 7,
incoming_link_count: 0,
reads: 254,
score: 71.15,
@ -1701,7 +1685,6 @@ export default {
reply_count: 2,
reply_to_post_number: 16,
quote_count: 1,
avg_time: 7,
incoming_link_count: 0,
reads: 255,
score: 76.35,
@ -1796,7 +1779,6 @@ export default {
reply_count: 2,
reply_to_post_number: 17,
quote_count: 0,
avg_time: 8,
incoming_link_count: 0,
reads: 264,
score: 168.2,
@ -1896,7 +1878,6 @@ export default {
reply_count: 0,
reply_to_post_number: 18,
quote_count: 0,
avg_time: 8,
incoming_link_count: 1,
reads: 241,
score: 68.6,
@ -1996,7 +1977,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: 9,
incoming_link_count: 2,
reads: 244,
score: 74.25,
@ -3161,7 +3141,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 14,
reads: 24,
score: 224.6,
@ -3224,7 +3203,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 6,
reads: 22,
score: 34.2,
@ -3277,7 +3255,6 @@ export default {
reply_count: 1,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 14,
score: 7.2,
@ -3330,7 +3307,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 6,
reads: 12,
score: 31.6,
@ -3383,7 +3359,6 @@ export default {
reply_count: 0,
reply_to_post_number: 3,
quote_count: 1,
avg_time: null,
incoming_link_count: 0,
reads: 11,
score: 16.0,
@ -3735,7 +3710,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -3784,7 +3758,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -3833,7 +3806,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -4028,7 +4000,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -4081,7 +4052,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -4299,7 +4269,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -4348,7 +4317,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -4397,7 +4365,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -4594,7 +4561,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -4643,7 +4609,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -4692,7 +4657,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: null,
incoming_link_count: 0,
reads: 1,
score: 0,
@ -4888,7 +4852,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: 15,
incoming_link_count: 0,
reads: 2,
score: 1.15,
@ -4951,7 +4914,6 @@ export default {
reply_count: 0,
reply_to_post_number: null,
quote_count: 0,
avg_time: 16,
incoming_link_count: 0,
reads: 2,
score: 1.2,