From 690ff4499ce00fbe939707a0cdfb51a254e461b6 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 1 Feb 2024 17:11:40 +1100 Subject: [PATCH] DEV: adjustments to hot algorithm (#25517) 1. Serial likers will just like a bunch of posts on the same topic, which heavily inflates the hot score. To avoid artificial "heat" generated by one user, only count the first like per user on a topic within the recent_cutoff range. 2. When looking at recent topics prefer "unique likers"; defer to total likes on older topics because we do not have an easy count for unique likers. 3. Stop taking 1 off like_count; it is not needed - platforms like Reddit allow you to like your own post, so they need to remove it. --- app/models/topic_hot_score.rb | 7 +++-- spec/models/topic_hot_scores_spec.rb | 41 ++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/app/models/topic_hot_score.rb b/app/models/topic_hot_score.rb index 7c026ecd92e..6e874cd050e 100644 --- a/app/models/topic_hot_score.rb +++ b/app/models/topic_hot_score.rb @@ -66,7 +66,7 @@ class TopicHotScore < ActiveRecord::Base t.id AS topic_id, COUNT(DISTINCT p.user_id) AS unique_participants, ( - SELECT COUNT(*) + SELECT COUNT(distinct pa.user_id) FROM post_actions pa JOIN posts p2 ON p2.id = pa.post_id WHERE p2.topic_id = t.id @@ -100,7 +100,10 @@ class TopicHotScore < ActiveRecord::Base # we need an extra index for this DB.exec(<<~SQL, args) UPDATE topic_hot_scores ths - SET score = (topics.like_count - 1) / + SET score = ( + CASE WHEN topics.created_at > :recent_cutoff + THEN ths.recent_likes ELSE topics.like_count END + ) / (EXTRACT(EPOCH FROM (:now - topics.created_at)) / 3600 + 2) ^ :gravity + CASE WHEN ths.recent_first_bumped_at IS NULL THEN 0 ELSE diff --git a/spec/models/topic_hot_scores_spec.rb b/spec/models/topic_hot_scores_spec.rb index 2abb404ed22..8f2a3ddbea4 100644 --- a/spec/models/topic_hot_scores_spec.rb +++ b/spec/models/topic_hot_scores_spec.rb @@ -4,6 +4,7 @@ RSpec.describe TopicHotScore do 
describe ".update_scores" do fab!(:user) fab!(:user2) { Fabricate(:user) } + fab!(:user3) { Fabricate(:user) } it "can correctly update like counts and post counts and account for activity" do freeze_time @@ -24,35 +25,57 @@ RSpec.describe TopicHotScore do PostActionCreator.like(user2, new_reply) PostActionCreator.like(user, newer_reply) + # user 3 likes two posts, but we should only count 1 + # this avoids a single user from trivially inflating hot scores + PostActionCreator.like(user3, new_reply) + PostActionCreator.like(user3, newer_reply) + TopicHotScore.update_scores hot_scoring = TopicHotScore.find_by(topic_id: topic.id) - expect(hot_scoring.recent_likes).to eq(2) + expect(hot_scoring.recent_likes).to eq(3) expect(hot_scoring.recent_posters).to eq(2) expect(hot_scoring.recent_first_bumped_at).to eq_time(new_reply.created_at) - expect(hot_scoring.score).to be_within(0.001).of(1.219) + expect(hot_scoring.score).to be_within(0.001).of(1.771) expect(TopicHotScore.find_by(topic_id: -1).recent_likes).to eq(0) end + it "prefers recent_likes to topic like count for recent topics" do + freeze_time + + topic = Fabricate(:topic, created_at: 1.hour.ago) + post = Fabricate(:post, topic: topic, created_at: 1.minute.ago) + PostActionCreator.like(user, post) + + TopicHotScore.update_scores + score = TopicHotScore.find_by(topic_id: topic.id).score + + topic.update!(like_count: 100) + + TopicHotScore.update_scores + + expect(TopicHotScore.find_by(topic_id: topic.id).score).to be_within(0.001).of(score) + end + it "can correctly set scores for topics" do freeze_time - topic1 = Fabricate(:topic, like_count: 3, created_at: 1.hour.ago) - topic2 = Fabricate(:topic, like_count: 10, created_at: 3.hour.ago) + topic1 = Fabricate(:topic, like_count: 3, created_at: 2.weeks.ago) + topic2 = Fabricate(:topic, like_count: 10, created_at: 2.weeks.ago) TopicHotScore.update_scores - expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.535) - 
expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(1.304) + expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.002) + expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.009) - freeze_time(2.hours.from_now) + freeze_time(6.weeks.from_now) TopicHotScore.update_scores - expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.289) - expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.871) + expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.0001).of(0.0005) + expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.001) end end end