From ebd3971533793f83db0efab27c65a7e60aa5e72b Mon Sep 17 00:00:00 2001 From: Sam Date: Wed, 17 Jan 2024 13:01:04 +1100 Subject: [PATCH] FEATURE: experiment with hot sort order (#25274) This introduces a new experimental hot sort ordering. It attempts to float top conversations by first prioritizing topics with lots of recent activity (likes and users responding). The schedule that updates hot topics is disabled unless the hidden site setting `experimental_hot_topics` is enabled. You can control "decay" with `hot_topics_gravity` and "recency" with `hot_topics_recent_days`. Data is stored in the new `topic_hot_scores` table and you can check it out on the `/hot` route once enabled. --------- Co-authored-by: Penar Musaraj --- app/controllers/list_controller.rb | 8 + app/jobs/scheduled/update_topic_hot_scores.rb | 11 ++ app/models/topic_hot_score.rb | 140 ++++++++++++++++++ config/locales/client.en.yml | 5 + config/routes.rb | 1 + config/site_settings.yml | 9 ++ .../20240116043702_create_topic_hot_scores.rb | 17 +++ lib/discourse.rb | 4 +- lib/topic_query.rb | 8 + spec/models/topic_hot_scores_spec.rb | 58 ++++++++ 10 files changed, 259 insertions(+), 2 deletions(-) create mode 100644 app/jobs/scheduled/update_topic_hot_scores.rb create mode 100644 app/models/topic_hot_score.rb create mode 100644 db/migrate/20240116043702_create_topic_hot_scores.rb create mode 100644 spec/models/topic_hot_scores_spec.rb diff --git a/app/controllers/list_controller.rb b/app/controllers/list_controller.rb index 3f1f94225fe..d650d18519e 100644 --- a/app/controllers/list_controller.rb +++ b/app/controllers/list_controller.rb @@ -261,6 +261,14 @@ class ListController < ApplicationController render "list", formats: [:rss] end + def hot_feed + discourse_expires_in 1.minute + + @topic_list = TopicQuery.new(nil).list_hot + + render "list", formats: [:rss] + end + def category_feed guardian.ensure_can_see!(@category) discourse_expires_in 1.minute diff --git 
a/app/jobs/scheduled/update_topic_hot_scores.rb b/app/jobs/scheduled/update_topic_hot_scores.rb new file mode 100644 index 00000000000..59df23f31f7 --- /dev/null +++ b/app/jobs/scheduled/update_topic_hot_scores.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Jobs + class UpdateTopicHotScores < ::Jobs::Scheduled + every 10.minutes + + def execute(args) + TopicHotScore.update_scores if SiteSetting.experimental_hot_topics + end + end +end diff --git a/app/models/topic_hot_score.rb b/app/models/topic_hot_score.rb new file mode 100644 index 00000000000..a7e0d22cdb8 --- /dev/null +++ b/app/models/topic_hot_score.rb @@ -0,0 +1,140 @@ +# frozen_string_literal: true + +class TopicHotScore < ActiveRecord::Base + belongs_to :topic + + DEFAULT_BATCH_SIZE = 1000 + + def self.update_scores(max = DEFAULT_BATCH_SIZE) + # score is + # total likes / (age in hours + 2) ^ gravity, plus (recent likes + recent posters) / (hours since recent_first_bumped_at + 2) ^ gravity (second term is 0 when recent_first_bumped_at is NULL) + + # 1. insert a new record if one does not exist (up to batch size) + # 2. refresh recent likes, recent posters and first recent bump time (applies to every row, no batch limit) + # 3. 
update all top scoring topics (up to batch size) + + now = Time.zone.now + + args = { + now: now, + gravity: SiteSetting.hot_topics_gravity, + max: max, + private_message: Archetype.private_message, + recent_cutoff: now - SiteSetting.hot_topics_recent_days.days, + } + + # insert up to BATCH_SIZE records that are missing from table + DB.exec(<<~SQL, args) + INSERT INTO topic_hot_scores ( + topic_id, + score, + recent_likes, + recent_posters, + created_at, + updated_at + ) + SELECT + topics.id, + 0.0, + 0, + 0, + :now, + :now + + FROM topics + LEFT OUTER JOIN topic_hot_scores ON topic_hot_scores.topic_id = topics.id + WHERE topic_hot_scores.topic_id IS NULL + AND topics.deleted_at IS NULL + AND topics.archetype <> :private_message + AND topics.created_at <= :now + ORDER BY topics.bumped_at desc + LIMIT :max + SQL + + # update recent counts for every row in topic_hot_scores (this statement has no LIMIT; rows without recent activity are reset to 0) + DB.exec(<<~SQL, args) + UPDATE topic_hot_scores thsOrig + SET + recent_likes = COALESCE(new_values.likes_count, 0), + recent_posters = COALESCE(new_values.unique_participants, 0), + recent_first_bumped_at = COALESCE(new_values.first_bumped_at, ths.recent_first_bumped_at) + FROM + topic_hot_scores ths + LEFT OUTER JOIN + ( + SELECT + t.id AS topic_id, + COUNT(DISTINCT p.user_id) AS unique_participants, + ( + SELECT COUNT(*) + FROM post_actions pa + JOIN posts p2 ON p2.id = pa.post_id + WHERE p2.topic_id = t.id + AND pa.post_action_type_id = 2 -- action_type for 'like' + AND pa.created_at >= :recent_cutoff + AND pa.deleted_at IS NULL + ) AS likes_count, + MIN(p.created_at) AS first_bumped_at + FROM + topics t + JOIN + posts p ON t.id = p.topic_id + WHERE + p.created_at >= :recent_cutoff + AND t.archetype <> 'private_message' + AND t.deleted_at IS NULL + AND p.deleted_at IS NULL + AND t.created_at <= :now + AND t.bumped_at >= :recent_cutoff + AND p.created_at < :now + AND p.created_at >= :recent_cutoff + GROUP BY + t.id + ) AS new_values + ON ths.topic_id = new_values.topic_id + + WHERE thsOrig.topic_id = ths.topic_id + 
SQL + + # recompute the score for up to BATCH_SIZE rows, picked by highest current score and then most recent first bump (not by staleness) + # we need an extra index for this + DB.exec(<<~SQL, args) + UPDATE topic_hot_scores ths + SET score = topics.like_count / + (EXTRACT(EPOCH FROM (:now - topics.created_at)) / 3600 + 2) ^ :gravity + + + CASE WHEN ths.recent_first_bumped_at IS NULL THEN 0 ELSE + (ths.recent_likes + ths.recent_posters) / + (EXTRACT(EPOCH FROM (:now - recent_first_bumped_at)) / 3600 + 2) ^ :gravity + END + , + updated_at = :now + + FROM topics + WHERE topics.id IN ( + SELECT topic_id FROM topic_hot_scores + ORDER BY score DESC, recent_first_bumped_at DESC NULLS LAST + LIMIT :max + ) AND ths.topic_id = topics.id + SQL + end +end + +# == Schema Information +# +# Table name: topic_hot_scores +# +# id :bigint not null, primary key +# topic_id :integer not null +# score :float default(0.0), not null +# recent_likes :integer default(0), not null +# recent_posters :integer default(0), not null +# recent_first_bumped_at :datetime +# created_at :datetime not null +# updated_at :datetime not null +# +# Indexes +# +# index_topic_hot_scores_on_score_and_topic_id (score,topic_id) UNIQUE +# index_topic_hot_scores_on_topic_id (topic_id) UNIQUE +# diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml index 648eeb0ce3f..dbdae947324 100644 --- a/config/locales/client.en.yml +++ b/config/locales/client.en.yml @@ -2971,6 +2971,7 @@ en: category: "There are no more %{category} topics." tag: "There are no more %{tag} topics." top: "There are no more top topics." + hot: "There are no more hot topics." bookmarks: "There are no more bookmarked topics." filter: "There are no more topics." 
@@ -4059,6 +4060,10 @@ en: title: "Unseen" lower_title: "unseen" help: "new topics and topics you are currently watching or tracking with unread posts" + hot: + title: "Hot" + lower_title: "hot" + help: "top recent topics" new: lower_title_with_count: one: "%{count} new" diff --git a/config/routes.rb b/config/routes.rb index 6d3e09a50e0..a9fa7597d8b 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -1217,6 +1217,7 @@ Discourse::Application.routes.draw do get "latest.rss" => "list#latest_feed", :format => :rss get "top.rss" => "list#top_feed", :format => :rss + get "hot.rss" => "list#hot_feed", :format => :rss Discourse.filters.each { |filter| get "#{filter}" => "list##{filter}" } diff --git a/config/site_settings.yml b/config/site_settings.yml index 2cc30791f03..d5071a7ca8e 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -3111,3 +3111,12 @@ dashboard: verbose_user_stat_count_logging: hidden: true default: false + experimental_hot_topics: + hidden: true + default: false + hot_topics_gravity: + hidden: true + default: 1.8 + hot_topics_recent_days: + hidden: true + default: 7 diff --git a/db/migrate/20240116043702_create_topic_hot_scores.rb b/db/migrate/20240116043702_create_topic_hot_scores.rb new file mode 100644 index 00000000000..f9157a7f0c3 --- /dev/null +++ b/db/migrate/20240116043702_create_topic_hot_scores.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +class CreateTopicHotScores < ActiveRecord::Migration[7.0] + def change + create_table :topic_hot_scores do |t| + t.integer :topic_id, null: false + t.float :score, null: false, default: 0.0 + t.integer :recent_likes, null: false, default: 0 + t.integer :recent_posters, null: false, default: 0 + t.datetime :recent_first_bumped_at + t.timestamps + end + + add_index :topic_hot_scores, :topic_id, unique: true + add_index :topic_hot_scores, %i[score topic_id], unique: true + end +end diff --git a/lib/discourse.rb b/lib/discourse.rb index bc7cdb5363b..1af04378c0d 100644 --- 
a/lib/discourse.rb +++ b/lib/discourse.rb @@ -313,11 +313,11 @@ module Discourse end def self.filters - @filters ||= %i[latest unread new unseen top read posted bookmarks] + @filters ||= %i[latest unread new unseen top read posted bookmarks hot] end def self.anonymous_filters - @anonymous_filters ||= %i[latest top categories] + @anonymous_filters ||= %i[latest top categories hot] end def self.top_menu_items diff --git a/lib/topic_query.rb b/lib/topic_query.rb index 6382124898a..9aa0a83b30b 100644 --- a/lib/topic_query.rb +++ b/lib/topic_query.rb @@ -338,6 +338,14 @@ class TopicQuery create_list(:bookmarks) { |l| l.where("tu.bookmarked") } end + def list_hot + create_list(:hot, unordered: true) do |topics| + topics.joins("JOIN topic_hot_scores on topics.id = topic_hot_scores.topic_id").order( + "topic_hot_scores.score DESC", + ) + end + end + def list_top_for(period) score_column = TopTopic.score_column_for_period(period) create_list(:top, unordered: true) do |topics| diff --git a/spec/models/topic_hot_scores_spec.rb b/spec/models/topic_hot_scores_spec.rb new file mode 100644 index 00000000000..2391276968d --- /dev/null +++ b/spec/models/topic_hot_scores_spec.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +RSpec.describe TopicHotScore do + describe ".update_scores" do + fab!(:user) + fab!(:user2) { Fabricate(:user) } + + it "can correctly update like counts and post counts and account for activity" do + freeze_time + + TopicHotScore.create!(topic_id: -1, score: 0.0, recent_likes: 99, recent_posters: 0) + + old_post = Fabricate(:post, created_at: 10.months.ago) + topic = old_post.topic + + new_reply = Fabricate(:post, user: user, topic: topic, created_at: 4.hours.ago) + newer_reply = Fabricate(:post, user: user2, topic: topic, created_at: 1.hour.ago) + Fabricate(:post, user: user2, topic: topic, created_at: 1.minute.ago) + + freeze_time(1.year.ago) + PostActionCreator.like(user, old_post) + freeze_time(1.year.from_now) + + PostActionCreator.like(user2, 
new_reply) + PostActionCreator.like(user, newer_reply) + + TopicHotScore.update_scores + + hot_scoring = TopicHotScore.find_by(topic_id: topic.id) + + expect(hot_scoring.recent_likes).to eq(2) + expect(hot_scoring.recent_posters).to eq(2) + expect(hot_scoring.recent_first_bumped_at).to eq_time(new_reply.created_at) + expect(hot_scoring.score).to be_within(0.001).of(1.020) + + expect(TopicHotScore.find_by(topic_id: -1).recent_likes).to eq(0) + end + + it "can correctly set scores for topics" do + freeze_time + + topic1 = Fabricate(:topic, like_count: 3, created_at: 1.hour.ago) + topic2 = Fabricate(:topic, like_count: 10, created_at: 3.hour.ago) + + TopicHotScore.update_scores + + expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.415) + expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.551) + + freeze_time(2.hours.from_now) + + TopicHotScore.update_scores + + expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.165) + expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.301) + end + end +end