FEATURE: Stream topic summaries. (#23065)

When we receive the `stream` parameter, we queue a job that periodically publishes partial updates; once the summarization finishes, it publishes a final update containing the completed summary plus metadata.

`summary-box` listens to these updates via MessageBus, and updates state accordingly.
This commit is contained in:
Roman Rizzi 2023-08-11 15:08:49 -03:00 committed by GitHub
parent 840bea3c51
commit 7ca5ee6cd2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 370 additions and 127 deletions

View File

@ -1,3 +1,8 @@
<div
class="summary-box__container"
{{did-insert this.subscribe}}
{{will-destroy this.unsubscribe}}
>
{{#if @postAttrs.hasTopRepliesSummary}} {{#if @postAttrs.hasTopRepliesSummary}}
<p>{{html-safe this.topRepliesSummaryInfo}}</p> <p>{{html-safe this.topRepliesSummaryInfo}}</p>
{{/if}} {{/if}}
@ -40,6 +45,8 @@
<AiSummarySkeleton /> <AiSummarySkeleton />
{{else}} {{else}}
<div class="generated-summary">{{this.summary}}</div> <div class="generated-summary">{{this.summary}}</div>
{{#if this.summarizedOn}}
<div class="summarized-on"> <div class="summarized-on">
<p> <p>
{{i18n "summary.summarized_on" date=this.summarizedOn}} {{i18n "summary.summarized_on" date=this.summarizedOn}}
@ -58,5 +65,7 @@
{{/if}} {{/if}}
</div> </div>
{{/if}} {{/if}}
{{/if}}
</article> </article>
{{/if}} {{/if}}
</div>

View File

@ -7,11 +7,13 @@ import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error"; import { popupAjaxError } from "discourse/lib/ajax-error";
import { cookAsync } from "discourse/lib/text"; import { cookAsync } from "discourse/lib/text";
import { shortDateNoYear } from "discourse/lib/formatter"; import { shortDateNoYear } from "discourse/lib/formatter";
import { bind } from "discourse-common/utils/decorators";
const MIN_POST_READ_TIME = 4; const MIN_POST_READ_TIME = 4;
export default class SummaryBox extends Component { export default class SummaryBox extends Component {
@service siteSettings; @service siteSettings;
@service messageBus;
@tracked summary = ""; @tracked summary = "";
@tracked summarizedOn = null; @tracked summarizedOn = null;
@ -25,6 +27,40 @@ export default class SummaryBox extends Component {
@tracked canCollapseSummary = false; @tracked canCollapseSummary = false;
@tracked loadingSummary = false; @tracked loadingSummary = false;
@bind
subscribe() {
const channel = `/summaries/topic/${this.args.postAttrs.topicId}`;
this.messageBus.subscribe(channel, this._updateSummary);
}
@bind
unsubscribe() {
this.messageBus.unsubscribe("/summaries/topic/*", this._updateSummary);
}
@bind
_updateSummary(update) {
const topicSummary = update.topic_summary;
if (topicSummary.summarized_text) {
cookAsync(topicSummary.summarized_text).then((cooked) => {
this.summary = cooked;
this.loadingSummary = false;
});
}
if (update.done) {
this.summarizedOn = shortDateNoYear(topicSummary.summarized_on);
this.summarizedBy = topicSummary.algorithm;
this.newPostsSinceSummary = topicSummary.new_posts_since_summary;
this.outdated = topicSummary.outdated;
this.newPostsSinceSummary = topicSummary.new_posts_since_summary;
this.canRegenerate = topicSummary.outdated && topicSummary.can_regenerate;
this.canCollapseSummary = !this.canRegenerate;
}
}
get generateSummaryTitle() { get generateSummaryTitle() {
const title = this.canRegenerate const title = this.canRegenerate
? "summary.buttons.regenerate" ? "summary.buttons.regenerate"
@ -130,27 +166,12 @@ export default class SummaryBox extends Component {
this.loadingSummary = true; this.loadingSummary = true;
} }
let fetchURL = `/t/${this.args.postAttrs.topicId}/strategy-summary`; let fetchURL = `/t/${this.args.postAttrs.topicId}/strategy-summary?stream=true`;
if (this.canRegenerate) { if (this.canRegenerate) {
fetchURL += "?skip_age_check=true"; fetchURL += "&skip_age_check=true";
} }
ajax(fetchURL) ajax(fetchURL).catch(popupAjaxError);
.then((data) => {
cookAsync(data.summary).then((cooked) => {
this.summary = cooked;
this.summarizedOn = shortDateNoYear(data.summarized_on);
this.summarizedBy = data.summarized_by;
this.newPostsSinceSummary = data.new_posts_since_summary;
this.outdated = data.outdated;
this.newPostsSinceSummary = data.new_posts_since_summary;
this.canRegenerate = data.outdated && data.can_regenerate;
this.canCollapseSummary = !this.canRegenerate;
});
})
.catch(popupAjaxError)
.finally(() => (this.loadingSummary = false));
} }
} }

View File

@ -0,0 +1,73 @@
import {
acceptance,
exists,
publishToMessageBus,
query,
updateCurrentUser,
} from "discourse/tests/helpers/qunit-helpers";
import { test } from "qunit";
import { click, visit } from "@ember/test-helpers";
import { cloneJSON } from "discourse-common/lib/object";
import topicFixtures from "discourse/tests/fixtures/topic";
// Acceptance test for the streamed topic summary: updates are pushed through
// MessageBus and the summary box is expected to render them.
acceptance("Topic - Summary", function (needs) {
  const currentUserId = 5;

  needs.user();

  needs.pretender((server, helper) => {
    server.get("/t/1.json", () => {
      const json = cloneJSON(topicFixtures["/t/130.json"]);
      json.id = 1;
      json.summarizable = true;

      return helper.response(json);
    });

    // The summary content is delivered via MessageBus in this test, so the
    // HTTP endpoint only needs to answer with an empty body.
    server.get("/t/1/strategy-summary", () => {
      return helper.response({});
    });
  });

  needs.hooks.beforeEach(() => {
    updateCurrentUser({ id: currentUserId });
  });

  test("displays streamed summary", async function (assert) {
    await visit("/t/-/1");

    const partialSummary = "This a";
    await publishToMessageBus("/summaries/topic/1", {
      done: false,
      topic_summary: { summarized_text: partialSummary },
    });

    await click(".topic-strategy-summarization");

    assert.strictEqual(
      query(".summary-box .generated-summary p").innerText,
      partialSummary,
      "Updates the summary with a partial result"
    );

    const finalSummary = "This is a completed summary";
    await publishToMessageBus("/summaries/topic/1", {
      done: true,
      topic_summary: {
        summarized_text: finalSummary,
        summarized_on: "2023-01-01T04:00:00.000Z",
        algorithm: "OpenAI GPT-4",
        outdated: false,
        new_posts_since_summary: false,
        can_regenerate: true,
      },
    });

    assert.strictEqual(
      query(".summary-box .generated-summary p").innerText,
      finalSummary,
      "Updates the summary with the final result"
    );

    assert.ok(exists(".summary-box .summarized-on"), "summary metadata exists");
  });
});

View File

@ -1185,18 +1185,21 @@ class TopicsController < ApplicationController
opts = params.permit(:skip_age_check) opts = params.permit(:skip_age_check)
if params[:stream]
Jobs.enqueue(
:stream_topic_summary,
topic_id: topic.id,
user_id: current_user.id,
opts: opts.as_json,
)
render json: success_json
else
hijack do hijack do
summary = TopicSummarization.new(strategy).summarize(topic, current_user, opts) summary = TopicSummarization.new(strategy).summarize(topic, current_user, opts)
render json: { render_serialized(summary, TopicSummarySerializer)
summary: summary.summarized_text, end
summarized_on: summary.updated_at,
summarized_by: summary.algorithm,
outdated: summary.outdated,
can_regenerate: Summarization::Base.can_request_summary_for?(current_user),
new_posts_since_summary:
topic.highest_post_number.to_i - summary.content_range&.max.to_i,
}
end end
end end

View File

@ -0,0 +1,49 @@
# frozen_string_literal: true
module Jobs
  # Summarizes a topic and streams the result to the requesting user over
  # MessageBus: partial updates (done: false) while the text is being
  # generated, then one final update (done: true) with the serialized summary.
  class StreamTopicSummary < ::Jobs::Base
    sidekiq_options retry: false

    # @param args [Hash] expects :topic_id and :user_id, plus optional :opts
    #   that are forwarded to TopicSummarization#summarize.
    def execute(args)
      topic = Topic.find_by(id: args[:topic_id])
      return if topic.nil?

      user = User.find_by(id: args[:user_id])
      return if user.nil?

      strategy = Summarization::Base.selected_strategy
      return if strategy.nil?
      return unless Summarization::Base.can_see_summary?(topic, user)

      guardian = Guardian.new(user)
      return unless guardian.can_see?(topic)

      opts = args[:opts] || {}

      accumulated_text = +""
      last_published_at = Time.now

      summary =
        TopicSummarization
          .new(strategy)
          .summarize(topic, user, opts) do |chunk|
            accumulated_text << chunk

            # Throttle updates: publish at most once every half second,
            # except in the test environment where every chunk is published.
            next unless (Time.now - last_published_at > 0.5) || Rails.env.test?

            partial_payload = { done: false, topic_summary: { summarized_text: accumulated_text } }
            publish_update(topic, user, partial_payload)
            last_published_at = Time.now
          end

      final_payload =
        TopicSummarySerializer.new(summary, { scope: guardian }).as_json.merge(done: true)

      publish_update(topic, user, final_payload)
    end

    private

    # Publishes the payload on the topic's summary channel, restricted to the
    # given user.
    def publish_update(topic, user, payload)
      MessageBus.publish("/summaries/topic/#{topic.id}", payload, user_ids: [user.id])
    end
  end
end

View File

@ -0,0 +1,13 @@
# frozen_string_literal: true
# Serializes a cached topic summary for the client.
#
# `scope` is expected to respond to `current_user` (a Guardian is passed by
# the streaming job).
class TopicSummarySerializer < ApplicationSerializer
  attributes :summarized_text,
             :summarized_on,
             :algorithm,
             :outdated,
             :can_regenerate,
             :new_posts_since_summary

  # The client reads `summarized_on` to display when the summary was
  # generated; it maps to the record's last update time.
  def summarized_on
    object.updated_at
  end

  # Whether the current user is allowed to request a new summary.
  def can_regenerate
    Summarization::Base.can_request_summary_for?(scope.current_user)
  end

  # Difference between the target's highest post number and the end of the
  # summarized range; a missing range counts as 0.
  def new_posts_since_summary
    object.target.highest_post_number.to_i - object.content_range&.end.to_i
  end
end

View File

@ -5,7 +5,7 @@ class TopicSummarization
@strategy = strategy @strategy = strategy
end end
def summarize(topic, user, opts = {}) def summarize(topic, user, opts = {}, &on_partial_blk)
existing_summary = SummarySection.find_by(target: topic, meta_section_id: nil) existing_summary = SummarySection.find_by(target: topic, meta_section_id: nil)
# Existing summary shouldn't be nil in this scenario because the controller checks its existence. # Existing summary shouldn't be nil in this scenario because the controller checks its existence.
@ -37,7 +37,7 @@ class TopicSummarization
content[:contents] << { poster: username, id: pn, text: raw } content[:contents] << { poster: username, id: pn, text: raw }
end end
summarization_result = strategy.summarize(content) summarization_result = strategy.summarize(content, &on_partial_blk)
cache_summary(summarization_result, targets_data.map(&:first), topic) cache_summary(summarization_result, targets_data.map(&:first), topic)
end end

View File

@ -72,6 +72,8 @@ module Summarization
# - content_title (optional): Provides guidance about what the content is about. # - content_title (optional): Provides guidance about what the content is about.
# - contents (required): Array of hashes with content to summarize (e.g. [{ poster: "asd", id: 1, text: "This is a text" }]) # - contents (required): Array of hashes with content to summarize (e.g. [{ poster: "asd", id: 1, text: "This is a text" }])
# All keys are required. # All keys are required.
# @param &on_partial_blk { Block - Optional } - If the strategy supports it, the passed block
# will get called with partial summarized text as it's generated.
# #
# @returns { Hash } - The summarized content, plus chunks if the content couldn't be summarized in one pass. Example: # @returns { Hash } - The summarized content, plus chunks if the content couldn't be summarized in one pass. Example:
# { # {

View File

@ -0,0 +1,76 @@
# frozen_string_literal: true
RSpec.describe Jobs::StreamTopicSummary do
  subject(:job) { described_class.new }

  describe "#execute" do
    fab!(:topic) { Fabricate(:topic) }
    fab!(:user) { Fabricate(:leader) }

    let(:plugin) { Plugin::Instance.new }
    let(:strategy) { DummyCustomSummarization.new({ summary: "dummy", chunks: [] }) }

    before do
      Group.find(Group::AUTO_GROUPS[:trust_level_3]).add(user)

      plugin.register_summarization_strategy(strategy)
      SiteSetting.summarization_strategy = strategy.model
    end

    # Runs the job with the given args and returns every message published on
    # the summary channel of `tracked_topic` while it ran.
    def published_updates(tracked_topic, job_args)
      MessageBus.track_publish("/summaries/topic/#{tracked_topic.id}") { job.execute(job_args) }
    end

    describe "validates params" do
      it "does nothing if there is no topic" do
        messages = published_updates(topic, { topic_id: nil, user_id: user.id })

        expect(messages).to be_empty
      end

      it "does nothing if there is no user" do
        messages = published_updates(topic, { topic_id: topic.id, user_id: nil })

        expect(messages).to be_empty
      end

      it "does nothing if the user is not allowed to see the topic" do
        private_topic = Fabricate(:private_message_topic)

        messages = published_updates(private_topic, { topic_id: private_topic.id, user_id: user.id })

        expect(messages).to be_empty
      end
    end

    it "publishes updates with a partial summary" do
      messages = published_updates(topic, { topic_id: topic.id, user_id: user.id })

      partial_summary_update = messages.first.data

      expect(partial_summary_update[:done]).to eq(false)
      expect(partial_summary_update.dig(:topic_summary, :summarized_text)).to eq("dummy")
    end

    it "publishes a final update to signal we're done and provide metadata" do
      messages = published_updates(topic, { topic_id: topic.id, user_id: user.id })

      final_update = messages.last.data
      metadata = final_update[:topic_summary]

      expect(final_update[:done]).to eq(true)
      expect(metadata[:algorithm]).to eq(strategy.model)
      expect(metadata[:outdated]).to eq(false)
      expect(metadata[:can_regenerate]).to eq(true)
      expect(metadata[:new_posts_since_summary]).to be_zero
    end
  end
end

View File

@ -5508,9 +5508,9 @@ RSpec.describe TopicsController do
describe "#summary" do describe "#summary" do
fab!(:topic) { Fabricate(:topic) } fab!(:topic) { Fabricate(:topic) }
let(:plugin) { Plugin::Instance.new } let(:plugin) { Plugin::Instance.new }
let(:strategy) { DummyCustomSummarization.new({ summary: "dummy", chunks: [] }) }
before do before do
strategy = DummyCustomSummarization.new("dummy")
plugin.register_summarization_strategy(strategy) plugin.register_summarization_strategy(strategy)
SiteSetting.summarization_strategy = strategy.model SiteSetting.summarization_strategy = strategy.model
end end
@ -5536,14 +5536,17 @@ RSpec.describe TopicsController do
expect(response.status).to eq(200) expect(response.status).to eq(200)
summary = response.parsed_body summary = response.parsed_body
expect(summary["summary"]).to eq(section.summarized_text) expect(summary.dig("topic_summary", "summarized_text")).to eq(section.summarized_text)
end end
end end
context "when the user is a member of an allowlisted group" do context "when the user is a member of an allowlisted group" do
fab!(:user) { Fabricate(:leader) } fab!(:user) { Fabricate(:leader) }
before { sign_in(user) } before do
sign_in(user)
Group.find(Group::AUTO_GROUPS[:trust_level_3]).add(user)
end
it "returns a 404 if there is no topic" do it "returns a 404 if there is no topic" do
invalid_topic_id = 999 invalid_topic_id = 999
@ -5560,6 +5563,20 @@ RSpec.describe TopicsController do
expect(response.status).to eq(403) expect(response.status).to eq(403)
end end
it "returns a summary" do
get "/t/#{topic.id}/strategy-summary.json"
expect(response.status).to eq(200)
summary = response.parsed_body["topic_summary"]
section = SummarySection.last
expect(summary["summarized_text"]).to eq(section.summarized_text)
expect(summary["algorithm"]).to eq(strategy.model)
expect(summary["outdated"]).to eq(false)
expect(summary["can_regenerate"]).to eq(true)
expect(summary["new_posts_since_summary"]).to be_zero
end
end end
context "when the user is not a member of an allowlisted group" do context "when the user is not a member of an allowlisted group" do
@ -5587,7 +5604,7 @@ RSpec.describe TopicsController do
expect(response.status).to eq(200) expect(response.status).to eq(200)
summary = response.parsed_body summary = response.parsed_body
expect(summary["summary"]).to eq(section.summarized_text) expect(summary.dig("topic_summary", "summarized_text")).to eq(section.summarized_text)
end end
end end
end end

View File

@ -186,5 +186,17 @@ describe TopicSummarization do
end end
end end
end end
describe "stream partial updates" do
  let(:summary) { { summary: "This is the final summary", chunks: [] } }

  it "receives a blk that is passed to the underlying strategy and called with partial summaries" do
    received_text = nil

    # The block should be handed the partial text produced by the strategy.
    summarization.summarize(topic, user) do |partial_summary|
      received_text = partial_summary
    end

    expect(received_text).to eq(summary[:summary])
  end
end
end end
end end

View File

@ -22,6 +22,6 @@ class DummyCustomSummarization < Summarization::Base
end end
def summarize(_content) def summarize(_content)
@summarization_result @summarization_result.tap { |result| yield(result[:summary]) if block_given? }
end end
end end

View File

@ -1,32 +0,0 @@
# frozen_string_literal: true
# System spec: signs in as an admin, registers a dummy summarization strategy,
# and checks that clicking the summarize button renders the strategy's summary.
RSpec.describe "Topic summarization", type: :system, js: true do
fab!(:user) { Fabricate(:admin) }
# has_summary to force topic map to be present.
fab!(:topic) { Fabricate(:topic, has_summary: true) }
fab!(:post_1) { Fabricate(:post, topic: topic) }
fab!(:post_2) { Fabricate(:post, topic: topic) }
let(:plugin) { Plugin::Instance.new }
let(:expected_summary) { "This is a summary" }
# Result the dummy strategy is constructed with.
let(:summarization_result) { { summary: expected_summary, chunks: [] } }
before do
sign_in(user)
# Register the dummy strategy and select it through the site setting.
strategy = DummyCustomSummarization.new(summarization_result)
plugin.register_summarization_strategy(strategy)
SiteSetting.summarization_strategy = strategy.model
end
it "returns a summary using the selected timeframe" do
visit("/t/-/#{topic.id}")
find(".topic-strategy-summarization").click
# The rendered summary paragraph should match the dummy strategy's text.
summary = find(".summary-box .generated-summary p").text
expect(summary).to eq(expected_summary)
end
end