mirror of
https://github.com/discourse/discourse.git
synced 2024-12-01 07:23:38 +08:00
238d71bcad
Users unable to generate new summaries won't be able to regenerate them. They'll only see the warning saying it's outdated.
126 lines
3.5 KiB
Ruby
126 lines
3.5 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Produces a summary of a topic through an injected strategy and caches it.
#
# Summaries are stored as SummarySection records: one main record per topic
# (meta_section_id is nil) plus one record per chunk returned by the strategy,
# each chunk referencing the main record through meta_section_id.
class TopicSummarization
  # @param strategy [#summarize, #model] summarization backend. Only
  #   `summarize(content)` and `model` are called on it by this class.
  def initialize(strategy)
    @strategy = strategy
  end

  # Returns the summary for a topic, reusing the cached one whenever allowed.
  #
  # @param topic the topic to summarize; `id` and `title` are read from it.
  # @param user the requesting user; may be nil.
  # @param opts [Hash] `:skip_age_check` (truthy) allows regenerating an
  #   outdated summary regardless of how recently it was created.
  # @return the cached main SummarySection, a freshly created one, or nil when
  #   there is neither a user nor a cached summary.
  def summarize(topic, user, opts = {})
    existing_summary = SummarySection.find_by(target: topic, meta_section_id: nil)

    # Existing summary shouldn't be nil in this scenario because the controller checks its existence.
    return if !user && !existing_summary

    targets_data = summary_targets(topic).pluck(:post_number, :raw, :username)
    post_numbers = targets_data.map(&:first)

    current_topic_sha = build_sha(post_numbers)
    can_summarize = Summarization::Base.can_request_summary_for?(user)

    if use_cached?(existing_summary, can_summarize, current_topic_sha, !!opts[:skip_age_check])
      # It's important that we signal a cached summary is outdated.
      # Only users who could regenerate it trigger the flag.
      if can_summarize && new_targets?(existing_summary, current_topic_sha)
        existing_summary.mark_as_outdated
      end

      return existing_summary
    end

    # Drop the stale main summary together with its chunk records before rebuilding.
    delete_cached_summaries_of(topic) if existing_summary

    content = {
      resource_path: "#{Discourse.base_path}/t/-/#{topic.id}",
      content_title: topic.title,
      contents:
        targets_data.map { |(pn, raw, username)| { poster: username, id: pn, text: raw } },
    }

    summarization_result = strategy.summarize(content)

    cache_summary(summarization_result, post_numbers, topic)
  end

  # Selects the posts that feed the summary.
  #
  # Uses `best_replies` when `topic.has_summary?` is truthy and
  # `pick_selection` otherwise.
  #
  # @return a Post relation joined with users and ordered by post_number.
  def summary_targets(topic)
    topic.has_summary? ? best_replies(topic) : pick_selection(topic)
  end

  private

  attr_reader :strategy

  # Regular, non-hidden posts returned by `Post.summary` for the topic,
  # ordered by post_number.
  # NOTE(review): the selection criteria of `Post.summary` are defined outside
  # this class.
  def best_replies(topic)
    Post
      .summary(topic.id)
      .where("post_type = ?", Post.types[:regular])
      .where("NOT hidden")
      .joins(:user)
      .order(:post_number)
  end

  # Fallback selection: the first 5 posts, the 50 highest-scored posts and the
  # last 5 posts among the topic's regular, non-hidden posts.
  def pick_selection(topic)
    posts =
      Post
        .where(topic_id: topic.id)
        .where("post_type = ?", Post.types[:regular])
        .where("NOT hidden")
        .order(:post_number)

    # The three slices may overlap; duplicates are harmless inside the IN clause below.
    post_numbers = posts.limit(5).pluck(:post_number)
    post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
    post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)

    Post
      .where(topic_id: topic.id)
      .joins(:user)
      .where("post_number in (?)", post_numbers)
      .order(:post_number)
  end

  # Destroys every SummarySection stored for the topic (main summary and chunks).
  def delete_cached_summaries_of(topic)
    SummarySection.where(target: topic).destroy_all
  end

  # Decides whether the cached summary should be served instead of a new one.
  #
  # The cache is bypassed only when all of these hold: the user may request
  # summaries, the set of target posts changed, and either the age check is
  # skipped or the cached summary was created more than one hour ago.
  # Everyone else gets whatever is cached.
  #
  # @return truthy when the cached summary must be used; nil when none exists.
  def use_cached?(existing_summary, can_summarize, current_sha, skip_age_check)
    existing_summary &&
      !(
        can_summarize && new_targets?(existing_summary, current_sha) &&
          (skip_age_check || existing_summary.created_at < 1.hour.ago)
      )
  end

  # True when the summary was built from a different set of post numbers.
  def new_targets?(summary, current_sha)
    summary.original_content_sha != current_sha
  end

  # Persists the strategy result: one main record, then one record per chunk.
  #
  # @param result [Hash] expects `:summary` and `:chunks`; every chunk carries
  #   `:ids` and `:summary`.
  # @param post_numbers [Array] post numbers covered by the main summary, in
  #   ascending order (first and last delimit the stored range).
  # @return the main SummarySection record.
  def cache_summary(result, post_numbers, topic)
    main_summary =
      SummarySection.create!(
        target: topic,
        algorithm: strategy.model,
        content_range: (post_numbers.first..post_numbers.last),
        summarized_text: result[:summary],
        original_content_sha: build_sha(post_numbers),
      )

    result[:chunks].each do |chunk|
      SummarySection.create!(
        target: topic,
        algorithm: strategy.model,
        content_range: chunk[:ids].min..chunk[:ids].max,
        summarized_text: chunk[:summary],
        original_content_sha: build_sha(chunk[:ids]),
        meta_section_id: main_summary.id,
      )
    end

    main_summary
  end

  # Fingerprint of a list of post numbers.
  #
  # NOTE(review): ids are joined without a separator, so distinct lists such as
  # [1, 23] and [12, 3] produce the same digest. Left untouched because the
  # digest is compared against values already persisted in
  # `original_content_sha`; changing the format would flag every cached
  # summary as outdated.
  def build_sha(ids)
    Digest::SHA256.hexdigest(ids.join)
  end
end
|