FEATURE: Prefer topic_embed's cached content when summarizing (#25190)

This commit is contained in:
Roman Rizzi 2024-01-09 14:00:01 -03:00 committed by GitHub
parent 2e0ec679c5
commit 47597219b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 30 additions and 2 deletions

View File

@ -32,7 +32,13 @@ class TopicSummarization
}
targets_data.map do |(pn, raw, username)|
content[:contents] << { poster: username, id: pn, text: raw }
raw_text = raw
if pn == 1 && topic.topic_embed&.embed_content_cache.present?
raw_text = topic.topic_embed&.embed_content_cache
end
content[:contents] << { poster: username, id: pn, text: raw_text }
end
summarization_result = strategy.summarize(content, user, &on_partial_blk)

View File

@ -2,5 +2,6 @@
# Fabricator for :topic_embed records.
Fabricator(:topic_embed) do
post
embed_url "http://eviltrout.com/123"
# Take the topic from the fabricated post so the embed and its post refer to
# the same topic, unless the caller passes an explicit topic.
topic { |te| te[:post].topic }
end

View File

@ -100,6 +100,24 @@ describe TopicSummarization do
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(cached_summary_text)
end
# When the topic has a topic_embed with cached content, the entry for the
# first post handed to the strategy must carry that cached content as its text.
context "when the topic has embed content cached" do
it "embed content is used instead of the raw text" do
# Attach an embed with cached HTML content to the topic under test.
topic_embed =
Fabricate(
:topic_embed,
topic: topic,
embed_content_cache: "<p>hello world new post :D</p>",
)
summarization.summarize(topic, user)
# Look up the first post's entry in the content exposed by the strategy.
first_post_data =
strategy.content[:contents].detect { |c| c[:id] == topic.first_post.post_number }
expect(first_post_data[:text]).to eq(topic_embed.embed_content_cache)
end
end
end
context "when the content was summarized in multiple chunks" do

View File

@ -21,7 +21,10 @@ class DummyCustomSummarization < Summarization::Base
"dummy"
end
# Stores the content it was asked to summarize and returns
# @summarization_result. When a block is given, it is yielded the result's
# :summary before the result is returned.
# NOTE(review): the two `def` lines below look like the before/after pair of
# this diff hunk; only the second one names the `content` parameter used in
# the body — confirm against the actual file.
def summarize(_content, _user)
def summarize(content, _user)
# Keep the payload so it can be read back through the `content` reader.
@content = content
@summarization_result.tap { |result| yield(result[:summary]) if block_given? }
end
attr_reader :content
end