FEATURE: Show more context in Discourse topic oneboxes

Currently when generating a onebox for Discourse topics, some important
context is missing such as categories and tags.

This patch addresses this issue by introducing a new onebox engine
dedicated to display this information when available. Indeed to get this
new information, categories and tags are exposed in the topic metadata
as opengraph tags.
This commit is contained in:
Loïc Guitaut 2022-11-24 16:28:21 +01:00 committed by Loïc Guitaut
parent d2e9ea6193
commit 14d97f9cf1
16 changed files with 298 additions and 26 deletions

View File

@ -738,7 +738,8 @@ aside.onebox.xkcd .onebox-body img {
// allowlistedgeneric twitter labels
.onebox.allowlistedgeneric,
.onebox.whitelistedgeneric {
.onebox.whitelistedgeneric,
.onebox.discoursetopic {
.label1,
.label2 {
color: var(--primary-med-or-secondary-med);
@ -754,6 +755,7 @@ aside.onebox.xkcd .onebox-body img {
.onebox {
&.allowlistedgeneric,
&.whitelistedgeneric,
&.discoursetopic,
&.gfycat,
&.githubfolder {
.site-icon {
@ -769,6 +771,24 @@ aside.onebox.xkcd .onebox-body img {
}
}
.onebox.discoursetopic {
h3 {
width: 100%;
margin-bottom: 0.2rem !important;
}
.d-icon-tag {
width: 0.75rem;
padding-top: 0.3rem;
position: absolute;
color: var(--primary-medium);
}
.discourse-tags .discourse-tag:first-of-type {
padding-left: 1rem;
}
}
.onebox.gfycat p {
span.label1 a {
white-space: nowrap;

View File

@ -131,7 +131,8 @@
.discourse-tags {
display: inline-flex;
flex-wrap: wrap;
a {
a,
span {
margin-right: 0.25em;
}
}

View File

@ -311,6 +311,15 @@ module ApplicationHelper
result << tag(:meta, { name: "twitter:#{property}", content: content }, nil, true)
end
end
Array
.wrap(opts[:breadcrumbs])
.each do |breadcrumb|
result << tag(:meta, property: "og:article:section", content: breadcrumb[:name])
result << tag(:meta, property: "og:article:section:color", content: breadcrumb[:color])
end
Array
.wrap(opts[:tags])
.each { |tag_name| result << tag(:meta, property: "og:article:tag", content: tag_name) }
if opts[:read_time] && opts[:read_time] > 0 && opts[:like_count] && opts[:like_count] > 0
result << tag(:meta, name: "twitter:label1", value: I18n.t("reading_time"))

View File

@ -132,7 +132,7 @@
<% content_for :head do %>
<%= auto_discovery_link_tag(@topic_view, {action: :feed, slug: @topic_view.topic.slug, topic_id: @topic_view.topic.id}, rel: 'alternate nofollow', title: t('rss_posts_in_topic', topic: @topic_view.title), type: 'application/rss+xml') %>
<%= raw crawlable_meta_data(title: @topic_view.title, description: @topic_view.summary(strip_images: true), image: @topic_view.image_url, read_time: @topic_view.read_time, like_count: @topic_view.like_count, ignore_canonical: true, published_time: @topic_view.published_time) %>
<%= raw crawlable_meta_data(title: @topic_view.title, description: @topic_view.summary(strip_images: true), image: @topic_view.image_url, read_time: @topic_view.read_time, like_count: @topic_view.like_count, ignore_canonical: true, published_time: @topic_view.published_time, breadcrumbs: @breadcrumbs, tags: @topic_view.tags) %>
<% if @topic_view.prev_page || @topic_view.next_page %>
<% if @topic_view.prev_page %>

View File

@ -70,7 +70,7 @@ module CookedProcessorMixin
found = false
parent = img
while parent = parent.parent
if parent["class"] && parent["class"].include?("allowlistedgeneric")
if parent["class"] && parent["class"].match?(/\b(allowlistedgeneric|discoursetopic)\b/)
found = true
break
end

View File

@ -0,0 +1,59 @@
# frozen_string_literal: true
module Onebox
module Engine
class DiscourseTopicOnebox
include Engine
include StandardEmbed
include LayoutSupport
matches_regexp(%r{/t/.*(/\d+)?})
def data
@data ||= {
categories: categories,
link: link,
article_published_time: published_time.strftime("%-d %b %y"),
article_published_time_title: published_time.strftime("%I:%M%p - %d %B %Y"),
domain: html_entities.decode(raw[:site_name].truncate(80, separator: " ")),
description: html_entities.decode(raw[:description].truncate(250, separator: " ")),
title: html_entities.decode(raw[:title].truncate(80, separator: " ")),
image: image,
render_tags?: render_tags?,
render_category_block?: render_category_block?,
}.reverse_merge(raw)
end
alias verified_data data
private
def categories
Array
.wrap(raw[:article_sections])
.map
.with_index { |name, index| { name: name, color: raw[:article_section_colors][index] } }
end
def published_time
@published_time ||= Time.parse(raw[:published_time])
end
def html_entities
@html_entities ||= HTMLEntities.new
end
def image
image = Onebox::Helpers.get_absolute_image_url(raw[:image], @url)
Onebox::Helpers.normalize_url_for_output(html_entities.decode(image))
end
def render_tags?
raw[:article_tags].present?
end
def render_category_block?
render_tags? || categories.present?
end
end
end
end

View File

@ -4,19 +4,11 @@ module Onebox
class Normalizer
attr_reader :data
def get(attr, length = nil, sanitize = true)
return nil if Onebox::Helpers.blank?(data)
def get(attr, *args)
value = data[attr]
return nil if Onebox::Helpers.blank?(value)
value = html_entities.decode(value)
value = Sanitize.fragment(value) if sanitize
value.strip!
value = Onebox::Helpers.truncate(value, length) unless length.nil?
value
return if value.blank?
return value.map { |v| sanitize_value(v, *args) } if value.is_a?(Array)
sanitize_value(value, *args)
end
def method_missing(attr, *args, &block)
@ -48,5 +40,13 @@ module Onebox
def html_entities
@html_entities ||= HTMLEntities.new
end
def sanitize_value(value, length = nil, sanitize = true)
value = html_entities.decode(value)
value = Sanitize.fragment(value) if sanitize
value.strip!
value = Onebox::Helpers.truncate(value, length) if length
value
end
end
end

View File

@ -22,6 +22,8 @@ module Onebox
private
COLLECTIONS = %i[article_section article_section_color article_tag]
def extract(doc)
return {} if Onebox::Helpers.blank?(doc)
@ -33,7 +35,14 @@ module Onebox
if (m["property"] && m["property"][/^(?:og|article|product):(.+)$/i]) ||
(m["name"] && m["name"][/^(?:og|article|product):(.+)$/i])
value = (m["content"] || m["value"]).to_s
data[$1.tr("-:", "_").to_sym] ||= value unless Onebox::Helpers.blank?(value)
next if Onebox::Helpers.blank?(value)
key = $1.tr("-:", "_").to_sym
data[key] ||= value
if key.in?(COLLECTIONS)
collection_name = "#{key}s".to_sym
data[collection_name] ||= []
data[collection_name] << value
end
end
end

View File

@ -10,7 +10,7 @@ module Onebox
Sanitize::Config::RELAXED,
elements:
Sanitize::Config::RELAXED[:elements] +
%w[audio details embed iframe source video svg path],
%w[audio details embed iframe source video svg path use],
attributes: {
"a" => Sanitize::Config::RELAXED[:attributes]["a"] + %w[target],
"audio" => %w[controls controlslist],
@ -40,7 +40,8 @@ module Onebox
"path" => %w[d fill-rule],
"svg" => %w[aria-hidden width height viewbox],
"div" => [:data], # any data-* attributes,
"span" => [:data], # any data-* attributes
"span" => [:data], # any data-* attributes,
"use" => %w[href],
},
add_attributes: {
"iframe" => {
@ -89,6 +90,9 @@ module Onebox
"source" => {
"src" => HTTP_PROTOCOLS,
},
"use" => {
"href" => [:relative],
},
},
css: {
properties: Sanitize::Config::RELAXED[:css][:properties] + %w[--aspect-ratio],

View File

@ -0,0 +1,42 @@
{{#image}}<img src="{{image}}" class="thumbnail"/>{{/image}}
<div class="title-wrapper">
<h3><a href="{{link}}" target="_blank" rel="noopener">{{title}}</a></h3>
{{#render_category_block?}}
<div class="topic-category">
{{#categories}}
<span class="badge-wrapper bullet">
<span class="badge-category-bg" style="background-color: #{{color}};"></span>
<span class="badge-category clear-badge">
<span class="category-name">{{name}}</span>
</span>
</span>
{{/categories}}
{{#render_tags?}}
<div class="topic-header-extra">
<div class="list-tags">
<div class="discourse-tags">
<svg class="fa d-icon d-icon-tag svg-icon svg-string" xmlns="http://www.w3.org/2000/svg"><use href="#tag" /></svg>
{{#article_tags}}
<span class="discourse-tag simple">{{.}}</span>
{{/article_tags}}
</div>
</div>
</div>
{{/render_tags?}}
</div>
{{/render_category_block?}}
</div>
{{#description}}
<p>{{description}}</p>
{{/description}}
{{#data1}}
<p>
<span class="label1">{{label1}}: {{data1}}</span>
{{#data2}}
<span class="label2">{{label2}}: {{data2}}</span>
{{/data2}}
</p>
{{/data1}}

View File

@ -713,6 +713,10 @@ class TopicView
@mentioned_users = mentioned_users.to_h { |u| [u.username, u] }
end
def tags
@topic.tags.map(&:name)
end
protected
def read_posts_set
@ -814,13 +818,9 @@ class TopicView
end
def find_topic(topic_or_topic_id)
if topic_or_topic_id.is_a?(Topic)
topic_or_topic_id
else
# with_deleted covered in #check_and_raise_exceptions
finder = Topic.with_deleted.where(id: topic_or_topic_id).includes(:category)
finder.first
end
return topic_or_topic_id if topic_or_topic_id.is_a?(Topic)
# with_deleted covered in #check_and_raise_exceptions
Topic.with_deleted.includes(:category, :tags).find_by(id: topic_or_topic_id)
end
def unfiltered_posts

View File

@ -194,6 +194,10 @@ And that too in just over an year, way to go! [boom]">
<meta name="twitter:data2" value="9 ❤" />
<meta property="article:published_time" content="2014-02-06T04:55:19+00:00" />
<meta property="og:ignore_canonical" content="true" />
<meta property="og:article:section" content="praise" />
<meta property="og:article:section:color" content="9EB83B" />
<meta property="og:article:tag" content="how-to" />
<meta property="og:article:tag" content="sso" />

View File

@ -640,6 +640,38 @@ RSpec.describe ApplicationHelper do
expect(helper.crawlable_meta_data).not_to include("twitter:image")
end
end
context "with breadcrumbs" do
subject(:metadata) { helper.crawlable_meta_data(breadcrumbs: breadcrumbs) }
let(:breadcrumbs) do
[{ name: "section1", color: "ff0000" }, { name: "section2", color: "0000ff" }]
end
let(:tags) { <<~HTML.strip }
<meta property="og:article:section" content="section1" />
<meta property="og:article:section:color" content="ff0000" />
<meta property="og:article:section" content="section2" />
<meta property="og:article:section:color" content="0000ff" />
HTML
it "generates section and color tags" do
expect(metadata).to include tags
end
end
context "with tags" do
subject(:metadata) { helper.crawlable_meta_data(tags: tags) }
let(:tags) { %w[tag1 tag2] }
let(:output_tags) { <<~HTML.strip }
<meta property="og:article:tag" content="tag1" />
<meta property="og:article:tag" content="tag2" />
HTML
it "generates tag tags" do
expect(metadata).to include output_tags
end
end
end
describe "discourse_color_scheme_stylesheets" do

View File

@ -0,0 +1,51 @@
# frozen_string_literal: true
RSpec.describe Onebox::Engine::DiscourseTopicOnebox do
subject(:onebox) { described_class.new(url) }
describe "#data" do
subject(:data) { onebox.data }
let(:url) do
"https://meta.discourse.org/t/congratulations-most-stars-in-2013-github-octoverse/12483"
end
let(:expected_data) do
{
article_published_time: "6 Feb 14",
article_published_time_title: "04:55AM - 06 February 2014",
article_tags: %w[how-to sso],
card: "summary",
categories: [{ name: "praise", color: "9EB83B" }],
data1: "1 mins 🕑",
data2: "9 ❤",
description:
"Congratulations Discourse for qualifying Repositories with the most stars on GitHub Octoverse. And that too in just over an year, way to go! 💥",
domain: "Discourse Meta",
favicon:
"https://d11a6trkgmumsb.cloudfront.net/optimized/3X/b/3/b33be9538df3547fcf9d1a51a4637d77392ac6f9_2_32x32.png",
ignore_canonical: "true",
image:
"https://d11a6trkgmumsb.cloudfront.net/optimized/2X/d/d063b3b0807377d98695ee08042a9ba0a8c593bd_2_690x362.png",
label1: "Reading time",
label2: "Likes",
link:
"https://meta.discourse.org/t/congratulations-most-stars-in-2013-github-octoverse/12483",
published_time: "2014-02-06T04:55:19+00:00",
render_category_block?: true,
render_tags?: true,
site_name: "Discourse Meta",
title: "Congratulations, most stars in 2013 GitHub Octoverse!",
url:
"https://meta.discourse.org/t/congratulations-most-stars-in-2013-github-octoverse/12483",
}
end
before do
stub_request(:get, url).to_return(status: 200, body: onebox_response("discourse_topic"))
end
it "returns the expected data" do
expect(data).to include expected_data
end
end
end

View File

@ -24,4 +24,32 @@ RSpec.describe Onebox::OpenGraph do
og = described_class.new(doc)
expect(og.image).to eq("http://test.com/test&apos;ing.mp3")
end
describe "Collections" do
subject(:graph) { described_class.new(doc) }
let(:doc) { Nokogiri.HTML(<<-HTML) }
<html>
<title>test</title>
<meta property="og:article:tag" content="&lt;b&gt;tag1&lt;/b&gt;" />
<meta property="og:article:tag" content="tag2" />
<meta property="og:article:section" content="category1" />
<meta property="og:article:section" content="category2" />
<meta property="og:article:section:color" content="ff0000" />
<meta property="og:article:section:color" content="0000ff" />
</html>
HTML
it "handles multiple article:tag tags" do
expect(graph.article_tags).to eq %w[tag1 tag2]
end
it "handles multiple article:section tags" do
expect(graph.article_sections).to eq %w[category1 category2]
end
it "handles multiple article:section:color tags" do
expect(graph.article_section_colors).to eq %w[ff0000 0000ff]
end
end
end

View File

@ -1072,4 +1072,17 @@ RSpec.describe TopicView do
end
end
end
describe "#tags" do
subject(:topic_view_tags) { topic_view.tags }
let(:topic_view) { described_class.new(topic, user) }
let(:topic) { Fabricate.build(:topic, tags: tags) }
let(:tags) { Fabricate.build_times(2, :tag) }
let(:user) { Fabricate(:user) }
it "returns the tags names" do
expect(topic_view_tags).to match tags.map(&:name)
end
end
end