mirror of
https://github.com/discourse/discourse.git
synced 2025-03-21 08:35:47 +08:00
FEATURE: option to enable inline oneboxes for all domains
Also, prefer the page's <title> over the Open Graph title (og:title), which is often far too sparse
This commit is contained in:
parent
582ae9ab8d
commit
f6bc572fb8
@ -986,6 +986,7 @@ en:
|
|||||||
post_onebox_maxlength: "Maximum length of a oneboxed Discourse post in characters."
|
post_onebox_maxlength: "Maximum length of a oneboxed Discourse post in characters."
|
||||||
onebox_domains_blacklist: "A list of domains that will never be oneboxed."
|
onebox_domains_blacklist: "A list of domains that will never be oneboxed."
|
||||||
inline_onebox_domains_whitelist: "A list of domains that will be oneboxed in miniature form if linked without a title"
|
inline_onebox_domains_whitelist: "A list of domains that will be oneboxed in miniature form if linked without a title"
|
||||||
|
enable_inline_onebox_on_all_domains: "Ignore inline_onebox_domains_whitelist site setting and allow inline onebox on all domains."
|
||||||
max_oneboxes_per_post: "Maximum number of oneboxes in a post."
|
max_oneboxes_per_post: "Maximum number of oneboxes in a post."
|
||||||
|
|
||||||
logo_url: "The logo image at the top left of your site, should be a wide rectangle shape. If left blank site title text will be shown."
|
logo_url: "The logo image at the top left of your site, should be a wide rectangle shape. If left blank site title text will be shown."
|
||||||
|
@ -949,6 +949,8 @@ onebox:
|
|||||||
inline_onebox_domains_whitelist:
|
inline_onebox_domains_whitelist:
|
||||||
default: ''
|
default: ''
|
||||||
type: list
|
type: list
|
||||||
|
enable_inline_onebox_on_all_domains:
|
||||||
|
default: false
|
||||||
|
|
||||||
spam:
|
spam:
|
||||||
add_rel_nofollow_to_user_content: true
|
add_rel_nofollow_to_user_content: true
|
||||||
|
@ -36,13 +36,15 @@ class InlineOneboxer
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if whitelist = SiteSetting.inline_onebox_domains_whitelist
|
always_allow = SiteSetting.enable_inline_onebox_on_all_domains
|
||||||
|
domains = SiteSetting.inline_onebox_domains_whitelist&.split('|') unless always_allow
|
||||||
|
|
||||||
|
if always_allow || domains
|
||||||
uri = URI(url) rescue nil
|
uri = URI(url) rescue nil
|
||||||
|
|
||||||
domains = whitelist.split('|')
|
|
||||||
if uri.present? &&
|
if uri.present? &&
|
||||||
uri.hostname.present? &&
|
uri.hostname.present? &&
|
||||||
domains.include?(uri.hostname) &&
|
(always_allow || domains.include?(uri.hostname)) &&
|
||||||
title = RetrieveTitle.crawl(url)
|
title = RetrieveTitle.crawl(url)
|
||||||
return onebox_for(url, title, opts)
|
return onebox_for(url, title, opts)
|
||||||
end
|
end
|
||||||
|
@ -13,11 +13,11 @@ module RetrieveTitle
|
|||||||
title = nil
|
title = nil
|
||||||
if doc = Nokogiri::HTML(html)
|
if doc = Nokogiri::HTML(html)
|
||||||
|
|
||||||
if node = doc.at('meta[property="og:title"]')
|
title = doc.at('title')&.inner_text
|
||||||
|
|
||||||
|
if !title && node = doc.at('meta[property="og:title"]')
|
||||||
title = node['content']
|
title = node['content']
|
||||||
end
|
end
|
||||||
|
|
||||||
title ||= doc.at('title')&.inner_text
|
|
||||||
end
|
end
|
||||||
|
|
||||||
if title.present?
|
if title.present?
|
||||||
@ -42,9 +42,6 @@ module RetrieveTitle
|
|||||||
|
|
||||||
# Fetch the beginning of a HTML document at a url
|
# Fetch the beginning of a HTML document at a url
|
||||||
def self.fetch_beginning(url)
|
def self.fetch_beginning(url)
|
||||||
# Never crawl in test mode
|
|
||||||
return if Rails.env.test?
|
|
||||||
|
|
||||||
fd = FinalDestination.new(url)
|
fd = FinalDestination.new(url)
|
||||||
uri = fd.resolve
|
uri = fd.resolve
|
||||||
return "" unless uri
|
return "" unless uri
|
||||||
|
@ -68,6 +68,25 @@ describe InlineOneboxer do
|
|||||||
expect(onebox).to be_blank
|
expect(onebox).to be_blank
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "will crawl anything if allowed to" do
|
||||||
|
SiteSetting.enable_inline_onebox_on_all_domains = true
|
||||||
|
|
||||||
|
# Final destination does a HEAD and a GET
|
||||||
|
stub_request(:head, "https://eviltrout.com/some-path").to_return(status: 200)
|
||||||
|
|
||||||
|
stub_request(:get, "https://eviltrout.com/some-path").
|
||||||
|
to_return(status: 200, body: "<html><head><title>a blog</title></head></html>", headers: {})
|
||||||
|
|
||||||
|
onebox = InlineOneboxer.lookup(
|
||||||
|
"https://eviltrout.com/some-path",
|
||||||
|
skip_cache: true
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(onebox).to be_present
|
||||||
|
expect(onebox[:url]).to eq("https://eviltrout.com/some-path")
|
||||||
|
expect(onebox[:title]).to eq("a blog")
|
||||||
|
end
|
||||||
|
|
||||||
it "will lookup whitelisted domains" do
|
it "will lookup whitelisted domains" do
|
||||||
SiteSetting.inline_onebox_domains_whitelist = "eviltrout.com"
|
SiteSetting.inline_onebox_domains_whitelist = "eviltrout.com"
|
||||||
RetrieveTitle.stubs(:crawl).returns("Evil Trout's Blog")
|
RetrieveTitle.stubs(:crawl).returns("Evil Trout's Blog")
|
||||||
|
@ -21,11 +21,22 @@ describe RetrieveTitle do
|
|||||||
expect(title).to eq("Another Title")
|
expect(title).to eq("Another Title")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "will pick og:title if title is missing" do
|
||||||
|
title = RetrieveTitle.extract_title(<<~HTML
|
||||||
|
<html>
|
||||||
|
<meta property="og:title" content="Good Title"
|
||||||
|
</html>
|
||||||
|
HTML
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(title).to eq("Good Title")
|
||||||
|
end
|
||||||
|
|
||||||
it "will prefer the title from an opengraph tag" do
|
it "will prefer the title from an opengraph tag" do
|
||||||
title = RetrieveTitle.extract_title(<<~HTML
|
title = RetrieveTitle.extract_title(<<~HTML
|
||||||
<html>
|
<html>
|
||||||
<title>Bad Title</title>
|
<title>Good Title</title>
|
||||||
<meta property="og:title" content="Good Title" />
|
<meta property="og:title" content="Bad Title"
|
||||||
</html>
|
</html>
|
||||||
HTML
|
HTML
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user