mirror of
https://github.com/discourse/discourse.git
synced 2025-03-24 04:45:34 +08:00

Meta topic: https://meta.discourse.org/t/prevent-to-linkify-when-there-is-a-redirect/226964/2?u=osama. This commit adds a new site setting `block_onebox_on_redirect` (default off) for blocking oneboxes (full and inline) of URLs that redirect. Note that an initial http → https redirect is still allowed if the redirect location is identical to the source (apart from the scheme, of course). For example, if a user includes a link to `http://example.com/page` and the link resolves to `https://example.com/page`, then the link will still onebox (assuming it can be oneboxed) even if the setting is enabled. The reason is that a user may type out a URL by hand with http (e.g. because the URL is short and memorable), and since many sites support TLS and automatically redirect http traffic to https, we should still allow such a URL to onebox.
318 lines
12 KiB
Ruby
318 lines
12 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
describe InlineOneboxer do
|
|
|
|
# Processing an oneboxer that was given an empty URL list produces a blank result.
it "should return nothing with empty input" do
  no_urls = []
  outcome = InlineOneboxer.new(no_urls).process

  expect(outcome).to be_blank
end
|
|
|
|
# Processing a fabricated topic's URL yields a result whose first entry
# carries that URL and the topic's title.
it "can onebox a topic" do
  fabricated_topic = Fabricate(:topic)
  oneboxes = InlineOneboxer.new([fabricated_topic.url], skip_cache: true).process

  expect(oneboxes).to be_present

  first_onebox = oneboxes[0]
  expect(first_onebox[:url]).to eq(fabricated_topic.url)
  expect(first_onebox[:title]).to eq(fabricated_topic.title)
end
|
|
|
|
# A private message topic's URL must not produce any inline onebox.
it "doesn't onebox private messages" do
  private_message = Fabricate(:private_message_topic)
  oneboxer = InlineOneboxer.new([private_message.url], skip_cache: true)

  expect(oneboxer.process).to be_blank
end
|
|
|
|
context "caching" do
|
|
# Topic fabricated for the caching examples.
fab!(:topic) { Fabricate(:topic) }

# Invalidate any inline onebox entry for the topic URL before each example.
before { InlineOneboxer.invalidate(topic.url) }
|
|
|
|
# A successful lookup of an external page stores its URL and title so that a
# later `cache_lookup` returns them.
it "puts an entry in the cache" do
  SiteSetting.enable_inline_onebox_on_all_domains = true

  page_url = "https://example.com/good-url"
  page_html = "<html><head><title>a blog</title></head></html>"
  stub_request(:get, page_url).to_return(status: 200, body: page_html)

  # Make sure nothing is cached for this URL before the lookup.
  InlineOneboxer.invalidate(page_url)
  expect(InlineOneboxer.cache_lookup(page_url)).to be_blank

  looked_up = InlineOneboxer.lookup(page_url)
  expect(looked_up[:title]).to be_present

  cache_entry = InlineOneboxer.cache_lookup(page_url)
  expect(cache_entry[:url]).to eq(page_url)
  expect(cache_entry[:title]).to eq('a blog')
end
|
|
end
|
|
|
|
context ".lookup" do
|
|
# Two separately fabricated private categories, both granted to the staff group.
let(:category) do
  Fabricate(:private_category, group: Group[:staff])
end
let(:category2) do
  Fabricate(:private_category, group: Group[:staff])
end

# Admin user; examples pass its id as `user_id` to the lookup.
let(:admin) do
  Fabricate(:admin)
end
|
|
|
|
# A topic in a private category is returned when the lookup originates from
# the same category, and withheld when it originates from a topic that lives
# in a different private category.
it "can lookup private topics if in same category" do
  topic = Fabricate(:topic, category: category)
  topic1 = Fabricate(:topic, category: category)
  topic2 = Fabricate(:topic, category: category2)

  # Every lookup targets `topic` as the admin; only the originating
  # category/topic options differ between calls.
  lookup_from = ->(**origin) do
    InlineOneboxer.lookup(topic.url, user_id: admin.id, **origin, skip_cache: true)
  end

  # Shared assertions for a onebox that resolved to `topic`.
  expect_topic_onebox = ->(onebox) do
    expect(onebox).to be_present
    expect(onebox[:url]).to eq(topic.url)
    expect(onebox[:title]).to eq(topic.title)
  end

  # Linking to `topic` while composing a new topic in the same category
  expect_topic_onebox.call(lookup_from.call(category_id: category.id))

  # Linking to `topic` from within `topic` itself
  expect_topic_onebox.call(
    lookup_from.call(category_id: topic.category_id, topic_id: topic.id)
  )

  # Linking to `topic` from `topic1`, which shares its category
  expect_topic_onebox.call(
    lookup_from.call(category_id: topic1.category_id, topic_id: topic1.id)
  )

  # Linking to `topic` from `topic2`, which is in a different category
  cross_category = lookup_from.call(category_id: topic2.category_id, topic_id: topic2.id)
  expect(cross_category).to be_blank
end
|
|
|
|
# The class-level `lookup` resolves a single topic URL to its URL and title.
it "can lookup one link at a time" do
  fabricated_topic = Fabricate(:topic)
  single = InlineOneboxer.lookup(fabricated_topic.url, skip_cache: true)

  expect(single).to be_present
  expect(single[:url]).to eq(fabricated_topic.url)
  expect(single[:title]).to eq(fabricated_topic.title)
end
|
|
|
|
# Neither a nil input nor a bare relative path resolves to a onebox.
it "returns nothing for unknown links" do
  [nil, "/test"].each do |unknown_link|
    expect(InlineOneboxer.lookup(unknown_link)).to be_nil
  end
end
|
|
|
|
# The returned title has the `:pizza:` shortcode rendered as the emoji character.
it "will return the fancy title" do
  emoji_topic = Fabricate(:topic, title: "Hello :pizza: with an emoji")
  result = InlineOneboxer.lookup(emoji_topic.url, skip_cache: true)

  expect(result).to be_present
  expect(result[:url]).to eq(emoji_topic.url)
  expect(result[:title]).to eq("Hello 🍕 with an emoji")
end
|
|
|
|
# Checks the title produced for `<topic url>/<post number>` links as posts
# (including a whisper) are added to the topic.
it "will append the post number post author's username to the title" do
  topic = Fabricate(:topic, title: "Inline oneboxer")
  Fabricate(:post, topic: topic) # OP
  Fabricate(:post, topic: topic)

  # Title of the inline onebox for a link to the given post number.
  title_at = ->(post_number) do
    InlineOneboxer.lookup("#{topic.url}/#{post_number}", skip_cache: true)[:title]
  end
  # Reloads the topic and returns its posts ordered by post number.
  posts_in_order = -> { topic.reload.posts.order("post_number ASC") }

  posts = posts_in_order.call
  bare_title = "Inline oneboxer"
  second_post_title = "Inline oneboxer - #2 by #{posts[1].user.username}"

  expect(title_at.call(0)).to eq(bare_title)
  expect(title_at.call(1)).to eq(bare_title)
  expect(title_at.call(2)).to eq(second_post_title)

  Fabricate(:post, topic: topic, post_type: Post.types[:whisper])
  posts = posts_in_order.call
  # The newest post is now a whisper, so links to it (or past the end of the
  # topic) resolve to the regular post immediately preceding the whisper.
  second_post_title = "Inline oneboxer - #2 by #{posts[1].user.username}"
  expect(title_at.call(3)).to eq(second_post_title)
  expect(title_at.call(99)).to eq(second_post_title)

  Fabricate(:post, topic: topic)
  posts = posts_in_order.call
  fourth_post_title = "Inline oneboxer - #4 by #{posts[3].user.username}"
  # A whisper keeps its post number in the title but gets no username suffix.
  expect(title_at.call(3)).to eq("Inline oneboxer - #3")
  expect(title_at.call(4)).to eq(fourth_post_title)
  expect(title_at.call(99)).to eq(fourth_post_title)
end
|
|
|
|
# With inline oneboxing on all domains switched off, an external URL
# produces no onebox.
it "will not crawl domains that aren't allowlisted" do
  SiteSetting.enable_inline_onebox_on_all_domains = false

  external_url = "https://eviltrout.com"
  result = InlineOneboxer.lookup(external_url, skip_cache: true)

  expect(result).to be_blank
end
|
|
|
|
# With inline oneboxing enabled on all domains, the page is fetched and its
# <title> becomes the onebox title.
it "will crawl anything if allowed to" do
  SiteSetting.enable_inline_onebox_on_all_domains = true

  page_url = "https://eviltrout.com/some-path"
  page_html = "<html><head><title>a blog</title></head></html>"
  stub_request(:get, page_url).to_return(status: 200, body: page_html)

  result = InlineOneboxer.lookup(page_url, skip_cache: true)

  expect(result).to be_present
  expect(result[:url]).to eq("https://eviltrout.com/some-path")
  expect(result[:title]).to eq("a blog")
end
|
|
|
|
# NOTE: despite the description, the assertions below expect a onebox to be
# returned; it is the title that is nil when the page's <title> is the
# single character "a".
it "will not return a onebox if it does not meet minimal length" do
  SiteSetting.enable_inline_onebox_on_all_domains = true

  page_url = "https://eviltrout.com/some-path"
  short_title_html = "<html><head><title>a</title></head></html>"
  stub_request(:get, page_url).to_return(status: 200, body: short_title_html)

  result = InlineOneboxer.lookup(page_url, skip_cache: true)

  expect(result).to be_present
  expect(result[:url]).to eq("https://eviltrout.com/some-path")
  expect(result[:title]).to eq(nil)
end
|
|
|
|
# With the domain listed in `allowed_inline_onebox_domains` and the title
# crawl stubbed out, the stubbed title is returned for the URL.
it "will lookup allowlisted domains" do
  SiteSetting.allowed_inline_onebox_domains = "eviltrout.com"
  RetrieveTitle.stubs(:crawl).returns("Evil Trout's Blog")

  allowlisted_url = "https://eviltrout.com/some-path"
  result = InlineOneboxer.lookup(allowlisted_url, skip_cache: true)

  expect(result).to be_present
  expect(result[:url]).to eq("https://eviltrout.com/some-path")
  expect(result[:title]).to eq("Evil Trout's Blog")
end
|
|
|
|
describe "lookups for blocked domains in the hostname" do
|
|
# Shared example: with `blocked_onebox_domains` set to `blocked_setting`,
# looking up `url` returns nothing even though the page itself would
# respond with a usable title.
shared_examples "blocks the domain" do |blocked_setting, url|
  it "does not retrieve title" do
    page_html = "<html><head><title>hello world</title></head></html>"
    stub_request(:get, url).to_return(status: 200, body: page_html)
    SiteSetting.blocked_onebox_domains = blocked_setting

    result = InlineOneboxer.lookup(url, skip_cache: true)

    expect(result).to be_blank
  end
end
|
|
|
|
# Shared example: with `blocked_onebox_domains` set to `blocked_setting`,
# looking up `url` still yields a title, i.e. the URL is not treated as
# blocked.
shared_examples "does not fulfil blocked domain" do |blocked_setting, url|
  it "retrieves title" do
    page_html = "<html><head><title>hello world</title></head></html>"
    stub_request(:get, url).to_return(status: 200, body: page_html)
    SiteSetting.blocked_onebox_domains = blocked_setting

    result = InlineOneboxer.lookup(url, skip_cache: true)

    expect(result[:title]).to be_present
  end
end
|
|
|
|
# [blocked_onebox_domains value, URL] pairs that must be blocked: exact
# hosts from a multi-entry setting, a subdomain of a blocked domain, and
# blocked hosts with a path.
[
  ["api.cat.org|kitten.cloud", "https://api.cat.org"],
  ["api.cat.org|kitten.cloud", "http://kitten.cloud"],
  ["kitten.cloud", "http://cat.kitten.cloud"],
  ["api.cat.org", "https://api.cat.org/subdirectory/moar"],
  ["kitten.cloud", "https://cat.kitten.cloud/subd"],
].each do |blocked_setting, url|
  include_examples "blocks the domain", blocked_setting, url
end

# Look-alike hosts that contain a blocked domain's text but must NOT be
# blocked.
[
  ["kitten.cloud", "https://cat.2kitten.cloud"],
  ["kitten.cloud", "https://cat.kitten.cloud9"],
  ["api.cat.org", "https://api-cat.org"],
].each do |blocked_setting, url|
  include_examples "does not fulfil blocked domain", blocked_setting, url
end
|
|
|
|
# The chain is mainwebsite.com -> redirect.com -> finalwebsite.com; only the
# middle hop is blocked, and that is enough to suppress the title.
it "doesn't retrieve title if a blocked domain is encountered anywhere in the redirect chain" do
  SiteSetting.blocked_onebox_domains = "redirect.com"

  first_hop = "https://mainwebsite.com/blah"
  blocked_hop = "https://redirect.com/blah"
  final_hop = "https://finalwebsite.com/blah"

  stub_request(:get, first_hop).
    to_return(status: 301, body: "", headers: { "location" => blocked_hop })
  stub_request(:get, blocked_hop).
    to_return(status: 301, body: "", headers: { "location" => final_hop })
  stub_request(:get, final_hop).
    to_return(status: 200, body: "<html><head><title>hello world</title></head></html>")

  result = InlineOneboxer.lookup(first_hop, skip_cache: true)

  expect(result[:title]).to be_blank
end
|
|
|
|
# The `Discourse-No-Onebox: 1` response header suppresses the title, both on
# a direct 200 response and on a redirect response reached through another
# redirect or requested directly.
it "doesn't retrieve title if the Discourse-No-Onebox header == 1" do
  # Title of the inline onebox for `url`, bypassing the cache.
  title_of = ->(url) { InlineOneboxer.lookup(url, skip_cache: true)[:title] }

  direct_url = "https://mainwebsite.com/blah"
  stub_request(:get, direct_url).to_return(
    status: 200,
    body: "<html><head><title>hello world</title></head></html>",
    headers: { "Discourse-No-Onebox" => "1" }
  )
  expect(title_of.call(direct_url)).to be_blank

  redirecting_url = "https://mainwebsite.com/blah/2"
  opted_out_url = "https://mainwebsite.com/blah/2/redirect"
  stub_request(:get, redirecting_url).to_return(
    status: 301,
    body: "",
    headers: { "location" => opted_out_url }
  )
  stub_request(:get, opted_out_url).to_return(
    status: 301,
    body: "",
    headers: { "Discourse-No-Onebox" => "1", "location" => "https://somethingdoesnotmatter.com" }
  )

  expect(title_of.call(redirecting_url)).to be_blank
  expect(title_of.call(opted_out_url)).to be_blank
end
|
|
end
|
|
|
|
context "when block_onebox_on_redirect is enabled" do
|
|
# Turn the setting under test on for every example in this context.
before { SiteSetting.block_onebox_on_redirect = true }

# Clear the https cache entry for the test domain after each example.
after { FinalDestination.clear_https_cache!("redirects.com") }
|
|
|
|
# An https URL answering with a 301 to a different path gets no title.
it "doesn't onebox if the URL redirects" do
  source_url = "https://redirects.com/blah/gg"
  redirect_headers = { "location" => "https://redirects.com/blah/gg/redirect" }
  stub_request(:get, source_url).
    to_return(status: 301, body: "", headers: redirect_headers)

  result = InlineOneboxer.lookup(source_url, skip_cache: true)

  expect(result[:title]).to be_blank
end
|
|
|
|
# The http URL redirects to the same host and path over https, so the title
# of the https page is still returned.
it "allows an initial http -> https redirect if the redirect URL is identical to the original" do
  http_url = "http://redirects.com/blah/gg"
  https_url = "https://redirects.com/blah/gg"

  stub_request(:get, http_url).
    to_return(status: 301, body: "", headers: { "location" => https_url })
  stub_request(:get, https_url).
    to_return(status: 200, body: "<html><head><title>The Redirects Website</title></head></html>")

  result = InlineOneboxer.lookup(http_url, skip_cache: true)

  expect(result[:title]).to eq("The Redirects Website")
end
|
|
|
|
# The http URL redirects to https but to a different path, so no title is
# returned.
it "doesn't allow an initial http -> https redirect if the redirect URL is different to the original" do
  http_url = "http://redirects.com/blah/gg"
  redirect_headers = { "location" => "https://redirects.com/blah/gg/2" }
  stub_request(:get, http_url).
    to_return(status: 301, body: "", headers: redirect_headers)

  result = InlineOneboxer.lookup(http_url, skip_cache: true)

  expect(result[:title]).to be_blank
end
|
|
end
|
|
end
|
|
end
|