2019-04-30 08:27:42 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2022-07-28 10:27:38 +08:00
|
|
|
RSpec.describe InlineOneboxer do
|
2017-07-20 03:08:54 +08:00
|
|
|
it "should return nothing with empty input" do
  # An empty URL list should produce no oneboxes.
  results = InlineOneboxer.new([]).process

  expect(results).to be_blank
end
|
|
|
|
|
|
|
|
it "can onebox a topic" do
  topic = Fabricate(:topic)
  oneboxes = InlineOneboxer.new([topic.url], skip_cache: true).process

  expect(oneboxes).to be_present

  # The first (and only requested) entry should describe the linked topic.
  first_onebox = oneboxes[0]
  expect(first_onebox[:url]).to eq(topic.url)
  expect(first_onebox[:title]).to eq(topic.title)
end
|
|
|
|
|
|
|
|
it "doesn't onebox private messages" do
  private_message = Fabricate(:private_message_topic)
  oneboxer = InlineOneboxer.new([private_message.url], skip_cache: true)

  # Links to private message topics must not yield any onebox.
  expect(oneboxer.process).to be_blank
end
|
|
|
|
|
2022-07-28 00:14:14 +08:00
|
|
|
describe "caching" do
  fab!(:topic)

  before { InlineOneboxer.invalidate(topic.url) }

  it "puts an entry in the cache" do
    SiteSetting.enable_inline_onebox_on_all_domains = true

    page_url = "https://example.com/good-url"
    page_html = "<html><head><title>a blog</title></head></html>"
    stub_request(:get, page_url).to_return(status: 200, body: page_html)

    # Start from a known-empty cache entry for this URL.
    InlineOneboxer.invalidate(page_url)
    expect(InlineOneboxer.cache_lookup(page_url)).to be_blank

    # A lookup that does not skip the cache...
    looked_up = InlineOneboxer.lookup(page_url)
    expect(looked_up[:title]).to be_present

    # ...should leave an entry behind that cache_lookup can find.
    cache_entry = InlineOneboxer.cache_lookup(page_url)
    expect(cache_entry[:url]).to eq(page_url)
    expect(cache_entry[:title]).to eq("a blog")
  end
end
|
|
|
|
|
2022-07-27 18:21:10 +08:00
|
|
|
describe ".lookup" do
|
2020-02-12 18:11:28 +08:00
|
|
|
# Two distinct private categories, both fabricated for the staff group.
let(:category) { Fabricate(:private_category, group: Group[:staff]) }
let(:category2) { Fabricate(:private_category, group: Group[:staff]) }

# The user on whose behalf the private-topic lookups are performed.
let(:admin) { Fabricate(:admin) }
|
|
|
|
|
|
|
|
it "can lookup private topics if in same category" do
  topic = Fabricate(:topic, category: category)
  topic1 = Fabricate(:topic, category: category)
  topic2 = Fabricate(:topic, category: category2)

  # Every lookup targets `topic` as the admin with the cache bypassed; only
  # the place the link is posted from (category and optional topic) varies.
  lookup_from = ->(**origin) do
    InlineOneboxer.lookup(topic.url, user_id: admin.id, **origin, skip_cache: true)
  end

  # Asserts that a onebox was produced and that it describes `topic`.
  expect_onebox_for_topic = ->(onebox) do
    expect(onebox).to be_present
    expect(onebox[:url]).to eq(topic.url)
    expect(onebox[:title]).to eq(topic.title)
  end

  # Link to `topic` from new topic (same category)
  expect_onebox_for_topic.call(lookup_from.call(category_id: category.id))

  # Link to `topic` from `topic`
  expect_onebox_for_topic.call(
    lookup_from.call(category_id: topic.category_id, topic_id: topic.id),
  )

  # Link to `topic` from `topic1` (same category)
  expect_onebox_for_topic.call(
    lookup_from.call(category_id: topic1.category_id, topic_id: topic1.id),
  )

  # Link to `topic` from `topic2` (different category)
  from_other_category =
    lookup_from.call(category_id: topic2.category_id, topic_id: topic2.id)
  expect(from_other_category).to be_blank
end
|
|
|
|
|
2017-07-20 03:08:54 +08:00
|
|
|
it "can lookup one link at a time" do
  topic = Fabricate(:topic)

  # The class-level helper takes a single URL rather than a list.
  single = InlineOneboxer.lookup(topic.url, skip_cache: true)

  expect(single).to be_present
  expect(single[:url]).to eq(topic.url)
  expect(single[:title]).to eq(topic.title)
end
|
2017-07-21 04:01:16 +08:00
|
|
|
|
|
|
|
it "returns nothing for unknown links" do
  # Neither a missing URL nor an unrecognised path should produce a onebox.
  [nil, "/test"].each do |unknown_link|
    expect(InlineOneboxer.lookup(unknown_link)).to be_nil
  end
end
|
2017-07-22 02:24:28 +08:00
|
|
|
|
|
|
|
it "will return the fancy title" do
  topic = Fabricate(:topic, title: "Hello :pizza: with an emoji")

  result = InlineOneboxer.lookup(topic.url, skip_cache: true)

  expect(result).to be_present
  expect(result[:url]).to eq(topic.url)
  # The emoji shortcode in the raw title is expected as the unicode character.
  expect(result[:title]).to eq("Hello 🍕 with an emoji")
end
|
|
|
|
|
2021-05-21 09:43:47 +08:00
|
|
|
it "will append the post number and post author's username to the title" do
  topic = Fabricate(:topic, title: "Inline oneboxer")
  Fabricate(:post, topic: topic) # OP
  Fabricate(:post, topic: topic)

  # Returns the onebox title for a link to post `number` of `topic`.
  lookup = ->(number) do
    InlineOneboxer.lookup("#{topic.url}/#{number}", skip_cache: true)[:title]
  end

  posts = topic.reload.posts.order("post_number ASC")

  # Post numbers 0 and 1 yield the bare topic title; later posts get a suffix.
  expect(lookup.call(0)).to eq("Inline oneboxer")
  expect(lookup.call(1)).to eq("Inline oneboxer")
  expect(lookup.call(2)).to eq("Inline oneboxer - #2 by #{posts[1].user.username}")

  Fabricate(:post, topic: topic, post_type: Post.types[:whisper])
  posts = topic.reload.posts.order("post_number ASC")
  # because the last post in the topic is a whisper, the onebox title
  # will be the first regular post directly before our whisper
  expect(lookup.call(3)).to eq("Inline oneboxer - #2 by #{posts[1].user.username}")
  expect(lookup.call(99)).to eq("Inline oneboxer - #2 by #{posts[1].user.username}")

  Fabricate(:post, topic: topic)
  posts = topic.reload.posts.order("post_number ASC")
  # username not appended to whisper posts
  expect(lookup.call(3)).to eq("Inline oneboxer - #3")
  expect(lookup.call(4)).to eq("Inline oneboxer - #4 by #{posts[3].user.username}")
  # A post number past the end resolves to the same title as the last post.
  expect(lookup.call(99)).to eq("Inline oneboxer - #4 by #{posts[3].user.username}")
end
|
|
|
|
|
2020-07-27 08:23:54 +08:00
|
|
|
it "will not crawl domains that aren't allowlisted" do
  # With crawling of arbitrary domains turned off, an external URL that is
  # not otherwise allowed must not yield a onebox.
  SiteSetting.enable_inline_onebox_on_all_domains = false

  expect(InlineOneboxer.lookup("https://eviltrout.com", skip_cache: true)).to be_blank
end
|
|
|
|
|
2017-08-03 02:27:21 +08:00
|
|
|
it "will crawl anything if allowed to" do
  SiteSetting.enable_inline_onebox_on_all_domains = true

  page_url = "https://eviltrout.com/some-path"
  page_html = "<html><head><title>a blog</title></head></html>"
  stub_request(:get, page_url).to_return(status: 200, body: page_html)

  result = InlineOneboxer.lookup(page_url, skip_cache: true)

  # The title comes from the <title> element of the stubbed page.
  expect(result).to be_present
  expect(result[:url]).to eq("https://eviltrout.com/some-path")
  expect(result[:title]).to eq("a blog")
end
|
|
|
|
|
2018-01-30 05:39:41 +08:00
|
|
|
# The description previously claimed that no onebox is returned, but the
# assertions below show the onebox IS returned; only its title is dropped.
it "will not return a title if it does not meet minimal length" do
  SiteSetting.enable_inline_onebox_on_all_domains = true

  # The stubbed page has a one-character <title>.
  stub_request(:get, "https://eviltrout.com/some-path").to_return(
    status: 200,
    body: "<html><head><title>a</title></head></html>",
  )

  onebox = InlineOneboxer.lookup("https://eviltrout.com/some-path", skip_cache: true)

  expect(onebox).to be_present
  expect(onebox[:url]).to eq("https://eviltrout.com/some-path")
  expect(onebox[:title]).to be_nil
end
|
|
|
|
|
2020-07-27 08:23:54 +08:00
|
|
|
it "will lookup allowlisted domains" do
  SiteSetting.allowed_inline_onebox_domains = "eviltrout.com"
  # Short-circuit the crawler so no HTTP request is needed for the title.
  RetrieveTitle.stubs(:crawl).returns("Evil Trout's Blog")

  page_url = "https://eviltrout.com/some-path"
  result = InlineOneboxer.lookup(page_url, skip_cache: true)

  expect(result).to be_present
  expect(result[:url]).to eq("https://eviltrout.com/some-path")
  expect(result[:title]).to eq("Evil Trout's Blog")
end
|
|
|
|
|
2022-01-20 14:12:34 +08:00
|
|
|
describe "lookups for blocked domains in the hostname" do
|
|
|
|
# Shared example: with `setting` assigned to blocked_onebox_domains, looking
# up `domain_to_test` must not produce a onebox even though the page itself
# would respond successfully with a usable title.
shared_examples "blocks the domain" do |setting, domain_to_test|
  it "does not retrieve title" do
    page_html = "<html><head><title>hello world</title></head></html>"
    stub_request(:get, domain_to_test).to_return(status: 200, body: page_html)

    SiteSetting.blocked_onebox_domains = setting

    expect(InlineOneboxer.lookup(domain_to_test, skip_cache: true)).to be_blank
  end
end
|
|
|
|
|
|
|
|
# Shared example: `domain_to_test` only resembles the blocked `setting`
# value, so the lookup is still expected to come back with a title.
shared_examples "does not fulfil blocked domain" do |setting, domain_to_test|
  it "retrieves title" do
    page_html = "<html><head><title>hello world</title></head></html>"
    stub_request(:get, domain_to_test).to_return(status: 200, body: page_html)

    SiteSetting.blocked_onebox_domains = setting

    result = InlineOneboxer.lookup(domain_to_test, skip_cache: true)
    expect(result[:title]).to be_present
  end
end
|
|
|
|
|
|
|
|
# Each row is [blocked_onebox_domains setting, URL to look up].
# Exact hosts, subdomains and sub-paths of a blocked host are all blocked.
[
  ["api.cat.org|kitten.cloud", "https://api.cat.org"],
  ["api.cat.org|kitten.cloud", "http://kitten.cloud"],
  ["kitten.cloud", "http://cat.kitten.cloud"],
  ["api.cat.org", "https://api.cat.org/subdirectory/moar"],
  ["kitten.cloud", "https://cat.kitten.cloud/subd"],
].each { |setting, url| include_examples "blocks the domain", setting, url }

# Hosts that merely contain or resemble a blocked domain are not blocked.
[
  ["kitten.cloud", "https://cat.2kitten.cloud"],
  ["kitten.cloud", "https://cat.kitten.cloud9"],
  ["api.cat.org", "https://api-cat.org"],
].each { |setting, url| include_examples "does not fulfil blocked domain", setting, url }
|
2022-03-11 14:18:12 +08:00
|
|
|
|
|
|
|
it "doesn't retrieve title if a blocked domain is encountered anywhere in the redirect chain" do
  SiteSetting.blocked_onebox_domains = "redirect.com"

  # Redirect chain: mainwebsite.com -> redirect.com (blocked) -> finalwebsite.com
  first_hop = "https://mainwebsite.com/blah"
  blocked_hop = "https://redirect.com/blah"
  final_hop = "https://finalwebsite.com/blah"

  stub_request(:get, first_hop).to_return(
    status: 301,
    body: "",
    headers: {
      "location" => "https://redirect.com/blah",
    },
  )
  stub_request(:get, blocked_hop).to_return(
    status: 301,
    body: "",
    headers: {
      "location" => "https://finalwebsite.com/blah",
    },
  )
  stub_request(:get, final_hop).to_return(
    status: 200,
    body: "<html><head><title>hello world</title></head></html>",
  )

  result = InlineOneboxer.lookup(first_hop, skip_cache: true)

  # The final page has a title, but passing through a blocked host drops it.
  expect(result[:title]).to be_blank
end
|
|
|
|
|
|
|
|
it "doesn't retrieve title if the Discourse-No-Onebox header == 1" do
  # Title of the uncached inline onebox for `url`.
  title_for = ->(url) { InlineOneboxer.lookup(url, skip_cache: true)[:title] }

  # Case 1: the page itself responds with the opt-out header.
  stub_request(:get, "https://mainwebsite.com/blah").to_return(
    status: 200,
    body: "<html><head><title>hello world</title></head></html>",
    headers: {
      "Discourse-No-Onebox" => "1",
    },
  )
  expect(title_for.call("https://mainwebsite.com/blah")).to be_blank

  # Case 2: the opt-out header appears on an intermediate redirect response.
  stub_request(:get, "https://mainwebsite.com/blah/2").to_return(
    status: 301,
    body: "",
    headers: {
      "location" => "https://mainwebsite.com/blah/2/redirect",
    },
  )
  stub_request(:get, "https://mainwebsite.com/blah/2/redirect").to_return(
    status: 301,
    body: "",
    headers: {
      "Discourse-No-Onebox" => "1",
      "location" => "https://somethingdoesnotmatter.com",
    },
  )
  expect(title_for.call("https://mainwebsite.com/blah/2")).to be_blank
  expect(title_for.call("https://mainwebsite.com/blah/2/redirect")).to be_blank
end
|
2022-01-20 14:12:34 +08:00
|
|
|
end
|
2022-05-23 18:52:06 +08:00
|
|
|
|
|
|
|
context "when block_onebox_on_redirect is enabled" do
  before { SiteSetting.block_onebox_on_redirect = true }

  # NOTE(review): presumably resets HTTPS state FinalDestination cached for
  # this host so the examples stay independent of each other — confirm.
  after { FinalDestination.clear_https_cache!("redirects.com") }

  it "doesn't onebox if the URL redirects" do
    source_url = "https://redirects.com/blah/gg"
    stub_request(:get, source_url).to_return(
      status: 301,
      body: "",
      headers: {
        "location" => "https://redirects.com/blah/gg/redirect",
      },
    )

    result = InlineOneboxer.lookup(source_url, skip_cache: true)
    expect(result[:title]).to be_blank
  end

  it "allows an initial http -> https redirect if the redirect URL is identical to the original" do
    http_url = "http://redirects.com/blah/gg"
    https_url = "https://redirects.com/blah/gg"

    # Same host and path; only the scheme is upgraded by the redirect.
    stub_request(:get, http_url).to_return(
      status: 301,
      body: "",
      headers: {
        "location" => "https://redirects.com/blah/gg",
      },
    )
    stub_request(:get, https_url).to_return(
      status: 200,
      body: "<html><head><title>The Redirects Website</title></head></html>",
    )

    result = InlineOneboxer.lookup(http_url, skip_cache: true)
    expect(result[:title]).to eq("The Redirects Website")
  end

  it "doesn't allow an initial http -> https redirect if the redirect URL is different to the original" do
    http_url = "http://redirects.com/blah/gg"

    # The redirect upgrades the scheme but also changes the path.
    stub_request(:get, http_url).to_return(
      status: 301,
      body: "",
      headers: {
        "location" => "https://redirects.com/blah/gg/2",
      },
    )

    result = InlineOneboxer.lookup(http_url, skip_cache: true)
    expect(result[:title]).to be_blank
  end
end
|
2022-05-25 19:51:47 +08:00
|
|
|
|
|
|
|
it "censors external oneboxes" do
  # "my" is registered as a censored watched word before the lookup runs.
  Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: "my")

  SiteSetting.enable_inline_onebox_on_all_domains = true

  page_url = "https://eviltrout.com/some-path"
  page_html = "<html><head><title>welcome to my blog</title></head></html>"
  stub_request(:get, page_url).to_return(status: 200, body: page_html)

  result = InlineOneboxer.lookup(page_url, skip_cache: true)

  expect(result).to be_present
  expect(result[:url]).to eq("https://eviltrout.com/some-path")
  # The censored word is replaced character-for-character in the title.
  expect(result[:title]).to eq("welcome to ■■ blog")
end
|
2022-05-26 12:01:44 +08:00
|
|
|
|
|
|
|
it "does not try and censor external oneboxes returning a blank title" do
  Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: "my")

  SiteSetting.enable_inline_onebox_on_all_domains = true

  # The page cannot be fetched, so there is no title available to censor.
  page_url = "https://eviltrout.com/some-path"
  stub_request(:get, page_url).to_return(status: 404, body: "")

  result = InlineOneboxer.lookup(page_url, skip_cache: true)

  expect(result).to be_present
  expect(result[:url]).to eq("https://eviltrout.com/some-path")
  expect(result[:title]).to eq(nil)
end
|
2022-01-20 14:12:34 +08:00
|
|
|
end
|
2022-05-24 01:02:02 +08:00
|
|
|
|
2022-07-28 00:14:14 +08:00
|
|
|
describe ".register_local_handler" do
  it "calls registered local handler" do
    # The handler receives the URL and a second argument this example ignores.
    InlineOneboxer.register_local_handler("wizard") do |url, _route|
      { url: url, title: "Custom Onebox for Wizard" }
    end

    wizard_url = "#{Discourse.base_url}/wizard"
    oneboxes = InlineOneboxer.new([wizard_url], skip_cache: true).process

    expect(oneboxes).to be_present

    # The onebox should be exactly what the registered block returned.
    first_onebox = oneboxes[0]
    expect(first_onebox[:url]).to eq(wizard_url)
    expect(first_onebox[:title]).to eq("Custom Onebox for Wizard")
  end
end
|
2017-07-20 03:08:54 +08:00
|
|
|
end
|