discourse/spec/models/topic_embed_spec.rb
Krzysztof Kotlarek 24cca10da7 SECURITY: SSRF vulnerability in TopicEmbed
Block redirects when making the final request in TopicEmbed to prevent Server Side Request Forgery (SSRF)
2023-11-09 13:47:21 +11:00

563 lines
19 KiB
Ruby
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# frozen_string_literal: true
require "stringio"
RSpec.describe TopicEmbed do
# Associations and validations declared on the model.
it { is_expected.to belong_to(:topic) }
it { is_expected.to belong_to(:post) }
it { is_expected.to validate_presence_of(:embed_url) }
describe ".import" do
# Records and arguments shared by the .import examples.
fab!(:user) { Fabricate(:user) }

let(:title) do
  "How to turn a fish from good to evil in 30 seconds"
end
let(:url) do
  "http://eviltrout.com/123"
end
# The body deliberately uses relative href/src values.
let(:contents) { "<p>hello world new post <a href='/hello'>hello</a> <img src='images/wat.jpg'></p>" }

fab!(:embeddable_host) { Fabricate(:embeddable_host) }
fab!(:category) { Fabricate(:category) }
fab!(:tag) { Fabricate(:tag) }
it "returns nil when the URL is malformed" do
  imported = TopicEmbed.import(user, "invalid url", title, contents)

  # Nothing is returned and no embed row is written
  expect(imported).to be_nil
  expect(TopicEmbed.count).to eq(0)
end
# parse_html must keep <figure>/<figcaption> and resolve the relative image
# src against the page URL passed as the second argument.
it "Allows figure and figcaption HTML tags" do
# NOTE(review): the second <figure> in the input is an opening tag, not
# </figure>; the expected output mirrors that nesting, so confirm it is
# deliberate before "fixing" it.
html = <<~HTML
<html>
<head>
<title>Some title</title>
</head>
<body>
<div class='content'>
<p>some content</p>
<figure>
<img src="/a.png">
<figcaption>Some caption</figcaption>
<figure>
</div>
</body>
</html>
HTML
parsed = TopicEmbed.parse_html(html, "https://blog.discourse.com/somepost.html")
# div inception is inserted by the readability gem
expected = <<~HTML
<div><div>
<div>
<p>some content</p>
<figure>
<img src="https://blog.discourse.com/a.png">
<figcaption>Some caption</figcaption>
<figure>
</figure></figure></div>
</div></div>
HTML
# Compare with surrounding whitespace stripped on both sides.
expect(parsed.body.strip).to eq(expected.strip)
end
context "when creating a post" do
# Eagerly import once per example; the embed row is looked up lazily by post.
let!(:post) do
  TopicEmbed.import(user, url, title, contents)
end
let(:topic_embed) do
  TopicEmbed.find_by(post: post)
end
it "works as expected with a new URL" do
  expect(post).to be_present

  # Stored with the raw_html cook method, so cooked and raw are the same
  expect(post.cook_method).to eq(Post.cook_methods[:raw_html])
  expect(post.cooked).to eq(post.raw)

  # Relative link and image targets are made absolute
  absolute_image = { src: "http://eviltrout.com/images/wat.jpg" }
  expect(post.cooked).to have_tag("a", with: { href: "http://eviltrout.com/hello" })
  expect(post.cooked).to have_tag("img", with: absolute_image)

  # The expanded version gets absolute URLs as well
  stub_request(:get, url).to_return(status: 200, body: contents)
  expanded = TopicEmbed.expanded_for(post)
  expect(expanded).to have_tag("img", with: absolute_image)

  embedded_topic = post.topic
  expect(embedded_topic.has_topic_embed?).to eq(true)
  expect(TopicEmbed.where(topic_id: post.topic_id)).to be_present
  expect(embedded_topic.category).to eq(embeddable_host.category)
  expect(embedded_topic).not_to be_visible
end
it "Supports updating the post content" do
  # Re-importing the same URL with new content must change the stored digest
  expect {
    TopicEmbed.import(user, url, "New title received", "<p>muhahaha new contents!</p>")
  }.to change { topic_embed.reload.content_sha1 }

  updated_embed = topic_embed
  expect(updated_embed.topic.title).to eq("New title received")
  expect(updated_embed.post.cooked).to match(/new contents/)
end
it "Supports updating the post author" do
  replacement_author = Fabricate(:user)

  # Same URL, different user: both the post and its topic change owner
  TopicEmbed.import(replacement_author, url, title, contents)

  updated_post = topic_embed.reload.post
  expect(updated_post.user).to eq(replacement_author)
  expect(updated_post.topic.user).to eq(replacement_author)
end
# The cooked post must contain the feed entry URL with its upper-case path
# exactly as it was passed to import.
it "Should leave uppercase Feed Entry URL untouched in content" do
  cased_url = "http://eviltrout.com/ABCD"
  imported_post = TopicEmbed.import(user, cased_url, title, "some random content")

  # Literal substring check: interpolating the URL into a regex would turn
  # "." into a wildcard and make the assertion looser than intended.
  expect(imported_post.cooked).to include(cased_url)
end
# The cooked post must contain the feed entry URL with its lower-case path
# exactly as it was passed to import.
it "Should leave lowercase Feed Entry URL untouched in content" do
  cased_url = "http://eviltrout.com/abcd"
  imported_post = TopicEmbed.import(user, cased_url, title, "some random content")

  # Literal substring check: interpolating the URL into a regex would turn
  # "." into a wildcard and make the assertion looser than intended.
  expect(imported_post.cooked).to include(cased_url)
end
it "will make the topic unlisted if `embed_unlisted` is set until someone replies" do
  Jobs.run_immediately!
  SiteSetting.embed_unlisted = true

  imported_post = TopicEmbed.import(user, "http://eviltrout.com/abcd", title, "some random content")
  expect(imported_post.topic).not_to be_visible

  # Reply to the embedded post as another user
  PostCreator.new(
    Fabricate(:user),
    raw: "this is a reply that will make the topic visible",
    topic_id: imported_post.topic_id,
    reply_to_post_number: 1,
  ).create

  expect(imported_post.topic.reload).to be_visible
end
it "won't be invisible if `embed_unlisted` is set to false" do
  Jobs.run_immediately!
  SiteSetting.embed_unlisted = false

  embedded_topic =
    TopicEmbed.import(user, "http://eviltrout.com/abcd", title, "some random content").topic

  expect(embedded_topic).to be_visible
end
it "creates the topic in the category passed as a parameter" do
  Jobs.run_immediately!

  import_args = [user, "http://eviltrout.com/abcd", title, "some random content"]
  imported_post = TopicEmbed.import(*import_args, category_id: category.id)
  topic_category = imported_post.topic.category

  # The explicit category is used rather than the embeddable host's one
  expect(topic_category).not_to eq(embeddable_host.category)
  expect(topic_category).to eq(category)
end
it "does not create duplicate topics with different protocols in the embed_url" do
  Jobs.run_immediately!

  import_from = ->(embed_url) { TopicEmbed.import(user, embed_url, title, "some random content") }

  # First import over http creates the topic ...
  expect { import_from.call("http://eviltrout.com/abcd") }.to change { Topic.all.count }.by(1)
  # ... and the https variant of the same URL reuses it
  expect { import_from.call("https://eviltrout.com/abcd") }.not_to change { Topic.all.count }
end
it "creates the topic with the tag passed as a parameter" do
  Jobs.run_immediately!
  SiteSetting.tagging_enabled = true

  tagged_post =
    TopicEmbed.import(user, "http://eviltrout.com/abcd", title, "some random content", tags: [tag.name])

  expect(tagged_post.topic.tags).to include(tag)
end
it "respects overriding the cook_method when asked" do
  Jobs.run_immediately!
  SiteSetting.embed_support_markdown = false

  # Both the watch page and the embed page answer with an empty 200
  [
    "https://www.youtube.com/watch?v=K56soYl0U1w",
    "https://www.youtube.com/embed/K56soYl0U1w",
  ].each do |video_url|
    stub_request(:get, video_url).to_return(status: 200, body: "", headers: {})
  end

  imported_post =
    TopicEmbed.import(
      user,
      "http://eviltrout.com/abcd",
      title,
      "https://www.youtube.com/watch?v=K56soYl0U1w",
      cook_method: Post.cook_methods[:regular],
    )

  expect(imported_post.cooked).to match(/onebox|iframe/)
end
end
context "when post creation supports markdown rendering" do
  before do
    SiteSetting.embed_support_markdown = true
  end

  it "works as expected" do
    imported_post = TopicEmbed.import(user, url, title, "some random content")

    expect(imported_post).to be_present
    # With markdown support on, the post is stored with the regular cook method
    expect(imported_post.cook_method).to eq(Post.cook_methods[:regular])
  end
end
# Covers how SiteSetting.embed_truncate affects the raw of an imported post.
describe "embedded content truncation" do
  # A constant assigned inside a describe block is defined globally and leaks
  # into every other spec, so the length is a memoized helper instead.
  let(:max_length_before_truncation) { 100 }
  # One long paragraph followed by a second, short one.
  let(:long_content) { "<p>#{"a" * max_length_before_truncation}</p>\n<p>more</p>" }

  it "truncates the imported post when truncation is enabled" do
    SiteSetting.embed_truncate = true
    post = TopicEmbed.import(user, url, title, long_content)
    expect(post.raw).not_to include(long_content)
  end

  it "keeps everything in the imported post when truncation is disabled" do
    SiteSetting.embed_truncate = false
    post = TopicEmbed.import(user, url, title, long_content)
    expect(post.raw).to include(long_content)
  end

  it "looks at first div when there is no paragraph" do
    no_para = "<div><h>testing it</h></div>"
    SiteSetting.embed_truncate = true
    post = TopicEmbed.import(user, url, title, no_para)
    expect(post.raw).to include("testing it")
  end
end
end
describe ".topic_id_for_embed" do
  it "returns correct topic id irrespective of url protocol" do
    embed = Fabricate(:topic_embed, embed_url: "http://example.com/post/248")

    # Host casing, the scheme and a trailing slash do not affect the lookup
    ["http://exAMPle.com/post/248", "https://example.com/post/248/"].each do |variant|
      expect(TopicEmbed.topic_id_for_embed(variant)).to eq(embed.topic_id)
    end

    # A longer path, a different host or a shorter path must not match
    [
      "http://example.com/post/248/2",
      "http://examples.com/post/248",
      "http://example.com/post/24",
      "http://example.com/post",
    ].each { |unrelated| expect(TopicEmbed.topic_id_for_embed(unrelated)).to eq(nil) }
  end

  it "finds the topic id when the embed_url contains a query string" do
    url_with_query = "http://example.com/post/248?key=foo"
    embed = Fabricate(:topic_embed, embed_url: url_with_query)

    expect(TopicEmbed.topic_id_for_embed(url_with_query)).to eq(embed.topic_id)
  end
end
describe ".find_remote" do
# Embeddable host record fabricated for the .find_remote examples.
# NOTE(review): presumably required for the remote URLs to be accepted -- confirm against the factory.
fab!(:embeddable_host) { Fabricate(:embeddable_host) }
describe ".title_scrub" do
  let(:url) { "http://eviltrout.com/123" }
  let(:contents) { "<title>Through the Looking Glass - Classic Books</title><body>some content here</body>" }

  before do
    stub_request(:get, url).to_return(status: 200, body: contents)
  end

  it "doesn't scrub the title by default" do
    fetched = TopicEmbed.find_remote(url)

    expect(fetched.title).to eq("Through the Looking Glass - Classic Books")
  end

  it "scrubs the title when the option is enabled" do
    SiteSetting.embed_title_scrubber = " - Classic Books$"

    fetched = TopicEmbed.find_remote(url)

    # The trailing " - Classic Books" is gone from the title
    expect(fetched.title).to eq("Through the Looking Glass")
  end

  it "doesn't follow redirect when making request" do
    # URL resolution is stubbed to land on a host that itself answers with a 301
    resolved_uri = URI("https://redirect.com")
    FinalDestination.any_instance.stubs(:resolve).returns(resolved_uri)
    stub_request(:get, "https://redirect.com/").to_return(
      status: 301,
      body: "<title>Moved permanently</title>",
      headers: { "Location" => "https://www.example.org/" },
    )

    fetched = TopicEmbed.find_remote(url)

    # The title is taken from the 301 body, i.e. the Location header was not followed
    expect(fetched.title).to eq("Moved permanently")
  end
end
# With "emoji" and "foo" in allowed_embed_classnames, the fetched body keeps
# those classes and drops the unlisted class "other".
context 'with post with allowed classes "foo" and "emoji"' do
  fab!(:user) { Fabricate(:user) }
  let(:url) { "http://eviltrout.com/123" }
  let(:contents) do
    "my normal size emoji <p class='foo'>Hi</p> <img class='emoji other foo' src='/images/smiley.jpg'>"
  end
  # Memoized per example; replaces an instance variable assigned in `before`.
  let(:remote_response) { TopicEmbed.find_remote(url) }

  before do
    SiteSetting.allowed_embed_classnames = "emoji, foo"
    stub_request(:get, url).to_return(status: 200, body: contents)
  end

  it "has no author tag" do
    expect(remote_response.author).to be_blank
  end

  it "img node has emoji class" do
    expect(remote_response.body).to have_tag("img", with: { class: "emoji" })
  end

  it "img node has foo class" do
    expect(remote_response.body).to have_tag("img", with: { class: "foo" })
  end

  it "p node has foo class" do
    expect(remote_response.body).to have_tag("p", with: { class: "foo" })
  end

  # Previously described as "other than emoji", although "foo" is kept too.
  it "removes classes that are not in the allowed list" do
    expect(remote_response.body).to have_tag("img", without: { class: "other" })
  end
end
# The page carries <meta name="author" content="eviltrout"> and a user with
# that username exists, so find_remote should report that user as author.
context "with post with author metadata" do
  fab!(:user) { Fabricate(:user, username: "eviltrout") }
  let(:url) { "http://eviltrout.com/321" }
  let(:contents) do
    '<html><head><meta name="author" content="eviltrout"></head><body>rich and morty</body></html>'
  end

  before { stub_request(:get, url).to_return(status: 200, body: contents) }

  # The old description ("has no author tag") contradicted the assertion.
  it "resolves the author from the author meta tag" do
    response = TopicEmbed.find_remote(url)
    expect(response.author).to eq(user)
  end
end
# With an empty allowed_embed_classnames setting, none of the classes present
# in the source markup may survive in the fetched body.
context "with post with no allowed classes" do
  fab!(:user) { Fabricate(:user) }
  let(:url) { "http://eviltrout.com/123" }
  let(:contents) do
    "my normal size emoji <p class='foo'>Hi</p> <img class='emoji other foo' src='/images/smiley.jpg'>"
  end
  # Memoized per example; replaces an instance variable assigned in `before`.
  let(:remote_response) { TopicEmbed.find_remote(url) }

  before do
    SiteSetting.allowed_embed_classnames = ""
    stub_request(:get, url).to_return(status: 200, body: contents)
  end

  it "img node doesn't have emoji class" do
    expect(remote_response.body).to have_tag("img", without: { class: "emoji" })
  end

  it "img node doesn't have foo class" do
    expect(remote_response.body).to have_tag("img", without: { class: "foo" })
  end

  it "p node doesn't have foo class" do
    expect(remote_response.body).to have_tag("p", without: { class: "foo" })
  end

  it "img node doesn't have other class" do
    expect(remote_response.body).to have_tag("img", without: { class: "other" })
  end
end
context "with non-ascii URL" do
  let(:url) do
    "http://eviltrout.com/test/ماهی"
  end
  let(:contents) do
    "<title>سلام</title><body>این یک پاراگراف آزمون است.</body>"
  end

  before do
    stub_request(:get, url).to_return(status: 200, body: contents)
  end

  it "doesn't throw an error" do
    fetched = TopicEmbed.find_remote(url)

    # The non-ASCII title comes back unchanged
    expect(fetched.title).to eq("سلام")
  end
end
context "with encoded URL" do
  # The path already contains a percent-encoded space
  let(:url) do
    "http://example.com/hello%20world"
  end
  let(:contents) do
    "<title>Hello World!</title><body></body>"
  end

  before do
    stub_request(:get, url).to_return(status: 200, body: contents)
  end

  it "doesn't throw an error" do
    fetched = TopicEmbed.find_remote(url)

    expect(fetched.title).to eq("Hello World!")
  end
end
# A bare path has no http(s) scheme; find_remote is expected to give up
# quietly rather than raise.
context "with non-http URL" do
  # The old description ("throws an error") contradicted the assertion.
  it "returns nil" do
    url = "/test.txt"
    expect(TopicEmbed.find_remote(url)).to be_nil
  end
end
context "with emails" do
  let(:url) { "http://example.com/foo" }
  # One mailto link with a percent-encoded "@", one with a plain "@"
  let(:contents) { '<p><a href="mailto:foo%40example.com">URL encoded @ symbol</a></p><p><a href="mailto:bar@example.com">normal mailto link</a></p>' }

  before do
    stub_request(:get, url).to_return(status: 200, body: contents)
  end

  it "handles mailto links" do
    fetched_body = TopicEmbed.find_remote(url).body

    # The encoded address is decoded; the plain one is left alone
    expect(fetched_body).to have_tag("a", with: { href: "mailto:foo@example.com" })
    expect(fetched_body).to have_tag("a", with: { href: "mailto:bar@example.com" })
  end
end
context "with malformed href" do
  let(:url) do
    "http://example.com/foo"
  end
  # The href is wrapped in parentheses, so it is not a valid URL
  let(:contents) do
    '<p><a href="(http://foo.bar)">Baz</a></p>'
  end

  before do
    stub_request(:get, url).to_return(status: 200, body: contents)
  end

  it "doesnt raise an exception" do
    expect do
      TopicEmbed.find_remote(url)
    end.not_to raise_error
  end
end
context "with canonical links" do
  fab!(:user) { Fabricate(:user) }

  let(:title) { "How to turn a fish from good to evil in 30 seconds" }

  # Two page URLs, each paired with the canonical URL its <link> points to
  let(:url) { "http://eviltrout.com/123?asd" }
  let(:canonical_url) { "http://eviltrout.com/123" }
  let(:url2) { "http://eviltrout.com/blog?post=1&canonical=false" }
  let(:canonical_url2) { "http://eviltrout.com/blog?post=1" }

  let(:content) { %(<head><link rel="canonical" href="#{canonical_url}"></head>) }
  let(:content2) { %(<head><link rel="canonical" href="#{canonical_url2}"></head>) }
  let(:canonical_content) { "<title>Canonical</title><body></body>" }

  before do
    [
      [url, content, canonical_url],
      [url2, content2, canonical_url2],
    ].each do |page_url, page_body, canonical|
      stub_request(:get, page_url).to_return(status: 200, body: page_body)
      stub_request(:head, canonical)
      stub_request(:get, canonical).to_return(status: 200, body: canonical_content)
    end
  end

  it "fetches canonical content" do
    fetched = TopicEmbed.find_remote(url)

    expect(fetched.title).to eq("Canonical")
    expect(fetched.url).to eq(canonical_url)
  end

  it "does not create duplicate topics when url differs from canonical_url" do
    Jobs.run_immediately!

    expect {
      TopicEmbed.import_remote(canonical_url, { title: title, user: user })
    }.to change { Topic.all.count }.by(1)

    expect {
      TopicEmbed.import_remote(url, { title: title, user: user })
    }.not_to change { Topic.all.count }
  end

  it "does not create duplicate topics when url contains extra params" do
    Jobs.run_immediately!

    expect {
      TopicEmbed.import_remote(canonical_url2, { title: title, user: user })
    }.to change { Topic.all.count }.by(1)

    expect {
      TopicEmbed.import_remote(url2, { title: title, user: user })
    }.not_to change { Topic.all.count }
  end
end
end
describe ".absolutize_urls" do
  it "handles badly formed URIs" do
    # The base URL carries two fragment markers
    base_url = "http://source.com/#double#anchor"
    markup = "hello world new post <a href='/hello'>hello</a>"

    absolutized = TopicEmbed.absolutize_urls(base_url, markup)

    expect(absolutized).to eq("hello world new post <a href=\"http://source.com/hello\">hello</a>")
  end

  it "handles malformed links" do
    base_url = "https://somesource.com"
    markup = <<~HTML
      hello world new post <a href="mailto:somemail@somewhere.org>">hello</a>
      some image <img src="https:/><invalidimagesrc/">
    HTML

    # Malformed href/src values come back untouched
    expect(TopicEmbed.absolutize_urls(base_url, markup)).to eq(markup)
  end
end
describe ".imported_from_html" do
  after do
    I18n.reload!
  end

  it "uses the default site locale for the 'imported_from' footer" do
    [
      ["en", "English translation of embed.imported_from with %{link}"],
      ["de", "German translation of embed.imported_from with %{link}"],
    ].each do |locale, translation|
      TranslationOverride.upsert!(locale, "embed.imported_from", translation)
    end

    # NOTE(review): I18n.locale is left at :de afterwards; presumably a global
    # spec hook resets it -- confirm.
    I18n.locale = :en
    footer_under_en = TopicEmbed.imported_from_html("some_url")

    I18n.locale = :de
    footer_under_de = TopicEmbed.imported_from_html("some_url")

    # Switching the current locale must not change the footer
    expect(footer_under_de).to eq(footer_under_en)
  end

  it "normalize_encodes the url" do
    hostile_url =
      'http://www.discourse.org/%23<%2Fa><img%20src%3Dx%20onerror%3Dalert("document.domain")%3B>'
    encoded_url =
      "http://www.discourse.org/%23%3C/a%3E%3Cimg%20src=x%20onerror=alert(%22document.domain%22);%3E"

    footer = TopicEmbed.imported_from_html(hostile_url)

    # The encoded URL appears both as the href and as the link text
    expect(footer).to eq(
      "\n<hr>\n<small>This is a companion discussion topic for the original entry at <a href='#{encoded_url}'>#{encoded_url}</a></small>\n",
    )
  end
end
end