FIX: Make sure rel attributes are correctly set. (#10645)

We must guarantee that "rel=noopener" was set if "target=_blank" is present, which is not always the case for trusted users. Also, if the link contains the "nofollow" attribute, it has to have the "ugc" attribute as well.
This commit is contained in:
Roman Rizzi 2020-09-10 12:59:51 -03:00 committed by GitHub
parent dee451605b
commit efb9fd6ac0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 68 additions and 57 deletions

View File

@ -656,9 +656,8 @@ class CookedPostProcessor
end
def enforce_nofollow
if !@omit_nofollow && SiteSetting.add_rel_nofollow_to_user_content
PrettyText.add_rel_nofollow_to_user_content(@doc)
end
add_nofollow = !@omit_nofollow && SiteSetting.add_rel_nofollow_to_user_content
PrettyText.add_rel_attributes_to_user_content(@doc, add_nofollow)
end
def pull_hotlinked_images

View File

@ -269,9 +269,8 @@ module PrettyText
doc = Nokogiri::HTML5.fragment(sanitized)
if !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
add_rel_nofollow_to_user_content(doc)
end
add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
add_rel_attributes_to_user_content(doc, add_nofollow)
if SiteSetting.enable_mentions
add_mentions(doc, user_id: opts[:user_id])
@ -284,7 +283,7 @@ module PrettyText
loofah_fragment.scrub!(scrubber).to_html
end
def self.add_rel_nofollow_to_user_content(doc)
def self.add_rel_attributes_to_user_content(doc, add_nofollow)
allowlist = []
domains = SiteSetting.exclude_rel_nofollow_domains
@ -293,22 +292,21 @@ module PrettyText
site_uri = nil
doc.css("a").each do |l|
href = l["href"].to_s
l["rel"] = "noopener" if l["target"] == "_blank"
begin
uri = URI(UrlHelper.encode_component(href))
site_uri ||= URI(Discourse.base_url)
if !uri.host.present? ||
uri.host == site_uri.host ||
uri.host.ends_with?(".#{site_uri.host}") ||
allowlist.any? { |u| uri.host == u || uri.host.ends_with?(".#{u}") }
# we are good no need for nofollow
l.remove_attribute("rel")
else
l["rel"] = "nofollow noopener"
end
same_domain = !uri.host.present? ||
uri.host == site_uri.host ||
uri.host.ends_with?(".#{site_uri.host}") ||
allowlist.any? { |u| uri.host == u || uri.host.ends_with?(".#{u}") }
l["rel"] = "noopener nofollow ugc" if add_nofollow && !same_domain
rescue URI::Error
# add a nofollow anyway
l["rel"] = "nofollow noopener"
l["rel"] = "noopener nofollow ugc"
end
end
end

View File

@ -237,13 +237,13 @@ describe CookedPostProcessor do
count: 1
)
expect(html).to have_tag("a[rel='nofollow noopener']")
expect(html).to have_tag("a[rel='noopener nofollow ugc']")
end
it 'removes nofollow if user is staff/tl3' do
cpp = CookedPostProcessor.new(staff_post, invalidate_oneboxes: true)
cpp.post_process
expect(cpp.html).to_not have_tag("a[rel='nofollow noopener']")
expect(cpp.html).to_not have_tag("a[rel='noopener nofollow ugc']")
end
end
end
@ -1102,7 +1102,7 @@ describe CookedPostProcessor do
SiteSetting.add_rel_nofollow_to_user_content = false
Oneboxer.expects(:onebox)
.with("http://www.youtube.com/watch?v=9bZkp7q19f0", invalidate_oneboxes: true, user_id: nil, category_id: post.topic.category_id)
.returns('<aside class="onebox"><a href="https://www.youtube.com/watch?v=9bZkp7q19f0" rel="nofollow noopener ugc">GANGNAM STYLE</a></aside>')
.returns('<aside class="onebox"><a href="https://www.youtube.com/watch?v=9bZkp7q19f0" rel="noopener nofollow ugc">GANGNAM STYLE</a></aside>')
cpp.post_process_oneboxes
end
@ -1123,7 +1123,7 @@ describe CookedPostProcessor do
SiteSetting.tl3_links_no_follow = false
Oneboxer.expects(:onebox)
.with("http://www.youtube.com/watch?v=9bZkp7q19f0", invalidate_oneboxes: true, user_id: nil, category_id: post.topic.category_id)
.returns('<aside class="onebox"><a href="https://www.youtube.com/watch?v=9bZkp7q19f0" rel="nofollow ugc noopener">GANGNAM STYLE</a></aside>')
.returns('<aside class="onebox"><a href="https://www.youtube.com/watch?v=9bZkp7q19f0" rel="noopener nofollow ugc">GANGNAM STYLE</a></aside>')
cpp.post_process_oneboxes
end
@ -1204,7 +1204,7 @@ describe CookedPostProcessor do
expect(cpp.html).to match_html <<~HTML.rstrip
<p><a href="//test.localhost/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
<img src="//test.localhost/#{upload_path}/original/1X/1234567890123456.jpg"><br>
<a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
<a href="http://www.google.com" rel="noopener nofollow ugc">Google</a><br>
<img src="http://foo.bar/image.png"><br>
<a class="attachment" href="//test.localhost/#{upload_path}/original/1X/af2c2618032c679333bebf745e75f9088748d737.txt">text.txt</a> (20 Bytes)<br>
<img src="//test.localhost/images/emoji/twitter/smile.png?v=#{Emoji::EMOJI_VERSION}" title=":smile:" class="emoji only-emoji" alt=":smile:"></p>
@ -1219,7 +1219,7 @@ describe CookedPostProcessor do
expect(cpp.html).to match_html <<~HTML.rstrip
<p><a href="//my.cdn.com/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
<img src="//my.cdn.com/#{upload_path}/original/1X/1234567890123456.jpg"><br>
<a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
<a href="http://www.google.com" rel="noopener nofollow ugc">Google</a><br>
<img src="http://foo.bar/image.png"><br>
<a class="attachment" href="//my.cdn.com/#{upload_path}/original/1X/af2c2618032c679333bebf745e75f9088748d737.txt">text.txt</a> (20 Bytes)<br>
<img src="//my.cdn.com/images/emoji/twitter/smile.png?v=#{Emoji::EMOJI_VERSION}" title=":smile:" class="emoji only-emoji" alt=":smile:"></p>
@ -1232,7 +1232,7 @@ describe CookedPostProcessor do
expect(cpp.html).to match_html <<~HTML.rstrip
<p><a href="https://my.cdn.com/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
<img src="https://my.cdn.com/#{upload_path}/original/1X/1234567890123456.jpg"><br>
<a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
<a href="http://www.google.com" rel="noopener nofollow ugc">Google</a><br>
<img src="http://foo.bar/image.png"><br>
<a class="attachment" href="https://my.cdn.com/#{upload_path}/original/1X/af2c2618032c679333bebf745e75f9088748d737.txt">text.txt</a> (20 Bytes)<br>
<img src="https://my.cdn.com/images/emoji/twitter/smile.png?v=#{Emoji::EMOJI_VERSION}" title=":smile:" class="emoji only-emoji" alt=":smile:"></p>
@ -1246,7 +1246,7 @@ describe CookedPostProcessor do
expect(cpp.html).to match_html <<~HTML.rstrip
<p><a href="//my.cdn.com/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
<img src="//my.cdn.com/#{upload_path}/original/1X/1234567890123456.jpg"><br>
<a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
<a href="http://www.google.com" rel="noopener nofollow ugc">Google</a><br>
<img src="http://foo.bar/image.png"><br>
<a class="attachment" href="//test.localhost/#{upload_path}/original/1X/af2c2618032c679333bebf745e75f9088748d737.txt">text.txt</a> (20 Bytes)<br>
<img src="//my.cdn.com/images/emoji/twitter/smile.png?v=#{Emoji::EMOJI_VERSION}" title=":smile:" class="emoji only-emoji" alt=":smile:"></p>
@ -1260,7 +1260,7 @@ describe CookedPostProcessor do
expect(cpp.html).to match_html <<~HTML.rstrip
<p><a href="//my.cdn.com/#{upload_path}/original/2X/2345678901234567.jpg">Link</a><br>
<img src="//my.cdn.com/#{upload_path}/original/1X/1234567890123456.jpg"><br>
<a href="http://www.google.com" rel="nofollow noopener">Google</a><br>
<a href="http://www.google.com" rel="noopener nofollow ugc">Google</a><br>
<img src="http://foo.bar/image.png"><br>
<a class="attachment" href="//test.localhost/#{upload_path}/original/1X/af2c2618032c679333bebf745e75f9088748d737.txt">text.txt</a> (20 Bytes)<br>
<img src="//my.cdn.com/images/emoji/twitter/smile.png?v=#{Emoji::EMOJI_VERSION}" title=":smile:" class="emoji only-emoji" alt=":smile:"></p>

View File

@ -425,7 +425,7 @@ describe PrettyText do
end
it "can handle mentions inside a hyperlink" do
expect(PrettyText.cook("[link @inner](http://site.com)")).to match_html '<p><a href="http://site.com" rel="nofollow noopener">link @inner</a></p>'
expect(PrettyText.cook("[link @inner](http://site.com)")).to match_html '<p><a href="http://site.com" rel="noopener nofollow ugc">link @inner</a></p>'
end
it "can handle a list of mentions" do
@ -513,42 +513,56 @@ describe PrettyText do
end
end
describe "rel nofollow" do
describe "rel attributes" do
before do
SiteSetting.add_rel_nofollow_to_user_content = true
SiteSetting.exclude_rel_nofollow_domains = "foo.com|bar.com"
end
it "should inject nofollow in all user provided links" do
expect(PrettyText.cook('<a href="http://cnn.com">cnn</a>')).to match(/nofollow noopener/)
expect(PrettyText.cook('<a href="http://cnn.com">cnn</a>')).to match(/noopener nofollow ugc/)
end
it "should not inject nofollow in all local links" do
expect(PrettyText.cook("<a href='#{Discourse.base_url}/test.html'>cnn</a>") !~ /nofollow/).to eq(true)
expect(PrettyText.cook("<a href='#{Discourse.base_url}/test.html'>cnn</a>") !~ /nofollow ugc/).to eq(true)
end
it "should not inject nofollow in all subdomain links" do
expect(PrettyText.cook("<a href='#{Discourse.base_url.sub('http://', 'http://bla.')}/test.html'>cnn</a>") !~ /nofollow/).to eq(true)
expect(PrettyText.cook("<a href='#{Discourse.base_url.sub('http://', 'http://bla.')}/test.html'>cnn</a>") !~ /nofollow ugc/).to eq(true)
end
it "should inject nofollow in all non subdomain links" do
expect(PrettyText.cook("<a href='#{Discourse.base_url.sub('http://', 'http://bla')}/test.html'>cnn</a>")).to match(/nofollow/)
expect(PrettyText.cook("<a href='#{Discourse.base_url.sub('http://', 'http://bla')}/test.html'>cnn</a>")).to match(/nofollow ugc/)
end
it "should not inject nofollow for foo.com" do
expect(PrettyText.cook("<a href='http://foo.com/test.html'>cnn</a>") !~ /nofollow/).to eq(true)
expect(PrettyText.cook("<a href='http://foo.com/test.html'>cnn</a>") !~ /nofollow ugc/).to eq(true)
end
it "should inject nofollow for afoo.com" do
expect(PrettyText.cook("<a href='http://afoo.com/test.html'>cnn</a>")).to match(/nofollow/)
expect(PrettyText.cook("<a href='http://afoo.com/test.html'>cnn</a>")).to match(/nofollow ugc/)
end
it "should not inject nofollow for bar.foo.com" do
expect(PrettyText.cook("<a href='http://bar.foo.com/test.html'>cnn</a>") !~ /nofollow/).to eq(true)
expect(PrettyText.cook("<a href='http://bar.foo.com/test.html'>cnn</a>") !~ /nofollow ugc/).to eq(true)
end
it "should not inject nofollow if omit_nofollow option is given" do
expect(PrettyText.cook('<a href="http://cnn.com">cnn</a>', omit_nofollow: true) !~ /nofollow/).to eq(true)
expect(PrettyText.cook('<a href="http://cnn.com">cnn</a>', omit_nofollow: true) !~ /nofollow ugc/).to eq(true)
end
it 'adds the noopener attribute even if omit_nofollow option is given' do
raw_html = '<a href="https://www.mysite.com/" target="_blank">Check out my site!</a>'
expect(
PrettyText.cook(raw_html, omit_nofollow: true)
).to match(/noopener/)
end
it 'adds the noopener attribute even if omit_nofollow option is given' do
raw_html = '<a href="https://www.mysite.com/" target="_blank">Check out my site!</a>'
expect(
PrettyText.cook(raw_html, omit_nofollow: false)
).to match(/noopener nofollow ugc/)
end
end
@ -724,7 +738,7 @@ describe PrettyText do
more stuff
RAW
post = Fabricate(:post, raw: raw)
expect(post.excerpt).to eq("hello <a href=\"https://site.com\" rel=\"nofollow noopener\">site</a>")
expect(post.excerpt).to eq("hello <a href=\"https://site.com\" rel=\"noopener nofollow ugc\">site</a>")
end
it "handles span excerpt at the beginning of a post" do
@ -1109,7 +1123,7 @@ describe PrettyText do
it "supports href schemes" do
SiteSetting.allowed_href_schemes = "macappstore|steam"
cooked = cook("[Steam URL Scheme](steam://store/452530)")
expected = '<p><a href="steam://store/452530" rel="nofollow noopener">Steam URL Scheme</a></p>'
expected = '<p><a href="steam://store/452530" rel="noopener nofollow ugc">Steam URL Scheme</a></p>'
expect(cooked).to eq(n expected)
end
@ -1123,7 +1137,7 @@ describe PrettyText do
it 'allows only tel URL scheme to start with a plus character' do
SiteSetting.allowed_href_schemes = "tel|steam"
cooked = cook("[Tel URL Scheme](tel://+452530579785)")
expected = '<p><a href="tel://+452530579785" rel="nofollow noopener">Tel URL Scheme</a></p>'
expected = '<p><a href="tel://+452530579785" rel="noopener nofollow ugc">Tel URL Scheme</a></p>'
expect(cooked).to eq(n expected)
cooked2 = cook("[Steam URL Scheme](steam://+store/452530)")
@ -1151,7 +1165,7 @@ describe PrettyText do
cooked = PrettyText.cook("[`a` #known::tag here](http://example.com)")
html = <<~HTML
<p><a href="http://example.com" rel="nofollow noopener"><code>a</code> #known::tag here</a></p>
<p><a href="http://example.com" rel="noopener nofollow ugc"><code>a</code> #known::tag here</a></p>
HTML
expect(cooked).to eq(html.strip)
@ -1251,7 +1265,7 @@ HTML
it "won't break links by censoring them." do
expect_cooked_match("The link still works. [whiz](http://www.whiz.com)",
'<p>The link still works. <a href="http://www.whiz.com" rel="nofollow noopener">■■■■</a></p>')
'<p>The link still works. <a href="http://www.whiz.com" rel="noopener nofollow ugc">■■■■</a></p>')
end
it "escapes regexp characters" do
@ -1415,19 +1429,19 @@ HTML
it "supports url bbcode" do
cooked = PrettyText.cook "[url]http://sam.com[/url]"
html = '<p><a href="http://sam.com" data-bbcode="true" rel="nofollow noopener">http://sam.com</a></p>'
html = '<p><a href="http://sam.com" data-bbcode="true" rel="noopener nofollow ugc">http://sam.com</a></p>'
expect(cooked).to eq(html)
end
it "supports nesting tags in url" do
cooked = PrettyText.cook("[url=http://sam.com][b]I am sam[/b][/url]")
html = '<p><a href="http://sam.com" data-bbcode="true" rel="nofollow noopener"><span class="bbcode-b">I am sam</span></a></p>'
html = '<p><a href="http://sam.com" data-bbcode="true" rel="noopener nofollow ugc"><span class="bbcode-b">I am sam</span></a></p>'
expect(cooked).to eq(html)
end
it "supports query params in bbcode url" do
cooked = PrettyText.cook("[url=https://www.amazon.com/Camcorder-Hausbell-302S-Control-Infrared/dp/B01KLOA1PI/?tag=discourse]BBcode link[/url]")
html = '<p><a href="https://www.amazon.com/Camcorder-Hausbell-302S-Control-Infrared/dp/B01KLOA1PI/?tag=discourse" data-bbcode="true" rel="nofollow noopener">BBcode link</a></p>'
html = '<p><a href="https://www.amazon.com/Camcorder-Hausbell-302S-Control-Infrared/dp/B01KLOA1PI/?tag=discourse" data-bbcode="true" rel="noopener nofollow ugc">BBcode link</a></p>'
expect(cooked).to eq(html)
end
@ -1445,13 +1459,13 @@ HTML
it "support special handling for space in urls" do
cooked = PrettyText.cook "http://testing.com?a%20b"
html = '<p><a href="http://testing.com?a%20b" class="onebox" target="_blank" rel="nofollow noopener">http://testing.com?a%20b</a></p>'
html = '<p><a href="http://testing.com?a%20b" class="onebox" target="_blank" rel="noopener nofollow ugc">http://testing.com?a%20b</a></p>'
expect(cooked).to eq(html)
end
it "supports onebox for decoded urls" do
cooked = PrettyText.cook "http://testing.com?a%50b"
html = '<p><a href="http://testing.com?a%50b" class="onebox" target="_blank" rel="nofollow noopener">http://testing.com?aPb</a></p>'
html = '<p><a href="http://testing.com?a%50b" class="onebox" target="_blank" rel="noopener nofollow ugc">http://testing.com?aPb</a></p>'
expect(cooked).to eq(html)
end
@ -1613,7 +1627,7 @@ HTML
cooked = PrettyText.cook(md)
html = <<~HTML
<p><a href="http://www.cnn.com" rel="nofollow noopener">www.cnn.com</a> test.it <a href="http://test.com" rel="nofollow noopener">http://test.com</a> <a href="https://test.ab" rel="nofollow noopener">https://test.ab</a> <a href="https://a" rel="nofollow noopener">https://a</a></p>
<p><a href="http://www.cnn.com" rel="noopener nofollow ugc">www.cnn.com</a> test.it <a href="http://test.com" rel="noopener nofollow ugc">http://test.com</a> <a href="https://test.ab" rel="noopener nofollow ugc">https://test.ab</a> <a href="https://a" rel="noopener nofollow ugc">https://a</a></p>
HTML
expect(cooked).to eq(html.strip)
@ -1623,7 +1637,7 @@ HTML
cooked = PrettyText.cook(md)
html = <<~HTML
<p>www.cnn.com <a href="http://test.it" rel="nofollow noopener">test.it</a> <a href="http://test.com" rel="nofollow noopener">http://test.com</a> <a href="https://test.ab" rel="nofollow noopener">https://test.ab</a> <a href="https://a" rel="nofollow noopener">https://a</a></p>
<p>www.cnn.com <a href="http://test.it" rel="noopener nofollow ugc">test.it</a> <a href="http://test.com" rel="noopener nofollow ugc">http://test.com</a> <a href="https://test.ab" rel="noopener nofollow ugc">https://test.ab</a> <a href="https://a" rel="noopener nofollow ugc">https://a</a></p>
HTML
expect(cooked).to eq(html.strip)
@ -1633,7 +1647,7 @@ HTML
cooked = PrettyText.cook(md)
html = <<~HTML
<p>www.cnn.com test.it <a href="http://test.com" rel="nofollow noopener">http://test.com</a> <a href="https://test.ab" rel="nofollow noopener">https://test.ab</a> <a href="https://a" rel="nofollow noopener">https://a</a></p>
<p>www.cnn.com test.it <a href="http://test.com" rel="noopener nofollow ugc">http://test.com</a> <a href="https://test.ab" rel="noopener nofollow ugc">https://test.ab</a> <a href="https://a" rel="noopener nofollow ugc">https://a</a></p>
HTML
expect(cooked).to eq(html.strip)

View File

@ -1025,7 +1025,7 @@ describe Post do
it "should add nofollow to links in the post for trust levels below 3" do
post.user.trust_level = 2
post.save
expect(post.cooked).to match(/nofollow noopener/)
expect(post.cooked).to match(/noopener nofollow ugc/)
end
it "when tl3_links_no_follow is false, should not add nofollow for trust level 3 and higher" do
@ -1039,7 +1039,7 @@ describe Post do
SiteSetting.tl3_links_no_follow = true
post.user.trust_level = 3
post.save
expect(post.cooked).to match(/nofollow noopener/)
expect(post.cooked).to match(/noopener nofollow ugc/)
end
describe 'mentions' do

View File

@ -138,8 +138,8 @@ describe UserProfile do
it 'includes the link as nofollow if the user is not new' do
user.user_profile.send(:cook)
expect(user_profile.bio_excerpt).to match_html("I love <a href='http://discourse.org' rel='nofollow noopener'>http://discourse.org</a>")
expect(user_profile.bio_processed).to match_html("<p>I love <a href=\"http://discourse.org\" rel=\"nofollow noopener\">http://discourse.org</a></p>")
expect(user_profile.bio_excerpt).to match_html("I love <a href='http://discourse.org' rel='noopener nofollow ugc'>http://discourse.org</a>")
expect(user_profile.bio_processed).to match_html("<p>I love <a href=\"http://discourse.org\" rel=\"noopener nofollow ugc\">http://discourse.org</a></p>")
end
it 'removes the link if the user is new' do
@ -177,8 +177,8 @@ describe UserProfile do
created_user.save
created_user.reload
created_user.change_trust_level!(TrustLevel[2])
expect(created_user.user_profile.bio_excerpt).to match_html("I love <a href='http://discourse.org' rel='nofollow noopener'>http://discourse.org</a>")
expect(created_user.user_profile.bio_processed).to match_html("<p>I love <a href=\"http://discourse.org\" rel=\"nofollow noopener\">http://discourse.org</a></p>")
expect(created_user.user_profile.bio_excerpt).to match_html("I love <a href='http://discourse.org' rel='noopener nofollow ugc'>http://discourse.org</a>")
expect(created_user.user_profile.bio_processed).to match_html("<p>I love <a href=\"http://discourse.org\" rel=\"noopener nofollow ugc\">http://discourse.org</a></p>")
end
end
@ -188,8 +188,8 @@ describe UserProfile do
it 'includes the link with nofollow if the user is trust level 3 or higher' do
user.trust_level = TrustLevel[3]
user_profile.send(:cook)
expect(user_profile.bio_excerpt).to match_html("I love <a href='http://discourse.org' rel='nofollow noopener'>http://discourse.org</a>")
expect(user_profile.bio_processed).to match_html("<p>I love <a href=\"http://discourse.org\" rel=\"nofollow noopener\">http://discourse.org</a></p>")
expect(user_profile.bio_excerpt).to match_html("I love <a href='http://discourse.org' rel='noopener nofollow ugc'>http://discourse.org</a>")
expect(user_profile.bio_processed).to match_html("<p>I love <a href=\"http://discourse.org\" rel=\"noopener nofollow ugc\">http://discourse.org</a></p>")
end
end
end