FIX: subfolder absolute links in summaries

This fixes the `PrettyText.make_all_links_absolute` to better handle subfolder.

In subfolder, when given the cooked version of a post, links to mentions includes the `Discourse.base_path` prefix. Adding the `Discourse.base_url` was doubling the `Discourse.base_path`.

The issue was hidden behind the specs which was stubbing `Discourse.base_url` instead of relying on `Discourse.base_path`.

This fixes both the "algorithm" used in `PrettyText.make_all_links_absolute` to better handle this case and correct the specs to properly handle subfolder cases.

There are lots of changes in the specs due to a refactoring to use squiggly heredoc strings for easier reading and less escaping.
This commit is contained in:
Régis Hanol 2024-05-21 21:53:03 +02:00
parent 9889547475
commit 8f7a3e5b29
2 changed files with 194 additions and 134 deletions

View File

@ -506,17 +506,23 @@ module PrettyText
end
def self.make_all_links_absolute(doc)
site_uri = nil
doc
.css("a")
.each do |link|
href = link["href"].to_s
.css("a[href]")
.each do |a|
begin
uri = URI(href)
site_uri ||= URI(Discourse.base_url)
unless uri.host.present? || href.start_with?("mailto")
link["href"] = "#{site_uri}#{link["href"]}"
end
href = a["href"].to_s
next if href.blank?
next if href.start_with?("mailto:")
next if href.start_with?(Discourse.base_url)
next if URI(href).host.present?
a["href"] = (
if href.start_with?(Discourse.base_path)
"#{Discourse.base_url_no_prefix}#{href}"
else
"#{Discourse.base_url}#{href}"
end
)
rescue URI::Error
# leave it
end

View File

@ -1322,153 +1322,207 @@ RSpec.describe PrettyText do
end
describe "format_for_email" do
let(:base_url) { "http://baseurl.net" }
context "when (sub)domain" do
before { Discourse.stubs(:base_path).returns("") }
before { Discourse.stubs(:base_url).returns(base_url) }
it "does not crash" do
PrettyText.format_for_email(
'<a href="mailto:michael.brown@discourse.org?subject=Your%20post%20at%20http://try.discourse.org/t/discussion-happens-so-much/127/1000?u=supermathie">test</a>',
post,
)
end
it "adds base url to relative links" do
html =
"<p><a class=\"mention\" href=\"/u/wiseguy\">@wiseguy</a>, <a class=\"mention\" href=\"/u/trollol\">@trollol</a> what do you guys think? </p>"
output = described_class.format_for_email(html, post)
expect(output).to eq(
"<p><a class=\"mention\" href=\"#{base_url}/u/wiseguy\">@wiseguy</a>, <a class=\"mention\" href=\"#{base_url}/u/trollol\">@trollol</a> what do you guys think? </p>",
)
end
it "doesn't change external absolute links" do
html = "<p>Check out <a href=\"http://mywebsite.com/users/boss\">this guy</a>.</p>"
expect(described_class.format_for_email(html, post)).to eq(html)
end
it "doesn't change internal absolute links" do
html = "<p>Check out <a href=\"#{base_url}/users/boss\">this guy</a>.</p>"
expect(described_class.format_for_email(html, post)).to eq(html)
end
it "can tolerate invalid URLs" do
html = "<p>Check out <a href=\"not a real url\">this guy</a>.</p>"
expect { described_class.format_for_email(html, post) }.to_not raise_error
end
it "doesn't change mailto" do
html = "<p>Contact me at <a href=\"mailto:username@me.com\">this address</a>.</p>"
expect(PrettyText.format_for_email(html, post)).to eq(html)
end
it "prefers data-original-href attribute to get Vimeo iframe link and escapes it" do
html =
"<p>Check out this video <iframe src='https://player.vimeo.com/video/329875646' data-original-href='https://vimeo.com/329875646/> <script>alert(1)</script>'></iframe>.</p>"
expect(PrettyText.format_for_email(html, post)).to match(
Regexp.escape("https://vimeo.com/329875646/%3E%20%3Cscript%3Ealert(1)%3C/script%3E"),
)
end
it "creates a valid URL when data-original-href is missing from Vimeo link" do
html =
'<iframe src="https://player.vimeo.com/video/508864124?h=fcbbcc92fa" width="640" height="360" frameborder="0" allow="autoplay; fullscreen; picture-in-picture" allowfullscreen></iframe>'
expect(PrettyText.format_for_email(html, post)).to match(
"https://vimeo.com/508864124/fcbbcc92fa",
)
end
describe "#convert_vimeo_iframes" do
it "converts <iframe> to <a>" do
it "does not crash" do
html = <<~HTML
<p>This is a Vimeo link:</p>
<iframe width="640" height="360" src="https://player.vimeo.com/video/1" data-original-href="https://vimeo.com/1" frameborder="0" allowfullscreen="" seamless="seamless" sandbox="allow-same-origin allow-scripts allow-forms allow-popups allow-popups-to-escape-sandbox allow-presentation"></iframe>
<a href="mailto:michael.brown@discourse.org?subject=Your%20post%20at%20http://try.discourse.org/t/discussion-happens-so-much/127/1000?u=supermathie">test</a>
HTML
md = PrettyText.format_for_email(html, post)
expect(md).not_to include("<iframe")
expect(md).to match_html(<<~HTML)
<p>This is a Vimeo link:</p>
<p><a href="https://vimeo.com/1">https://vimeo.com/1</a></p>
expect(described_class.format_for_email(html, post)).to eq <<~HTML
<a href="mailto:michael.brown@discourse.org?subject=Your%20post%20at%20http://try.discourse.org/t/discussion-happens-so-much/127/1000?u=supermathie">test</a>
HTML
end
it "adds base url to relative links" do
html = <<~HTML
<p><a class="mention" href="/u/wiseguy">@wiseguy</a>, <a class="mention" href="/u/trollol">@trollol</a> what do you guys think?</p>
HTML
expect(described_class.format_for_email(html, post)).to eq <<~HTML
<p><a class="mention" href="#{Discourse.base_url}/u/wiseguy">@wiseguy</a>, <a class="mention" href="#{Discourse.base_url}/u/trollol">@trollol</a> what do you guys think?</p>
HTML
end
it "doesn't change external absolute links" do
html = <<~HTML
<p>Check out <a href="http://mywebsite.com/users/boss">this guy</a>.</p>
HTML
expect(described_class.format_for_email(html, post)).to eq(html)
end
it "doesn't change internal absolute links" do
html = <<~HTML
<p>Check out <a href="#{Discourse.base_url}/users/boss">this guy</a>.</p>
HTML
expect(described_class.format_for_email(html, post)).to eq(html)
end
it "can tolerate invalid URLs" do
html = <<~HTML
<p>Check out <a href="not a real url">this guy</a>.</p>
HTML
expect(described_class.format_for_email(html, post)).to eq(html)
end
it "doesn't change mailto" do
html = <<~HTML
<p>Contact me at <a href="mailto:username@me.com">this address</a>.</p>
HTML
expect(described_class.format_for_email(html, post)).to eq(html)
end
it "prefers data-original-href attribute to get Vimeo iframe link and escapes it" do
html = <<~HTML
<p>Check out this video <iframe src='https://player.vimeo.com/video/329875646' data-original-href='https://vimeo.com/329875646/> <script>alert(1)</script>'></iframe>.</p>
HTML
expect(described_class.format_for_email(html, post)).to match(
Regexp.escape("https://vimeo.com/329875646/%3E%20%3Cscript%3Ealert(1)%3C/script%3E"),
)
end
it "creates a valid URL when data-original-href is missing from Vimeo link" do
html = <<~HTML
<iframe src="https://player.vimeo.com/video/508864124?h=fcbbcc92fa" width="640" height="360" frameborder="0" allow="autoplay; fullscreen; picture-in-picture" allowfullscreen></iframe>
HTML
expect(described_class.format_for_email(html, post)).to match(
"https://vimeo.com/508864124/fcbbcc92fa",
)
end
describe "#convert_vimeo_iframes" do
it "converts <iframe> to <a>" do
html = <<~HTML
<p>This is a Vimeo link:</p>
<iframe width="640" height="360" src="https://player.vimeo.com/video/1" data-original-href="https://vimeo.com/1" frameborder="0" allowfullscreen="" seamless="seamless" sandbox="allow-same-origin allow-scripts allow-forms allow-popups allow-popups-to-escape-sandbox allow-presentation"></iframe>
HTML
md = described_class.format_for_email(html, post)
expect(md).not_to include("<iframe")
expect(md).to match_html(<<~HTML)
<p>This is a Vimeo link:</p>
<p><a href="https://vimeo.com/1">https://vimeo.com/1</a></p>
HTML
end
end
describe "#strip_secure_uploads" do
before do
setup_s3
SiteSetting.s3_cdn_url = "https://s3.cdn.com"
SiteSetting.secure_uploads = true
SiteSetting.login_required = true
end
it "replaces secure video content" do
html = <<~HTML
<video width="100%" height="100%" controls="">
<source src="#{Discourse.base_url}/secure-uploads/original/1X/some-video.mp4">
<a href="#{Discourse.base_url}/secure-uploads/original/1X/some-video.mp4">Video label</a>
</source>
</video>
HTML
md = described_class.format_for_email(html, post)
expect(md).not_to include("<video")
expect(md.to_s).to match(I18n.t("emails.secure_uploads_placeholder"))
expect(md.to_s).not_to match(SiteSetting.Upload.s3_cdn_url)
end
it "replaces secure audio content" do
html = <<~HTML
<audio controls>
<source src="#{Discourse.base_url}/secure-uploads/original/1X/some-audio.mp3">
<a href="#{Discourse.base_url}/secure-uploads/original/1X/some-audio.mp3">Audio label</a>
</source>
</audio>
HTML
md = described_class.format_for_email(html, post)
expect(md).not_to include("<audio")
expect(md.to_s).to match(I18n.t("emails.secure_uploads_placeholder"))
expect(md.to_s).not_to match(SiteSetting.Upload.s3_cdn_url)
end
it "replaces secure uploads within a link with a placeholder, keeping the url in an attribute" do
url = "#{Discourse.base_url}\/secure-uploads/original/1X/testimage.png"
html = <<~HTML
<a href="#{url}"><img src="/secure-uploads/original/1X/testimage.png"></a>
HTML
md = described_class.format_for_email(html, post)
expect(md).not_to include("<img")
expect(md).to include("Redacted")
expect(md).to include("data-stripped-secure-upload=\"#{url}\"")
end
it "does not create nested redactions from double processing because of the view media link" do
url = "#{Discourse.base_url}\/secure-uploads/original/1X/testimage.png"
html = <<~HTML
<a href="#{url}"><img src="/secure-uploads/original/1X/testimage.png"></a>
HTML
md = described_class.format_for_email(html, post)
expect(md.scan(/stripped-secure-view-upload/).length).to eq(1)
expect(md.scan(/Redacted/).length).to eq(1)
end
it "replaces secure images with a placeholder, keeping the url in an attribute" do
url = "/secure-uploads/original/1X/testimage.png"
html = <<~HTML
<img src="#{url}" width="20" height="20">
HTML
md = described_class.format_for_email(html, post)
expect(md).not_to include("<img")
expect(md).to include("Redacted")
expect(md).to include("data-stripped-secure-upload=\"#{url}\"")
expect(md).to include("data-width=\"20\"")
expect(md).to include("data-height=\"20\"")
end
end
end
describe "#strip_secure_uploads" do
before do
setup_s3
SiteSetting.s3_cdn_url = "https://s3.cdn.com"
SiteSetting.secure_uploads = true
SiteSetting.login_required = true
end
context "when subfolder" do
before { Discourse.stubs(:base_path).returns("/forum") }
it "replaces secure video content" do
it "adds base url to relative links" do
html = <<~HTML
<video width="100%" height="100%" controls="">
<source src="#{base_url}/secure-uploads/original/1X/some-video.mp4">
<a href="#{base_url}/secure-uploads/original/1X/some-video.mp4">Video label</a>
</source>
</video>
<p><a class="mention" href="/forum/u/wiseguy">@wiseguy</a>, <a class="mention" href="/forum/u/trollol">@trollol</a> what do you guys think?</p>
HTML
md = PrettyText.format_for_email(html, post)
expect(md).not_to include("<video")
expect(md.to_s).to match(I18n.t("emails.secure_uploads_placeholder"))
expect(md.to_s).not_to match(SiteSetting.Upload.s3_cdn_url)
expect(described_class.format_for_email(html, post)).to eq <<~HTML
<p><a class="mention" href="#{Discourse.base_url}/u/wiseguy">@wiseguy</a>, <a class="mention" href="#{Discourse.base_url}/u/trollol">@trollol</a> what do you guys think?</p>
HTML
end
it "replaces secure audio content" do
it "doesn't change external absolute links" do
html = <<~HTML
<audio controls>
<source src="#{base_url}/secure-uploads/original/1X/some-audio.mp3">
<a href="#{base_url}/secure-uploads/original/1X/some-audio.mp3">Audio label</a>
</source>
</audio>
<p>Check out <a href="https://mywebsite.com/users/boss">this guy</a>.</p>
HTML
md = PrettyText.format_for_email(html, post)
expect(md).not_to include("<audio")
expect(md.to_s).to match(I18n.t("emails.secure_uploads_placeholder"))
expect(md.to_s).not_to match(SiteSetting.Upload.s3_cdn_url)
expect(described_class.format_for_email(html, post)).to eq(html)
end
it "replaces secure uploads within a link with a placeholder, keeping the url in an attribute" do
url = "#{Discourse.base_url}\/secure-uploads/original/1X/testimage.png"
it "doesn't change internal absolute links" do
html = <<~HTML
<a href=\"#{url}\"><img src=\"/secure-uploads/original/1X/testimage.png\"></a>
<p>Check out <a href="#{Discourse.base_url}/users/boss">this guy</a>.</p>
HTML
md = PrettyText.format_for_email(html, post)
expect(md).not_to include("<img")
expect(md).to include("Redacted")
expect(md).to include("data-stripped-secure-upload=\"#{url}\"")
end
it "does not create nested redactions from double processing because of the view media link" do
url = "#{Discourse.base_url}\/secure-uploads/original/1X/testimage.png"
html = <<~HTML
<a href=\"#{url}\"><img src=\"/secure-uploads/original/1X/testimage.png\"></a>
HTML
md = PrettyText.format_for_email(html, post)
md = PrettyText.format_for_email(md, post)
expect(md.scan(/stripped-secure-view-upload/).length).to eq(1)
expect(md.scan(/Redacted/).length).to eq(1)
end
it "replaces secure images with a placeholder, keeping the url in an attribute" do
url = "/secure-uploads/original/1X/testimage.png"
html = <<~HTML
<img src=\"#{url}\" width=\"20\" height=\"20\">
HTML
md = PrettyText.format_for_email(html, post)
expect(md).not_to include("<img")
expect(md).to include("Redacted")
expect(md).to include("data-stripped-secure-upload=\"#{url}\"")
expect(md).to include("data-width=\"20\"")
expect(md).to include("data-height=\"20\"")
expect(described_class.format_for_email(html, post)).to eq(html)
end
end
end