DEV: More robust processing of URLs (#11361)

* DEV: More robust processing of URLs

The previous `UrlHelper.encode_component(CGI.unescapeHTML(UrlHelper.unencode(uri)))` method would naively process URLs, which could result in a badly formed response.

`Addressable::URI.normalized_encode(uri)` appears to deal with these edge-cases in a more robust way.

* DEV: onebox should use UrlHelper

* DEV: fix spec

* DEV: Escape output when rendering local links
This commit is contained in:
jbrw 2020-12-03 17:16:01 -05:00 committed by GitHub
parent e4d51e5b0a
commit da9b837da0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 12 additions and 9 deletions

View File

@@ -170,7 +170,7 @@ module Oneboxer
end end
def self.onebox_raw(url, opts = {}) def self.onebox_raw(url, opts = {})
url = URI(url).to_s url = UrlHelper.escape_uri(url).to_s
local_onebox(url, opts) || external_onebox(url) local_onebox(url, opts) || external_onebox(url)
rescue => e rescue => e
# no point warning here, just cause we have an issue oneboxing a url # no point warning here, just cause we have an issue oneboxing a url
@@ -191,7 +191,7 @@ module Oneboxer
when "list" then local_category_html(url, route) when "list" then local_category_html(url, route)
end end
html = html.presence || "<a href='#{url}'>#{url}</a>" html = html.presence || "<a href='#{URI(url).to_s}'>#{URI(url).to_s}</a>"
{ onebox: html, preview: html } { onebox: html, preview: html }
end end

View File

@@ -60,11 +60,9 @@ class UrlHelper
self.absolute(Upload.secure_media_url_from_upload_url(url), nil) self.absolute(Upload.secure_media_url_from_upload_url(url), nil)
end end
# Prevents double URL encode
# https://stackoverflow.com/a/37599235
def self.escape_uri(uri) def self.escape_uri(uri)
return uri if s3_presigned_url?(uri) return uri if s3_presigned_url?(uri)
UrlHelper.encode_component(CGI.unescapeHTML(UrlHelper.unencode(uri))) Addressable::URI.normalized_encode(uri)
end end
def self.rails_route_from_url(url) def self.rails_route_from_url(url)

View File

@@ -463,7 +463,7 @@ describe FinalDestination do
expect(fd(fragment_url).escape_url.to_s).to eq(fragment_url) expect(fd(fragment_url).escape_url.to_s).to eq(fragment_url)
expect(fd("https://eviltrout.com?s=180&#038;d=mm&#038;r=g").escape_url.to_s) expect(fd("https://eviltrout.com?s=180&#038;d=mm&#038;r=g").escape_url.to_s)
.to eq("https://eviltrout.com?s=180&d=mm&r=g") .to eq("https://eviltrout.com?s=180&#038;d=mm&%23038;r=g")
expect(fd("http://example.com/?a=\11\15").escape_url.to_s).to eq("http://example.com/?a=%09%0D") expect(fd("http://example.com/?a=\11\15").escape_url.to_s).to eq("http://example.com/?a=%09%0D")

View File

@@ -114,11 +114,16 @@ describe UrlHelper do
expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C') expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C')
end end
it "doesn't escape already escaped chars" do it "doesn't escape already escaped chars (space)" do
url = UrlHelper.escape_uri('http://example.com/foo%20bar/foo bar/') url = UrlHelper.escape_uri('http://example.com/foo%20bar/foo bar/')
expect(url).to eq('http://example.com/foo%20bar/foo%20bar/') expect(url).to eq('http://example.com/foo%20bar/foo%20bar/')
end end
it "doesn't escape already escaped chars (hash)" do
url = UrlHelper.escape_uri('https://calendar.google.com/calendar/embed?src=en.uk%23holiday%40group.v.calendar.google.com&ctz=Europe%2FLondon')
expect(url).to eq('https://calendar.google.com/calendar/embed?src=en.uk%23holiday@group.v.calendar.google.com&ctz=Europe/London')
end
it "doesn't escape S3 presigned URLs" do it "doesn't escape S3 presigned URLs" do
# both of these were originally real presigned URLs and have had all # both of these were originally real presigned URLs and have had all
# sensitive information stripped # sensitive information stripped

View File

@@ -349,9 +349,9 @@ describe TopicEmbed do
let(:invalid_url) { 'http://source.com/#double#anchor' } let(:invalid_url) { 'http://source.com/#double#anchor' }
let(:contents) { "hello world new post <a href='/hello'>hello</a>" } let(:contents) { "hello world new post <a href='/hello'>hello</a>" }
it "does not attempt absolutizing on a bad URI" do it "handles badly formed URIs" do
raw = TopicEmbed.absolutize_urls(invalid_url, contents) raw = TopicEmbed.absolutize_urls(invalid_url, contents)
expect(raw).to eq(contents) expect(raw).to eq("hello world new post <a href=\"http://source.com/hello\">hello</a>")
end end
end end