DEV: Rename UriHelper.escape_uri to .normalized_encode

This is a much better description of its function. It performs idempotent normalization of a URL. If consumers truly need to `encode` a URL (including double-encoding of existing encoded entities), they can use the existing `.encode` method.
This commit is contained in:
David Taylor 2022-08-09 11:28:29 +01:00
parent f0a0252526
commit 3c81683955
9 changed files with 40 additions and 34 deletions

View File

@ -16,7 +16,7 @@ class EmbeddableHost < ActiveRecord::Base
def self.record_for_url(uri)
if uri.is_a?(String)
uri = begin
URI(UrlHelper.escape_uri(uri))
URI(UrlHelper.normalized_encode(uri))
rescue URI::Error, Addressable::URI::InvalidURIError
end
end
@ -50,7 +50,7 @@ class EmbeddableHost < ActiveRecord::Base
return true if url&.starts_with?(Discourse.base_url) && EmbeddableHost.exists?
uri = begin
URI(UrlHelper.escape_uri(url))
URI(UrlHelper.normalized_encode(url))
rescue URI::Error
end

View File

@ -110,7 +110,7 @@ class TopicEmbed < ActiveRecord::Base
def self.find_remote(url)
require 'ruby-readability'
url = UrlHelper.escape_uri(url)
url = UrlHelper.normalized_encode(url)
original_uri = URI.parse(url)
fd = FinalDestination.new(
url,
@ -164,7 +164,7 @@ class TopicEmbed < ActiveRecord::Base
unless (src.nil? || src.empty?)
begin
# convert URL to absolute form
node[url_param] = URI.join(url, UrlHelper.escape_uri(src)).to_s
node[url_param] = URI.join(url, UrlHelper.normalized_encode(src)).to_s
rescue URI::Error, Addressable::URI::InvalidURIError
# If there is a mistyped URL, just do nothing
end
@ -200,7 +200,7 @@ class TopicEmbed < ActiveRecord::Base
def self.absolutize_urls(url, contents)
url = normalize_url(url)
begin
uri = URI(UrlHelper.escape_uri(url))
uri = URI(UrlHelper.normalized_encode(url))
rescue URI::Error
return contents
end

View File

@ -36,7 +36,7 @@ class FinalDestination
def initialize(url, opts = nil)
@url = url
@uri = uri(escape_url) if @url
@uri = uri(normalized_url) if @url
@opts = opts || {}
@force_get_hosts = @opts[:force_get_hosts] || []
@ -417,8 +417,8 @@ class FinalDestination
false
end
def escape_url
UrlHelper.escape_uri(@url)
def normalized_url
UrlHelper.normalized_encode(@url)
end
def private_ranges

View File

@ -237,7 +237,7 @@ module Oneboxer
end
def self.onebox_raw(url, opts = {})
url = UrlHelper.escape_uri(url).to_s
url = UrlHelper.normalized_encode(url).to_s
local_onebox(url, opts) || external_onebox(url)
rescue => e
# no point warning here, just cause we have an issue oneboxing a url

View File

@ -488,7 +488,7 @@ module PrettyText
def self.convert_vimeo_iframes(doc)
doc.css("iframe[src*='player.vimeo.com']").each do |iframe|
if iframe["data-original-href"].present?
vimeo_url = UrlHelper.escape_uri(iframe["data-original-href"])
vimeo_url = UrlHelper.normalized_encode(iframe["data-original-href"])
else
vimeo_id = iframe['src'].split('/').last
vimeo_url = "https://vimeo.com/#{vimeo_id}"

View File

@ -60,10 +60,16 @@ class UrlHelper
self.absolute(Upload.secure_media_url_from_upload_url(url), nil)
end
# This is a poorly named method. In reality, it **normalizes** the given URL,
# and does not escape it. Therefore it's idempotent, and can be used on user
# input which includes a mix of escaped and unescaped characters
def self.escape_uri(uri)
Discourse.deprecate(
"UrlHelper.escape_uri is deprecated. For normalization of user input use `.normalized_encode`. For true encoding, use `.encode`",
output_in_test: true,
drop_from: '3.0'
)
normalized_encode(uri)
end
def self.normalized_encode(uri)
validated = nil
url = uri.to_s

View File

@ -147,7 +147,7 @@ after_initialize do
post = options[:post]
replacement = post&.url.present? ?
"<a href='#{UrlHelper.escape_uri(post.url)}'>#{I18n.t("poll.poll")}</a>" :
"<a href='#{UrlHelper.normalized_encode(post.url)}'>#{I18n.t("poll.poll")}</a>" :
I18n.t("poll.poll")
doc.css("div.poll").each do |poll|

View File

@ -606,21 +606,21 @@ RSpec.describe FinalDestination do
end
end
describe "#escape_url" do
it "correctly escapes url" do
describe "#normalized_url" do
it "correctly normalizes url" do
fragment_url = "https://eviltrout.com/2016/02/25/fixing-android-performance.html#discourse-comments"
expect(fd(fragment_url).escape_url.to_s).to eq(fragment_url)
expect(fd(fragment_url).normalized_url.to_s).to eq(fragment_url)
expect(fd("https://eviltrout.com?s=180&#038;d=mm&#038;r=g").escape_url.to_s)
expect(fd("https://eviltrout.com?s=180&#038;d=mm&#038;r=g").normalized_url.to_s)
.to eq("https://eviltrout.com?s=180&#038;d=mm&%23038;r=g")
expect(fd("http://example.com/?a=\11\15").escape_url.to_s).to eq("http://example.com/?a=%09%0D")
expect(fd("http://example.com/?a=\11\15").normalized_url.to_s).to eq("http://example.com/?a=%09%0D")
expect(fd("https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE").escape_url.to_s)
expect(fd("https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE").normalized_url.to_s)
.to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE')
expect(fd('https://ru.wikipedia.org/wiki/Свобо').escape_url.to_s)
expect(fd('https://ru.wikipedia.org/wiki/Свобо').normalized_url.to_s)
.to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE')
end
end

View File

@ -85,40 +85,40 @@ RSpec.describe UrlHelper do
end
end
describe "#escape_uri" do
describe "#normalized_encode" do
it "does not double escape %3A (:)" do
url = "http://discourse.org/%3A/test"
expect(UrlHelper.escape_uri(url)).to eq(url)
expect(UrlHelper.normalized_encode(url)).to eq(url)
end
it "does not double escape %2F (/)" do
url = "http://discourse.org/%2F/test"
expect(UrlHelper.escape_uri(url)).to eq(url)
expect(UrlHelper.normalized_encode(url)).to eq(url)
end
it "doesn't escape simple URL" do
url = UrlHelper.escape_uri('http://example.com/foo/bar')
url = UrlHelper.normalized_encode('http://example.com/foo/bar')
expect(url).to eq('http://example.com/foo/bar')
end
it "escapes unsafe chars" do
url = UrlHelper.escape_uri("http://example.com/?a=\11\15")
url = UrlHelper.normalized_encode("http://example.com/?a=\11\15")
expect(url).to eq('http://example.com/?a=%09%0D')
end
it "escapes non-ascii chars" do
url = UrlHelper.escape_uri('http://example.com/ماهی')
url = UrlHelper.normalized_encode('http://example.com/ماهی')
expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C')
end
it "doesn't escape already escaped chars (space)" do
url = UrlHelper.escape_uri('http://example.com/foo%20bar/foo bar/')
url = UrlHelper.normalized_encode('http://example.com/foo%20bar/foo bar/')
expect(url).to eq('http://example.com/foo%20bar/foo%20bar/')
end
it "doesn't escape already escaped chars (hash)" do
url = 'https://calendar.google.com/calendar/embed?src=en.uk%23holiday@group.v.calendar.google.com&ctz=Europe%2FLondon'
escaped = UrlHelper.escape_uri(url)
escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq(url)
end
@ -126,27 +126,27 @@ RSpec.describe UrlHelper do
skip "see: https://github.com/sporkmonger/addressable/issues/472"
url = "https://example.com/ article/id%3A1.2%2F1/bar"
expected = "https://example.com/%20article/id%3A1.2%2F1/bar"
escaped = UrlHelper.escape_uri(url)
escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq(expected)
end
it "handles emoji domain names" do
url = "https://💻.example/💻?computer=💻"
expected = "https://xn--3s8h.example/%F0%9F%92%BB?computer=%F0%9F%92%BB"
escaped = UrlHelper.escape_uri(url)
escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq(expected)
end
it "handles special-character domain names" do
url = "https://éxample.com/test"
expected = "https://xn--xample-9ua.com/test"
escaped = UrlHelper.escape_uri(url)
escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq(expected)
end
it "performs basic normalization" do
url = "http://EXAMPLE.com/a"
escaped = UrlHelper.escape_uri(url)
escaped = UrlHelper.normalized_encode(url)
expect(escaped).to eq("http://example.com/a")
end
@ -155,7 +155,7 @@ RSpec.describe UrlHelper do
# sensitive information stripped
presigned_url = "https://test.com/original/3X/b/5/575bcc2886bf7a39684b57ca90be85f7d399bbc7.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK8888999977%2F20200130%2Fus-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200130T064355Z&X-Amz-Expires=15&X-Amz-SignedHeaders=host&X-Amz-Security-Token=blahblah%2Bblahblah%2Fblah%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQAR&X-Amz-Signature=test"
encoded_presigned_url = "https://test.com/original/3X/b/5/575bcc2886bf7a39684b57ca90be85f7d399bbc7.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK8888999977/20200130/us-west-1/s3/aws4_request&X-Amz-Date=20200130T064355Z&X-Amz-Expires=15&X-Amz-SignedHeaders=host&X-Amz-Security-Token=blahblah+blahblah/blah//////////wEQA==&X-Amz-Signature=test"
expect(UrlHelper.escape_uri(presigned_url)).not_to eq(encoded_presigned_url)
expect(UrlHelper.normalized_encode(presigned_url)).not_to eq(encoded_presigned_url)
end
end