From 3c816839558aa83a4610bd861da20a75f17ab9b3 Mon Sep 17 00:00:00 2001 From: David Taylor Date: Tue, 9 Aug 2022 11:28:29 +0100 Subject: [PATCH] DEV: Rename `UriHelper.escape_uri` to `.normalized_encode` This is a much better description of its function. It performs idempotent normalization of a URL. If consumers truly need to `encode` a URL (including double-encoding of existing encoded entities), they can use the existing `.encode` method. --- app/models/embeddable_host.rb | 4 ++-- app/models/topic_embed.rb | 6 +++--- lib/final_destination.rb | 6 +++--- lib/oneboxer.rb | 2 +- lib/pretty_text.rb | 2 +- lib/url_helper.rb | 12 +++++++++--- plugins/poll/plugin.rb | 2 +- spec/lib/final_destination_spec.rb | 14 +++++++------- spec/lib/url_helper_spec.rb | 26 +++++++++++++------------- 9 files changed, 40 insertions(+), 34 deletions(-) diff --git a/app/models/embeddable_host.rb b/app/models/embeddable_host.rb index 6dd77cbb2bd..f638ff235ab 100644 --- a/app/models/embeddable_host.rb +++ b/app/models/embeddable_host.rb @@ -16,7 +16,7 @@ class EmbeddableHost < ActiveRecord::Base def self.record_for_url(uri) if uri.is_a?(String) uri = begin - URI(UrlHelper.escape_uri(uri)) + URI(UrlHelper.normalized_encode(uri)) rescue URI::Error, Addressable::URI::InvalidURIError end end @@ -50,7 +50,7 @@ class EmbeddableHost < ActiveRecord::Base return true if url&.starts_with?(Discourse.base_url) && EmbeddableHost.exists? uri = begin - URI(UrlHelper.escape_uri(url)) + URI(UrlHelper.normalized_encode(url)) rescue URI::Error end diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb index 1d5676e5d9e..6b0e3662f84 100644 --- a/app/models/topic_embed.rb +++ b/app/models/topic_embed.rb @@ -110,7 +110,7 @@ class TopicEmbed < ActiveRecord::Base def self.find_remote(url) require 'ruby-readability' - url = UrlHelper.escape_uri(url) + url = UrlHelper.normalized_encode(url) original_uri = URI.parse(url) fd = FinalDestination.new( url, @@ -164,7 +164,7 @@ class TopicEmbed < ActiveRecord::Base unless (src.nil? || src.empty?) begin # convert URL to absolute form - node[url_param] = URI.join(url, UrlHelper.escape_uri(src)).to_s + node[url_param] = URI.join(url, UrlHelper.normalized_encode(src)).to_s rescue URI::Error, Addressable::URI::InvalidURIError # If there is a mistyped URL, just do nothing end @@ -200,7 +200,7 @@ class TopicEmbed < ActiveRecord::Base def self.absolutize_urls(url, contents) url = normalize_url(url) begin - uri = URI(UrlHelper.escape_uri(url)) + uri = URI(UrlHelper.normalized_encode(url)) rescue URI::Error return contents end diff --git a/lib/final_destination.rb b/lib/final_destination.rb index 7070bcfdeec..77b4b06821f 100644 --- a/lib/final_destination.rb +++ b/lib/final_destination.rb @@ -36,7 +36,7 @@ class FinalDestination def initialize(url, opts = nil) @url = url - @uri = uri(escape_url) if @url + @uri = uri(normalized_url) if @url @opts = opts || {} @force_get_hosts = @opts[:force_get_hosts] || [] @@ -417,8 +417,8 @@ class FinalDestination false end - def escape_url - UrlHelper.escape_uri(@url) + def normalized_url + UrlHelper.normalized_encode(@url) end def private_ranges diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb index c9037cde3b5..def706b0ef2 100644 --- a/lib/oneboxer.rb +++ b/lib/oneboxer.rb @@ -237,7 +237,7 @@ module Oneboxer end def self.onebox_raw(url, opts = {}) - url = UrlHelper.escape_uri(url).to_s + url = UrlHelper.normalized_encode(url).to_s local_onebox(url, opts) || external_onebox(url) rescue => e # no point warning here, just cause we have an issue oneboxing a url diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index d80df5bbf8d..8ecf86c2d6f 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -488,7 +488,7 @@ module PrettyText def self.convert_vimeo_iframes(doc) doc.css("iframe[src*='player.vimeo.com']").each do |iframe| if iframe["data-original-href"].present? - vimeo_url = UrlHelper.escape_uri(iframe["data-original-href"]) + vimeo_url = UrlHelper.normalized_encode(iframe["data-original-href"]) else vimeo_id = iframe['src'].split('/').last vimeo_url = "https://vimeo.com/#{vimeo_id}" diff --git a/lib/url_helper.rb b/lib/url_helper.rb index 70fda870911..8065d267969 100644 --- a/lib/url_helper.rb +++ b/lib/url_helper.rb @@ -60,10 +60,16 @@ class UrlHelper self.absolute(Upload.secure_media_url_from_upload_url(url), nil) end - # This is a poorly named method. In reality, it **normalizes** the given URL, - # and does not escape it. Therefore it's idempotent, and can be used on user - # input which includes a mix of escaped and unescaped characters def self.escape_uri(uri) + Discourse.deprecate( + "UrlHelper.escape_uri is deprecated. For normalization of user input use `.normalized_encode`. For true encoding, use `.encode`", + output_in_test: true, + drop_from: '3.0' + ) + normalized_encode(uri) + end + + def self.normalized_encode(uri) validated = nil url = uri.to_s diff --git a/plugins/poll/plugin.rb b/plugins/poll/plugin.rb index 94a1c5570e7..280bb5f3eca 100644 --- a/plugins/poll/plugin.rb +++ b/plugins/poll/plugin.rb @@ -147,7 +147,7 @@ after_initialize do post = options[:post] replacement = post&.url.present? ? - "#{I18n.t("poll.poll")}" : + "#{I18n.t("poll.poll")}" : I18n.t("poll.poll") doc.css("div.poll").each do |poll| diff --git a/spec/lib/final_destination_spec.rb b/spec/lib/final_destination_spec.rb index b3d102be73c..b07b37a288b 100644 --- a/spec/lib/final_destination_spec.rb +++ b/spec/lib/final_destination_spec.rb @@ -606,21 +606,21 @@ RSpec.describe FinalDestination do end end - describe "#escape_url" do - it "correctly escapes url" do + describe "#normalized_url" do + it "correctly normalizes url" do fragment_url = "https://eviltrout.com/2016/02/25/fixing-android-performance.html#discourse-comments" - expect(fd(fragment_url).escape_url.to_s).to eq(fragment_url) + expect(fd(fragment_url).normalized_url.to_s).to eq(fragment_url) - expect(fd("https://eviltrout.com?s=180&d=mm&r=g").escape_url.to_s) + expect(fd("https://eviltrout.com?s=180&d=mm&r=g").normalized_url.to_s) .to eq("https://eviltrout.com?s=180&d=mm&%23038;r=g") - expect(fd("http://example.com/?a=\11\15").escape_url.to_s).to eq("http://example.com/?a=%09%0D") + expect(fd("http://example.com/?a=\11\15").normalized_url.to_s).to eq("http://example.com/?a=%09%0D") - expect(fd("https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE").escape_url.to_s) + expect(fd("https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE").normalized_url.to_s) .to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE') - expect(fd('https://ru.wikipedia.org/wiki/Свобо').escape_url.to_s) + expect(fd('https://ru.wikipedia.org/wiki/Свобо').normalized_url.to_s) .to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE') end end diff --git a/spec/lib/url_helper_spec.rb b/spec/lib/url_helper_spec.rb index 4a3e64b5c9c..38331dfe7d2 100644 --- a/spec/lib/url_helper_spec.rb +++ b/spec/lib/url_helper_spec.rb @@ -85,40 +85,40 @@ RSpec.describe UrlHelper do end end - describe "#escape_uri" do + describe "#normalized_encode" do it "does not double escape %3A (:)" do url = "http://discourse.org/%3A/test" - expect(UrlHelper.escape_uri(url)).to eq(url) + expect(UrlHelper.normalized_encode(url)).to eq(url) end it "does not double escape %2F (/)" do url = "http://discourse.org/%2F/test" - expect(UrlHelper.escape_uri(url)).to eq(url) + expect(UrlHelper.normalized_encode(url)).to eq(url) end it "doesn't escape simple URL" do - url = UrlHelper.escape_uri('http://example.com/foo/bar') + url = UrlHelper.normalized_encode('http://example.com/foo/bar') expect(url).to eq('http://example.com/foo/bar') end it "escapes unsafe chars" do - url = UrlHelper.escape_uri("http://example.com/?a=\11\15") + url = UrlHelper.normalized_encode("http://example.com/?a=\11\15") expect(url).to eq('http://example.com/?a=%09%0D') end it "escapes non-ascii chars" do - url = UrlHelper.escape_uri('http://example.com/ماهی') + url = UrlHelper.normalized_encode('http://example.com/ماهی') expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C') end it "doesn't escape already escaped chars (space)" do - url = UrlHelper.escape_uri('http://example.com/foo%20bar/foo bar/') + url = UrlHelper.normalized_encode('http://example.com/foo%20bar/foo bar/') expect(url).to eq('http://example.com/foo%20bar/foo%20bar/') end it "doesn't escape already escaped chars (hash)" do url = 'https://calendar.google.com/calendar/embed?src=en.uk%23holiday@group.v.calendar.google.com&ctz=Europe%2FLondon' - escaped = UrlHelper.escape_uri(url) + escaped = UrlHelper.normalized_encode(url) expect(escaped).to eq(url) end @@ -126,27 +126,27 @@ RSpec.describe UrlHelper do skip "see: https://github.com/sporkmonger/addressable/issues/472" url = "https://example.com/ article/id%3A1.2%2F1/bar" expected = "https://example.com/%20article/id%3A1.2%2F1/bar" - escaped = UrlHelper.escape_uri(url) + escaped = UrlHelper.normalized_encode(url) expect(escaped).to eq(expected) end it "handles emoji domain names" do url = "https://💻.example/💻?computer=💻" expected = "https://xn--3s8h.example/%F0%9F%92%BB?computer=%F0%9F%92%BB" - escaped = UrlHelper.escape_uri(url) + escaped = UrlHelper.normalized_encode(url) expect(escaped).to eq(expected) end it "handles special-character domain names" do url = "https://éxample.com/test" expected = "https://xn--xample-9ua.com/test" - escaped = UrlHelper.escape_uri(url) + escaped = UrlHelper.normalized_encode(url) expect(escaped).to eq(expected) end it "performs basic normalization" do url = "http://EXAMPLE.com/a" - escaped = UrlHelper.escape_uri(url) + escaped = UrlHelper.normalized_encode(url) expect(escaped).to eq("http://example.com/a") end @@ -155,7 +155,7 @@ RSpec.describe UrlHelper do # sensitive information stripped presigned_url = "https://test.com/original/3X/b/5/575bcc2886bf7a39684b57ca90be85f7d399bbc7.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK8888999977%2F20200130%2Fus-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200130T064355Z&X-Amz-Expires=15&X-Amz-SignedHeaders=host&X-Amz-Security-Token=blahblah%2Bblahblah%2Fblah%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQAR&X-Amz-Signature=test" encoded_presigned_url = "https://test.com/original/3X/b/5/575bcc2886bf7a39684b57ca90be85f7d399bbc7.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AK8888999977/20200130/us-west-1/s3/aws4_request&X-Amz-Date=20200130T064355Z&X-Amz-Expires=15&X-Amz-SignedHeaders=host&X-Amz-Security-Token=blahblah+blahblah/blah//////////wEQA==&X-Amz-Signature=test" - expect(UrlHelper.escape_uri(presigned_url)).not_to eq(encoded_presigned_url) + expect(UrlHelper.normalized_encode(presigned_url)).not_to eq(encoded_presigned_url) end end