From e30851e45a101a5678dddd608e335dd11d756ae4 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Tue, 12 Dec 2017 17:50:39 +0100 Subject: [PATCH] Move escape_uri method to a more suitable place --- app/models/topic_embed.rb | 17 ++++------------- lib/final_destination.rb | 3 ++- lib/url_helper.rb | 10 ++++++++++ spec/components/url_helper_spec.rb | 22 ++++++++++++++++++++++ spec/models/topic_embed_spec.rb | 21 --------------------- 5 files changed, 38 insertions(+), 35 deletions(-) diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb index ad0e407d802..1692b1a7277 100644 --- a/app/models/topic_embed.rb +++ b/app/models/topic_embed.rb @@ -1,4 +1,5 @@ require_dependency 'nokogiri' +require_dependency 'url_helper' class TopicEmbed < ActiveRecord::Base include Trashable @@ -26,16 +27,6 @@ class TopicEmbed < ActiveRecord::Base "\n
\n#{I18n.t('embed.imported_from', link: "#{url}")}\n" end - DOUBLE_ESCAPED_EXPR = /%25([0-9a-f]{2})/i - - # Prevents double URL encode - # https://stackoverflow.com/a/37599235 - def self.escape_uri(uri, pattern = URI::UNSAFE) - encoded = URI.encode(uri, pattern) - encoded.gsub!(DOUBLE_ESCAPED_EXPR, '%\1') - encoded - end - # Import an article from a source (RSS/Atom/Other) def self.import(user, url, title, contents) return unless url =~ /^https?\:\/\// @@ -87,7 +78,7 @@ class TopicEmbed < ActiveRecord::Base def self.find_remote(url) require 'ruby-readability' - url = escape_uri(url) + url = UrlHelper.escape_uri(url) original_uri = URI.parse(url) opts = { tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote], @@ -130,7 +121,7 @@ class TopicEmbed < ActiveRecord::Base src = node[url_param] unless (src.nil? || src.empty?) begin - uri = URI.parse(escape_uri(src)) + uri = URI.parse(UrlHelper.escape_uri(src)) unless uri.host uri.scheme = original_uri.scheme uri.host = original_uri.host @@ -170,7 +161,7 @@ class TopicEmbed < ActiveRecord::Base # Convert any relative URLs to absolute. RSS is annoying for this. def self.absolutize_urls(url, contents) url = normalize_url(url) - uri = URI(escape_uri(url)) + uri = URI(UrlHelper.escape_uri(url)) prefix = "#{uri.scheme}://#{uri.host}" prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443 diff --git a/lib/final_destination.rb b/lib/final_destination.rb index e7746ad931e..acc922e61fd 100644 --- a/lib/final_destination.rb +++ b/lib/final_destination.rb @@ -2,6 +2,7 @@ require 'socket' require 'ipaddr' require 'excon' require 'rate_limiter' +require 'url_helper' # Determine the final endpoint for a Web URI, following redirects class FinalDestination @@ -237,7 +238,7 @@ class FinalDestination end def escape_url - TopicEmbed.escape_uri( + UrlHelper.escape_uri( CGI.unescapeHTML(@url), Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}#]") ) diff --git a/lib/url_helper.rb b/lib/url_helper.rb index df25e5463c2..758528a4a31 100644 --- a/lib/url_helper.rb +++ b/lib/url_helper.rb @@ -21,4 +21,14 @@ class UrlHelper url.sub(/^http:/i, "") end + DOUBLE_ESCAPED_REGEXP ||= /%25([0-9a-f]{2})/i + + # Prevents double URL encode + # https://stackoverflow.com/a/37599235 + def self.escape_uri(uri, pattern = URI::UNSAFE) + encoded = URI.encode(uri, pattern) + encoded.gsub!(DOUBLE_ESCAPED_REGEXP, '%\1') + encoded + end + end diff --git a/spec/components/url_helper_spec.rb b/spec/components/url_helper_spec.rb index 972f3daa567..2dd48c726c7 100644 --- a/spec/components/url_helper_spec.rb +++ b/spec/components/url_helper_spec.rb @@ -79,4 +79,26 @@ describe UrlHelper do end + describe "#escape_uri" do + it "doesn't escape simple URL" do + url = UrlHelper.escape_uri('http://example.com/foo/bar') + expect(url).to eq('http://example.com/foo/bar') + end + + it "escapes unsafe chars" do + url = UrlHelper.escape_uri("http://example.com/?a=\11\15") + expect(url).to eq('http://example.com/?a=%09%0D') + end + + it "escapes non-ascii chars" do + url = UrlHelper.escape_uri('http://example.com/ماهی') + expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C') + end + + it "doesn't escape already escaped chars" do + url = UrlHelper.escape_uri('http://example.com/foo%20bar/foo bar/') + expect(url).to eq('http://example.com/foo%20bar/foo%20bar/') + end + end + end diff --git a/spec/models/topic_embed_spec.rb b/spec/models/topic_embed_spec.rb index 130323eec34..3438c615fe2 100644 --- a/spec/models/topic_embed_spec.rb +++ b/spec/models/topic_embed_spec.rb @@ -233,25 +233,4 @@ describe TopicEmbed do end end - context ".escape_uri" do - it "doesn't escape simple URL" do - url = TopicEmbed.escape_uri('http://example.com/foo/bar') - expect(url).to eq('http://example.com/foo/bar') - end - - it "escapes unsafe chars" do - url = TopicEmbed.escape_uri("http://example.com/?a=\11\15") - expect(url).to eq('http://example.com/?a=%09%0D') - end - - it "escapes non-ascii chars" do - url = TopicEmbed.escape_uri('http://example.com/ماهی') - expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C') - end - - it "doesn't escape already escaped chars" do - url = TopicEmbed.escape_uri('http://example.com/foo%20bar/foo bar/') - expect(url).to eq('http://example.com/foo%20bar/foo%20bar/') - end - end end