diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb index 8803a29b053..279ea3bfaf2 100644 --- a/app/models/topic_embed.rb +++ b/app/models/topic_embed.rb @@ -26,6 +26,14 @@ class TopicEmbed < ActiveRecord::Base "\n
\n#{I18n.t('embed.imported_from', link: "#{url}")}\n" end + DOUBLE_ESCAPED_EXPR = /%25([0-9a-f]{2})/i + + # Prevents double URL encode + # https://stackoverflow.com/a/37599235 + def self.escape_uri(uri) + URI.encode(uri).gsub(DOUBLE_ESCAPED_EXPR, '%\1') + end + # Import an article from a source (RSS/Atom/Other) def self.import(user, url, title, contents) return unless url =~ /^https?\:\/\// @@ -77,7 +85,8 @@ class TopicEmbed < ActiveRecord::Base def self.find_remote(url) require 'ruby-readability' - original_uri = URI.parse(URI.encode(url)) + url = escape_uri(url) + original_uri = URI.parse(url) opts = { tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote], attributes: %w[href src class], @@ -90,7 +99,7 @@ class TopicEmbed < ActiveRecord::Base response = FetchResponse.new begin - html = open(URI.encode(url), allow_redirections: :safe).read + html = open(url, allow_redirections: :safe).read rescue OpenURI::HTTPError, Net::OpenTimeout return end @@ -119,7 +128,7 @@ class TopicEmbed < ActiveRecord::Base src = node[url_param] unless (src.nil? || src.empty?) begin - uri = URI.parse(URI.encode(src)) + uri = URI.parse(escape_uri(src)) unless uri.host uri.scheme = original_uri.scheme uri.host = original_uri.host @@ -148,6 +157,8 @@ class TopicEmbed < ActiveRecord::Base def self.import_remote(import_user, url, opts = nil) opts = opts || {} response = find_remote(url) + return if response.nil? + response.title = opts[:title] if opts[:title].present? import_user = response.author if response.author.present? @@ -157,7 +168,7 @@ class TopicEmbed < ActiveRecord::Base # Convert any relative URLs to absolute. RSS is annoying for this. def self.absolutize_urls(url, contents) url = normalize_url(url) - uri = URI(URI.encode(url)) + uri = URI(escape_uri(url)) prefix = "#{uri.scheme}://#{uri.host}" prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443 diff --git a/spec/models/topic_embed_spec.rb b/spec/models/topic_embed_spec.rb index 44a37959ed0..130323eec34 100644 --- a/spec/models/topic_embed_spec.rb +++ b/spec/models/topic_embed_spec.rb @@ -186,7 +186,8 @@ describe TopicEmbed do before do file.stubs(:read).returns contents - TopicEmbed.stubs(:open).returns file + TopicEmbed.stubs(:open) + .with('http://eviltrout.com/test/%D9%85%D8%A7%D9%87%DB%8C', allow_redirections: :safe).returns file end it "doesn't throw an error" do @@ -195,6 +196,24 @@ describe TopicEmbed do end end + context "encoded URL" do + let(:url) { 'http://example.com/hello%20world' } + let(:contents) { "Hello World!" } + let!(:embeddable_host) { Fabricate(:embeddable_host) } + let!(:file) { StringIO.new } + + before do + file.stubs(:read).returns contents + TopicEmbed.stubs(:open) + .with('http://example.com/hello%20world', allow_redirections: :safe).returns file + end + + it "doesn't throw an error" do + response = TopicEmbed.find_remote(url) + expect(response.title).to eq("Hello World!") + end + end + context "emails" do let(:url) { 'http://example.com/foo' } let(:contents) { '

URL encoded @ symbol

normal mailto link

' } @@ -214,4 +233,25 @@ describe TopicEmbed do end end + context ".escape_uri" do + it "doesn't escape simple URL" do + url = TopicEmbed.escape_uri('http://example.com/foo/bar') + expect(url).to eq('http://example.com/foo/bar') + end + + it "escapes unsafe chars" do + url = TopicEmbed.escape_uri("http://example.com/?a=\11\15") + expect(url).to eq('http://example.com/?a=%09%0D') + end + + it "escapes non-ascii chars" do + url = TopicEmbed.escape_uri('http://example.com/ماهی') + expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C') + end + + it "doesn't escape already escaped chars" do + url = TopicEmbed.escape_uri('http://example.com/foo%20bar/foo bar/') + expect(url).to eq('http://example.com/foo%20bar/foo%20bar/') + end + end end