diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb
index 8803a29b053..279ea3bfaf2 100644
--- a/app/models/topic_embed.rb
+++ b/app/models/topic_embed.rb
@@ -26,6 +26,14 @@ class TopicEmbed < ActiveRecord::Base
"\n
\n#{I18n.t('embed.imported_from', link: "#{url}")}\n"
end
+ DOUBLE_ESCAPED_EXPR = /%25([0-9a-f]{2})/i
+
+ # Prevents double URL encode
+ # https://stackoverflow.com/a/37599235
+ def self.escape_uri(uri)
+ URI.encode(uri).gsub(DOUBLE_ESCAPED_EXPR, '%\1')
+ end
+
# Import an article from a source (RSS/Atom/Other)
def self.import(user, url, title, contents)
return unless url =~ /^https?\:\/\//
@@ -77,7 +85,8 @@ class TopicEmbed < ActiveRecord::Base
def self.find_remote(url)
require 'ruby-readability'
- original_uri = URI.parse(URI.encode(url))
+ url = escape_uri(url)
+ original_uri = URI.parse(url)
opts = {
tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote],
attributes: %w[href src class],
@@ -90,7 +99,7 @@ class TopicEmbed < ActiveRecord::Base
response = FetchResponse.new
begin
- html = open(URI.encode(url), allow_redirections: :safe).read
+ html = open(url, allow_redirections: :safe).read
rescue OpenURI::HTTPError, Net::OpenTimeout
return
end
@@ -119,7 +128,7 @@ class TopicEmbed < ActiveRecord::Base
src = node[url_param]
unless (src.nil? || src.empty?)
begin
- uri = URI.parse(URI.encode(src))
+ uri = URI.parse(escape_uri(src))
unless uri.host
uri.scheme = original_uri.scheme
uri.host = original_uri.host
@@ -148,6 +157,8 @@ class TopicEmbed < ActiveRecord::Base
def self.import_remote(import_user, url, opts = nil)
opts = opts || {}
response = find_remote(url)
+ return if response.nil?
+
response.title = opts[:title] if opts[:title].present?
import_user = response.author if response.author.present?
@@ -157,7 +168,7 @@ class TopicEmbed < ActiveRecord::Base
# Convert any relative URLs to absolute. RSS is annoying for this.
def self.absolutize_urls(url, contents)
url = normalize_url(url)
- uri = URI(URI.encode(url))
+ uri = URI(escape_uri(url))
prefix = "#{uri.scheme}://#{uri.host}"
prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443
diff --git a/spec/models/topic_embed_spec.rb b/spec/models/topic_embed_spec.rb
index 44a37959ed0..130323eec34 100644
--- a/spec/models/topic_embed_spec.rb
+++ b/spec/models/topic_embed_spec.rb
@@ -186,7 +186,8 @@ describe TopicEmbed do
before do
file.stubs(:read).returns contents
- TopicEmbed.stubs(:open).returns file
+ TopicEmbed.stubs(:open)
+ .with('http://eviltrout.com/test/%D9%85%D8%A7%D9%87%DB%8C', allow_redirections: :safe).returns file
end
it "doesn't throw an error" do
@@ -195,6 +196,24 @@ describe TopicEmbed do
end
end
+ context "encoded URL" do
+ let(:url) { 'http://example.com/hello%20world' }
+ let(:contents) { "Hello World!" }
+ let!(:embeddable_host) { Fabricate(:embeddable_host) }
+ let!(:file) { StringIO.new }
+
+ before do
+ file.stubs(:read).returns contents
+ TopicEmbed.stubs(:open)
+ .with('http://example.com/hello%20world', allow_redirections: :safe).returns file
+ end
+
+ it "doesn't throw an error" do
+ response = TopicEmbed.find_remote(url)
+ expect(response.title).to eq("Hello World!")
+ end
+ end
+
context "emails" do
let(:url) { 'http://example.com/foo' }
let(:contents) { 'URL encoded @ symbol
normal mailto link
' }
@@ -214,4 +233,25 @@ describe TopicEmbed do
end
end
+ context ".escape_uri" do
+ it "doesn't escape simple URL" do
+ url = TopicEmbed.escape_uri('http://example.com/foo/bar')
+ expect(url).to eq('http://example.com/foo/bar')
+ end
+
+ it "escapes unsafe chars" do
+ url = TopicEmbed.escape_uri("http://example.com/?a=\11\15")
+ expect(url).to eq('http://example.com/?a=%09%0D')
+ end
+
+ it "escapes non-ascii chars" do
+ url = TopicEmbed.escape_uri('http://example.com/ماهی')
+ expect(url).to eq('http://example.com/%D9%85%D8%A7%D9%87%DB%8C')
+ end
+
+ it "doesn't escape already escaped chars" do
+ url = TopicEmbed.escape_uri('http://example.com/foo%20bar/foo bar/')
+ expect(url).to eq('http://example.com/foo%20bar/foo%20bar/')
+ end
+ end
end