diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb index 99bc6279cef..8e45c1d60ea 100644 --- a/app/models/topic_embed.rb +++ b/app/models/topic_embed.rb @@ -69,7 +69,7 @@ class TopicEmbed < ActiveRecord::Base def self.find_remote(url) require 'ruby-readability' - original_uri = URI.parse(url) + original_uri = URI.parse(URI.encode(url)) opts = { tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote], attributes: %w[href src class], @@ -81,7 +81,7 @@ class TopicEmbed < ActiveRecord::Base embed_classname_whitelist = SiteSetting.embed_classname_whitelist if SiteSetting.embed_classname_whitelist.present? response = FetchResponse.new - html = open(url, allow_redirections: :safe).read + html = open(URI.encode(url), allow_redirections: :safe).read raw_doc = Nokogiri::HTML(html) auth_element = raw_doc.at('meta[@name="author"]') @@ -107,7 +107,7 @@ class TopicEmbed < ActiveRecord::Base src = node[url_param] unless (src.nil? || src.empty?) begin - uri = URI.parse(src) + uri = URI.parse(URI.encode(src)) unless uri.host uri.scheme = original_uri.scheme uri.host = original_uri.host @@ -145,7 +145,7 @@ class TopicEmbed < ActiveRecord::Base # Convert any relative URLs to absolute. RSS is annoying for this. def self.absolutize_urls(url, contents) url = normalize_url(url) - uri = URI(url) + uri = URI(URI.encode(url)) prefix = "#{uri.scheme}://#{uri.host}" prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443 diff --git a/lib/topic_retriever.rb b/lib/topic_retriever.rb index 7401619c36f..672ca60a696 100644 --- a/lib/topic_retriever.rb +++ b/lib/topic_retriever.rb @@ -34,6 +34,7 @@ class TopicRetriever # It's possible another process or job found the embed already. So if that happened bail out. return if TopicEmbed.where(embed_url: @embed_url).exists? + # First check RSS if that is enabled if SiteSetting.feed_polling_enabled? Jobs::PollFeed.new.execute({}) diff --git a/spec/models/topic_embed_spec.rb b/spec/models/topic_embed_spec.rb index c7bfa0d43a3..7270c4fc7e9 100644 --- a/spec/models/topic_embed_spec.rb +++ b/spec/models/topic_embed_spec.rb @@ -176,7 +176,23 @@ describe TopicEmbed do it 'img node doesn\'t have other class' do expect(response.body).to have_tag('img', without: { class: 'other' }) end + end + context "non-ascii URL" do + let(:url) { 'http://eviltrout.com/test/ماهی' } + let(:contents) { "<title>سلام</title><body>این یک پاراگراف آزمون است.</body>" } + let!(:embeddable_host) { Fabricate(:embeddable_host) } + let!(:file) { StringIO.new } + + before do + file.stubs(:read).returns contents + TopicEmbed.stubs(:open).returns file + end + + it "doesn't throw an error" do + response = TopicEmbed.find_remote(url) + expect(response.title).to eq("سلام") + end end end