FIX: Onebox fails on encoded URL.

https://meta.discourse.org/t/onebox-breaks-if-theres-chinese-text-in-url/67364
This commit is contained in:
Guo Xiang Tan 2017-09-26 18:34:54 +08:00
parent a324a8a9d4
commit 367fb1c524
3 changed files with 19 additions and 5 deletions

View File

@@ -30,8 +30,10 @@ class TopicEmbed < ActiveRecord::Base
# Prevents double URL encode
# https://stackoverflow.com/a/37599235
# NOTE(review): this diff hunk shows the old and new versions back to back.
# The one-argument definition and its single-line body appear to be the
# removed lines; the two-argument definition after them is the replacement.
def self.escape_uri(uri)
URI.encode(uri).gsub(DOUBLE_ESCAPED_EXPR, '%\1')
# Percent-encodes +uri+ and then rewrites every DOUBLE_ESCAPED_EXPR match as
# '%' followed by the match's first capture group.
#
# uri     - the String to encode.
# pattern - passed straight through as the second argument of URI.encode;
#           defaults to URI::UNSAFE.
#
# Returns the encoded String.
# DOUBLE_ESCAPED_EXPR is defined outside this hunk.
def self.escape_uri(uri, pattern = URI::UNSAFE)
encoded = URI.encode(uri, pattern)
# gsub! returns nil when nothing was replaced, so its result is not used;
# the mutated string is returned explicitly on the next line.
encoded.gsub!(DOUBLE_ESCAPED_EXPR, '%\1')
encoded
end
# Import an article from a source (RSS/Atom/Other)

View File

@@ -180,7 +180,10 @@ class FinalDestination
end
# HTML-unescapes @url and percent-encodes it. The character class built below
# matches everything that is NOT in URI::PATTERN::UNRESERVED, NOT in
# URI::PATTERN::RESERVED and NOT '#', so those matched characters are the
# ones that get encoded.
def escape_url
# NOTE(review): the URI.escape call directly below appears to be the removed
# line of this diff hunk; the TopicEmbed.escape_uri call after it is the
# replacement and receives the same two arguments.
URI.escape(CGI.unescapeHTML(@url), Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}#]"))
TopicEmbed.escape_uri(
CGI.unescapeHTML(@url),
Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}#]")
)
end
def private_ranges

View File

@@ -301,13 +301,22 @@ describe FinalDestination do
end
end
# NOTE(review): the two describe lines below appear to be the before/after of a
# rename in this diff (".escape_url" -> "#escape_url").
# `fd` is a helper defined outside this hunk; presumably it builds a
# FinalDestination for the given URL -- confirm against the spec file.
describe ".escape_url" do
describe "#escape_url" do
it "correctly escapes url" do
# A URL carrying a #fragment must come back unchanged.
fragment_url = "https://eviltrout.com/2016/02/25/fixing-android-performance.html#discourse-comments"
expect(fd(fragment_url).escape_url.to_s).to eq(fragment_url)
# HTML entities (&#038;) in the input become a literal "&" in the output.
# NOTE(review): the one-line expectation and the two-line expectation that
# follow assert the same thing; they look like the before/after of a line wrap.
expect(fd("https://eviltrout.com?s=180&#038;d=mm&#038;r=g").escape_url.to_s).to eq("https://eviltrout.com?s=180&d=mm&r=g")
expect(fd("https://eviltrout.com?s=180&#038;d=mm&#038;r=g").escape_url.to_s)
.to eq("https://eviltrout.com?s=180&d=mm&r=g")
# "\11\15" are octal escapes (tab and carriage return); they must be
# percent-encoded as %09%0D.
expect(fd("http://example.com/?a=\11\15").escape_url.to_s).to eq("http://example.com/?a=%09%0D")
# An already percent-encoded path must come back as-is, not encoded twice.
expect(fd("https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE").escape_url.to_s)
.to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE')
# Raw non-ASCII characters in the path must be percent-encoded.
expect(fd('https://ru.wikipedia.org/wiki/Свобо').escape_url.to_s)
.to eq('https://ru.wikipedia.org/wiki/%D0%A1%D0%B2%D0%BE%D0%B1%D0%BE')
end
end